Returns the configuration of a causal model
Usage
causal_config(
  model = getOption("pangoling.causal.default"),
  checkpoint = NULL,
  config_model = NULL
)
Arguments
- model
Name of a pre-trained model or folder. Models based on "gpt2" should work. See the Hugging Face website for a list of available models.
- checkpoint
Folder of a checkpoint.
- config_model
List with other arguments that control how the model from Hugging Face is accessed.
More details about causal models
A causal language model (also called a GPT-like, auto-regressive, or decoder-only model) is a type of large language model typically used for text generation; it predicts the next word (more accurately, the next token) based on the preceding context.
If not specified, the causal model used will be the one set in the global option pangoling.causal.default, which can be accessed via getOption("pangoling.causal.default") (by default "gpt2"). To change the default, use options(pangoling.causal.default = "newcausalmodel").
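For instance, a minimal sketch of switching the default model ("distilgpt2" is used here purely as an illustrative Hugging Face model id; any causal model name works):

getOption("pangoling.causal.default")         # "gpt2" unless already changed
options(pangoling.causal.default = "distilgpt2")
causal_config()                               # now reports the configuration of "distilgpt2"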
A list of possible causal models can be found on the Hugging Face website.
Using the config_model argument, it is possible to control how the model from Hugging Face is accessed; see the Python method from_pretrained for details.
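For example, the model could be pinned to a specific revision on the Hugging Face Hub (a sketch; revision is a standard from_pretrained argument, and "main" is the default branch):

causal_config(
  model = "gpt2",
  config_model = list(revision = "main")  # forwarded to from_pretrained()
)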
In case of errors when a new model is run, check the status of Hugging Face at https://status.huggingface.co/.
See also
Other causal model helper functions:
causal_preload()
Examples
causal_config(model = "gpt2")
#> $vocab_size
#> [1] 50257
#>
#> $n_positions
#> [1] 1024
#>
#> $n_embd
#> [1] 768
#>
#> $n_layer
#> [1] 12
#>
#> $n_head
#> [1] 12
#>
#> $n_inner
#> NULL
#>
#> $activation_function
#> [1] "gelu_new"
#>
#> $resid_pdrop
#> [1] 0.1
#>
#> $embd_pdrop
#> [1] 0.1
#>
#> $attn_pdrop
#> [1] 0.1
#>
#> $layer_norm_epsilon
#> [1] 1e-05
#>
#> $initializer_range
#> [1] 0.02
#>
#> $summary_type
#> [1] "cls_index"
#>
#> $summary_use_proj
#> [1] TRUE
#>
#> $summary_activation
#> NULL
#>
#> $summary_first_dropout
#> [1] 0.1
#>
#> $summary_proj_to_labels
#> [1] TRUE
#>
#> $scale_attn_weights
#> [1] TRUE
#>
#> $use_cache
#> [1] TRUE
#>
#> $scale_attn_by_inverse_layer_idx
#> [1] FALSE
#>
#> $reorder_and_upcast_attn
#> [1] FALSE
#>
#> $bos_token_id
#> [1] 50256
#>
#> $eos_token_id
#> [1] 50256
#>
#> $return_dict
#> [1] TRUE
#>
#> $output_hidden_states
#> [1] FALSE
#>
#> $output_attentions
#> [1] FALSE
#>
#> $torchscript
#> [1] FALSE
#>
#> $torch_dtype
#> NULL
#>
#> $use_bfloat16
#> [1] FALSE
#>
#> $tf_legacy_loss
#> [1] FALSE
#>
#> $pruned_heads
#> named list()
#>
#> $tie_word_embeddings
#> [1] TRUE
#>
#> $chunk_size_feed_forward
#> [1] 0
#>
#> $is_encoder_decoder
#> [1] FALSE
#>
#> $is_decoder
#> [1] FALSE
#>
#> $cross_attention_hidden_size
#> NULL
#>
#> $add_cross_attention
#> [1] FALSE
#>
#> $tie_encoder_decoder
#> [1] FALSE
#>
#> $max_length
#> [1] 20
#>
#> $min_length
#> [1] 0
#>
#> $do_sample
#> [1] FALSE
#>
#> $early_stopping
#> [1] FALSE
#>
#> $num_beams
#> [1] 1
#>
#> $num_beam_groups
#> [1] 1
#>
#> $diversity_penalty
#> [1] 0
#>
#> $temperature
#> [1] 1
#>
#> $top_k
#> [1] 50
#>
#> $top_p
#> [1] 1
#>
#> $typical_p
#> [1] 1
#>
#> $repetition_penalty
#> [1] 1
#>
#> $length_penalty
#> [1] 1
#>
#> $no_repeat_ngram_size
#> [1] 0
#>
#> $encoder_no_repeat_ngram_size
#> [1] 0
#>
#> $bad_words_ids
#> NULL
#>
#> $num_return_sequences
#> [1] 1
#>
#> $output_scores
#> [1] FALSE
#>
#> $return_dict_in_generate
#> [1] "TRUE"
#>
#> $forced_bos_token_id
#> NULL
#>
#> $forced_eos_token_id
#> NULL
#>
#> $remove_invalid_values
#> [1] FALSE
#>
#> $exponential_decay_length_penalty
#> NULL
#>
#> $suppress_tokens
#> NULL
#>
#> $begin_suppress_tokens
#> NULL
#>
#> $architectures
#> [1] "GPT2LMHeadModel"
#>
#> $finetuning_task
#> NULL
#>
#> $id2label
#> $id2label$`0`
#> [1] "LABEL_0"
#>
#> $id2label$`1`
#> [1] "LABEL_1"
#>
#>
#> $label2id
#> $label2id$LABEL_0
#> [1] 0
#>
#> $label2id$LABEL_1
#> [1] 1
#>
#>
#> $tokenizer_class
#> NULL
#>
#> $prefix
#> NULL
#>
#> $pad_token_id
#> NULL
#>
#> $sep_token_id
#> NULL
#>
#> $decoder_start_token_id
#> NULL
#>
#> $task_specific_params
#> $task_specific_params$`text-generation`
#> $task_specific_params$`text-generation`$do_sample
#> [1] TRUE
#>
#> $task_specific_params$`text-generation`$max_length
#> [1] 50
#>
#>
#>
#> $problem_type
#> NULL
#>
#> $`_name_or_path`
#> [1] "gpt2"
#>
#> $`_attn_implementation_autoset`
#> [1] TRUE
#>
#> $transformers_version
#> [1] "4.48.0"
#>
#> $model_type
#> [1] "gpt2"
#>
#> $n_ctx
#> [1] 1024
#>
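The returned configuration is a plain R list, so individual fields can be extracted directly, e.g. the embedding size and context length reported above:

cfg <- causal_config(model = "gpt2")
cfg$n_embd
#> [1] 768
cfg$n_ctx
#> [1] 1024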