
Returns the configuration of a causal model

Usage

causal_config(
  model = getOption("pangoling.causal.default"),
  checkpoint = NULL,
  config_model = NULL
)

Arguments

model

Name of a pre-trained model or a path to a folder containing one. Models based on "gpt2" should work. See the Hugging Face website for available models.

checkpoint

Path to a checkpoint folder.

config_model

List of additional arguments that control how the model is accessed from Hugging Face.

Value

A list with the configuration of the model.
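
The returned value is a regular R list, so individual settings can be extracted with $. A minimal sketch, assuming the default "gpt2" model (the values noted in the comments match the example output below):

config <- causal_config(model = "gpt2")
config$vocab_size  # tokenizer vocabulary size (50257 for "gpt2")
config$n_positions # maximum context length in tokens (1024 for "gpt2")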

More details about causal models

A causal language model (also called a GPT-like, auto-regressive, or decoder-only model) is a type of large language model, usually used for text generation, that predicts the next word (or, more accurately, the next token) based on the preceding context.

If not specified, the causal model used will be the one set in the global option pangoling.causal.default, which can be accessed via getOption("pangoling.causal.default") (by default, "gpt2"). To change the default, use options(pangoling.causal.default = "newcausalmodel").
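
For example, a minimal sketch of inspecting and changing the default; "distilgpt2" is only an illustrative model name here:

getOption("pangoling.causal.default")            # "gpt2" unless changed
options(pangoling.causal.default = "distilgpt2") # illustrative replacement
causal_config()                                  # now reads the new default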

A list of possible causal models can be found on the Hugging Face website.

Using the config_model and config_tokenizer arguments, it's possible to control how the model and tokenizer are accessed from Hugging Face; see the Python method from_pretrained for details.
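
As an illustration, arguments for from_pretrained() can be passed as a named list via config_model; revision is an actual from_pretrained() parameter, and the value used here is only illustrative:

causal_config(
  model = "gpt2",
  config_model = list(revision = "main") # pin a specific model revision
)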

If errors occur when a new model is run, check the status of https://status.huggingface.co/.

See also

Other causal model helper functions: causal_preload()
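
For instance, causal_preload() can be used to download and cache a model ahead of time, so that later calls do not trigger a download; a minimal sketch, assuming the default "gpt2" model:

causal_preload(model = "gpt2") # download and cache the model once
causal_config(model = "gpt2")  # then inspect its configuration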

Examples

causal_config(model = "gpt2")
#> $vocab_size
#> [1] 50257
#> 
#> $n_positions
#> [1] 1024
#> 
#> $n_embd
#> [1] 768
#> 
#> $n_layer
#> [1] 12
#> 
#> $n_head
#> [1] 12
#> 
#> $n_inner
#> NULL
#> 
#> $activation_function
#> [1] "gelu_new"
#> 
#> $resid_pdrop
#> [1] 0.1
#> 
#> $embd_pdrop
#> [1] 0.1
#> 
#> $attn_pdrop
#> [1] 0.1
#> 
#> $layer_norm_epsilon
#> [1] 1e-05
#> 
#> $initializer_range
#> [1] 0.02
#> 
#> $summary_type
#> [1] "cls_index"
#> 
#> $summary_use_proj
#> [1] TRUE
#> 
#> $summary_activation
#> NULL
#> 
#> $summary_first_dropout
#> [1] 0.1
#> 
#> $summary_proj_to_labels
#> [1] TRUE
#> 
#> $scale_attn_weights
#> [1] TRUE
#> 
#> $use_cache
#> [1] TRUE
#> 
#> $scale_attn_by_inverse_layer_idx
#> [1] FALSE
#> 
#> $reorder_and_upcast_attn
#> [1] FALSE
#> 
#> $bos_token_id
#> [1] 50256
#> 
#> $eos_token_id
#> [1] 50256
#> 
#> $return_dict
#> [1] TRUE
#> 
#> $output_hidden_states
#> [1] FALSE
#> 
#> $output_attentions
#> [1] FALSE
#> 
#> $torchscript
#> [1] FALSE
#> 
#> $torch_dtype
#> NULL
#> 
#> $use_bfloat16
#> [1] FALSE
#> 
#> $tf_legacy_loss
#> [1] FALSE
#> 
#> $pruned_heads
#> named list()
#> 
#> $tie_word_embeddings
#> [1] TRUE
#> 
#> $chunk_size_feed_forward
#> [1] 0
#> 
#> $is_encoder_decoder
#> [1] FALSE
#> 
#> $is_decoder
#> [1] FALSE
#> 
#> $cross_attention_hidden_size
#> NULL
#> 
#> $add_cross_attention
#> [1] FALSE
#> 
#> $tie_encoder_decoder
#> [1] FALSE
#> 
#> $max_length
#> [1] 20
#> 
#> $min_length
#> [1] 0
#> 
#> $do_sample
#> [1] FALSE
#> 
#> $early_stopping
#> [1] FALSE
#> 
#> $num_beams
#> [1] 1
#> 
#> $num_beam_groups
#> [1] 1
#> 
#> $diversity_penalty
#> [1] 0
#> 
#> $temperature
#> [1] 1
#> 
#> $top_k
#> [1] 50
#> 
#> $top_p
#> [1] 1
#> 
#> $typical_p
#> [1] 1
#> 
#> $repetition_penalty
#> [1] 1
#> 
#> $length_penalty
#> [1] 1
#> 
#> $no_repeat_ngram_size
#> [1] 0
#> 
#> $encoder_no_repeat_ngram_size
#> [1] 0
#> 
#> $bad_words_ids
#> NULL
#> 
#> $num_return_sequences
#> [1] 1
#> 
#> $output_scores
#> [1] FALSE
#> 
#> $return_dict_in_generate
#> [1] "TRUE"
#> 
#> $forced_bos_token_id
#> NULL
#> 
#> $forced_eos_token_id
#> NULL
#> 
#> $remove_invalid_values
#> [1] FALSE
#> 
#> $exponential_decay_length_penalty
#> NULL
#> 
#> $suppress_tokens
#> NULL
#> 
#> $begin_suppress_tokens
#> NULL
#> 
#> $architectures
#> [1] "GPT2LMHeadModel"
#> 
#> $finetuning_task
#> NULL
#> 
#> $id2label
#> $id2label$`0`
#> [1] "LABEL_0"
#> 
#> $id2label$`1`
#> [1] "LABEL_1"
#> 
#> 
#> $label2id
#> $label2id$LABEL_0
#> [1] 0
#> 
#> $label2id$LABEL_1
#> [1] 1
#> 
#> 
#> $tokenizer_class
#> NULL
#> 
#> $prefix
#> NULL
#> 
#> $pad_token_id
#> NULL
#> 
#> $sep_token_id
#> NULL
#> 
#> $decoder_start_token_id
#> NULL
#> 
#> $task_specific_params
#> $task_specific_params$`text-generation`
#> $task_specific_params$`text-generation`$do_sample
#> [1] TRUE
#> 
#> $task_specific_params$`text-generation`$max_length
#> [1] 50
#> 
#> 
#> 
#> $problem_type
#> NULL
#> 
#> $`_name_or_path`
#> [1] "gpt2"
#> 
#> $`_attn_implementation_autoset`
#> [1] TRUE
#> 
#> $transformers_version
#> [1] "4.48.0"
#> 
#> $model_type
#> [1] "gpt2"
#> 
#> $n_ctx
#> [1] 1024
#>