Returns the configuration of a masked model.
Usage
masked_config(
  model = getOption("pangoling.masked.default"),
  config_model = NULL
)
Arguments
- model
Name of a pre-trained model or folder. One should be able to use models based on "bert". See the Hugging Face website.
- config_model
List with other arguments that control how the model from Hugging Face is accessed.
Details
A masked language model (also called a BERT-like or encoder model) is a type of large language model that can be used to predict the content of a mask in a sentence.
If not specified, the masked model used is the one set in the global option pangoling.masked.default, which can be accessed via getOption("pangoling.masked.default") (by default "bert-base-uncased"). To change the default, use options(pangoling.masked.default = "newmaskedmodel").
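For instance, a minimal sketch of checking and then changing the default (the replacement model name is purely illustrative):

# Check which masked model is currently the default
getOption("pangoling.masked.default")
#> [1] "bert-base-uncased"

# Switch the default to another masked model for the rest of the session
options(pangoling.masked.default = "newmaskedmodel")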
A list of possible masked models can be found on the Hugging Face website.
Using the config_model and config_tokenizer arguments, it's possible to control how the model and tokenizer from Hugging Face are accessed; see the Python method from_pretrained for details. In case of errors, check the status of https://status.huggingface.co/.
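As a sketch of this, extra from_pretrained() arguments can be supplied through config_model as a named list; the revision argument below (pinning a specific revision on the Hugging Face Hub) is just one example of such an argument:

# Not run: pass additional from_pretrained() arguments via config_model
masked_config(
  model = "bert-base-uncased",
  config_model = list(revision = "main")
)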
See also
Other masked model helper functions:
masked_preload()
Examples
masked_config(model = "bert-base-uncased")
#> $return_dict
#> [1] TRUE
#>
#> $output_hidden_states
#> [1] FALSE
#>
#> $output_attentions
#> [1] FALSE
#>
#> $torchscript
#> [1] FALSE
#>
#> $torch_dtype
#> NULL
#>
#> $use_bfloat16
#> [1] FALSE
#>
#> $tf_legacy_loss
#> [1] FALSE
#>
#> $pruned_heads
#> named list()
#>
#> $tie_word_embeddings
#> [1] TRUE
#>
#> $chunk_size_feed_forward
#> [1] 0
#>
#> $is_encoder_decoder
#> [1] FALSE
#>
#> $is_decoder
#> [1] FALSE
#>
#> $cross_attention_hidden_size
#> NULL
#>
#> $add_cross_attention
#> [1] FALSE
#>
#> $tie_encoder_decoder
#> [1] FALSE
#>
#> $max_length
#> [1] 20
#>
#> $min_length
#> [1] 0
#>
#> $do_sample
#> [1] FALSE
#>
#> $early_stopping
#> [1] FALSE
#>
#> $num_beams
#> [1] 1
#>
#> $num_beam_groups
#> [1] 1
#>
#> $diversity_penalty
#> [1] 0
#>
#> $temperature
#> [1] 1
#>
#> $top_k
#> [1] 50
#>
#> $top_p
#> [1] 1
#>
#> $typical_p
#> [1] 1
#>
#> $repetition_penalty
#> [1] 1
#>
#> $length_penalty
#> [1] 1
#>
#> $no_repeat_ngram_size
#> [1] 0
#>
#> $encoder_no_repeat_ngram_size
#> [1] 0
#>
#> $bad_words_ids
#> NULL
#>
#> $num_return_sequences
#> [1] 1
#>
#> $output_scores
#> [1] FALSE
#>
#> $return_dict_in_generate
#> [1] "TRUE"
#>
#> $forced_bos_token_id
#> NULL
#>
#> $forced_eos_token_id
#> NULL
#>
#> $remove_invalid_values
#> [1] FALSE
#>
#> $exponential_decay_length_penalty
#> NULL
#>
#> $suppress_tokens
#> NULL
#>
#> $begin_suppress_tokens
#> NULL
#>
#> $architectures
#> [1] "BertForMaskedLM"
#>
#> $finetuning_task
#> NULL
#>
#> $id2label
#> $id2label$`0`
#> [1] "LABEL_0"
#>
#> $id2label$`1`
#> [1] "LABEL_1"
#>
#>
#> $label2id
#> $label2id$LABEL_0
#> [1] 0
#>
#> $label2id$LABEL_1
#> [1] 1
#>
#>
#> $tokenizer_class
#> NULL
#>
#> $prefix
#> NULL
#>
#> $bos_token_id
#> NULL
#>
#> $pad_token_id
#> [1] 0
#>
#> $eos_token_id
#> NULL
#>
#> $sep_token_id
#> NULL
#>
#> $decoder_start_token_id
#> NULL
#>
#> $task_specific_params
#> NULL
#>
#> $problem_type
#> NULL
#>
#> $`_name_or_path`
#> [1] "bert-base-uncased"
#>
#> $`_attn_implementation_autoset`
#> [1] TRUE
#>
#> $transformers_version
#> [1] "4.48.0"
#>
#> $gradient_checkpointing
#> [1] FALSE
#>
#> $model_type
#> [1] "bert"
#>
#> $vocab_size
#> [1] 30522
#>
#> $hidden_size
#> [1] 768
#>
#> $num_hidden_layers
#> [1] 12
#>
#> $num_attention_heads
#> [1] 12
#>
#> $hidden_act
#> [1] "gelu"
#>
#> $intermediate_size
#> [1] 3072
#>
#> $hidden_dropout_prob
#> [1] 0.1
#>
#> $attention_probs_dropout_prob
#> [1] 0.1
#>
#> $max_position_embeddings
#> [1] 512
#>
#> $type_vocab_size
#> [1] 2
#>
#> $initializer_range
#> [1] 0.02
#>
#> $layer_norm_eps
#> [1] 1e-12
#>
#> $position_embedding_type
#> [1] "absolute"
#>
#> $use_cache
#> [1] TRUE
#>
#> $classifier_dropout
#> NULL
#>
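Since the returned configuration is a regular R list, individual settings can be extracted directly; a short sketch using values that appear in the output above:

cfg <- masked_config(model = "bert-base-uncased")
# Vocabulary size and hidden dimensionality of "bert-base-uncased"
cfg$vocab_size
#> [1] 30522
cfg$hidden_size
#> [1] 768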