Returns the configuration of a masked model.
Usage
masked_config(
  model = getOption("pangoling.masked.default"),
  config_model = NULL
)
Arguments
- model
Name of a pre-trained model or folder. One should be able to use models based on "bert". See the Hugging Face website.
- config_model
List with other arguments that control how the model from Hugging Face is accessed.
Details
A masked language model (also called a BERT-like or encoder model) is a type of large language model that can be used to predict the content of a mask in a sentence.
If not specified, the masked model used is the one set in the global option pangoling.masked.default, which can be accessed via getOption("pangoling.masked.default") (by default "bert-base-uncased"). To change the default, use options(pangoling.masked.default = "newmaskedmodel").
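For instance, a minimal sketch of checking and then changing the default (the replacement model name is purely illustrative):

# Check which masked model is currently the default
getOption("pangoling.masked.default")
#> [1] "bert-base-uncased"

# Switch the default to another masked model for the rest of the session
options(pangoling.masked.default = "newmaskedmodel")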
A list of possible masked models can be found on the Hugging Face website.
Using the config_model and config_tokenizer arguments, it's possible to control how the model and tokenizer from Hugging Face are accessed; see the Python method from_pretrained for details. In case of errors, check the status of https://status.huggingface.co/.
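As a sketch of this, extra from_pretrained() arguments can be supplied through config_model as a named list; the revision argument below (pinning a specific revision on the Hugging Face Hub) is just one example of such an argument:

# Not run: pass additional from_pretrained() arguments via config_model
masked_config(
  model = "bert-base-uncased",
  config_model = list(revision = "main")
)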
See also
Other masked model helper functions:
masked_preload()
Examples
masked_config(model = "bert-base-uncased")
#> $return_dict
#> [1] TRUE
#>
#> $output_hidden_states
#> [1] FALSE
#>
#> $output_attentions
#> [1] FALSE
#>
#> $torchscript
#> [1] FALSE
#>
#> $torch_dtype
#> NULL
#>
#> $use_bfloat16
#> [1] FALSE
#>
#> $tf_legacy_loss
#> [1] FALSE
#>
#> $pruned_heads
#> named list()
#>
#> $tie_word_embeddings
#> [1] TRUE
#>
#> $chunk_size_feed_forward
#> [1] 0
#>
#> $is_encoder_decoder
#> [1] FALSE
#>
#> $is_decoder
#> [1] FALSE
#>
#> $cross_attention_hidden_size
#> NULL
#>
#> $add_cross_attention
#> [1] FALSE
#>
#> $tie_encoder_decoder
#> [1] FALSE
#>
#> $max_length
#> [1] 20
#>
#> $min_length
#> [1] 0
#>
#> $do_sample
#> [1] FALSE
#>
#> $early_stopping
#> [1] FALSE
#>
#> $num_beams
#> [1] 1
#>
#> $num_beam_groups
#> [1] 1
#>
#> $diversity_penalty
#> [1] 0
#>
#> $temperature
#> [1] 1
#>
#> $top_k
#> [1] 50
#>
#> $top_p
#> [1] 1
#>
#> $typical_p
#> [1] 1
#>
#> $repetition_penalty
#> [1] 1
#>
#> $length_penalty
#> [1] 1
#>
#> $no_repeat_ngram_size
#> [1] 0
#>
#> $encoder_no_repeat_ngram_size
#> [1] 0
#>
#> $bad_words_ids
#> NULL
#>
#> $num_return_sequences
#> [1] 1
#>
#> $output_scores
#> [1] FALSE
#>
#> $return_dict_in_generate
#> [1] "TRUE"
#>
#> $forced_bos_token_id
#> NULL
#>
#> $forced_eos_token_id
#> NULL
#>
#> $remove_invalid_values
#> [1] FALSE
#>
#> $exponential_decay_length_penalty
#> NULL
#>
#> $suppress_tokens
#> NULL
#>
#> $begin_suppress_tokens
#> NULL
#>
#> $architectures
#> [1] "BertForMaskedLM"
#>
#> $finetuning_task
#> NULL
#>
#> $id2label
#> $id2label$`0`
#> [1] "LABEL_0"
#>
#> $id2label$`1`
#> [1] "LABEL_1"
#>
#>
#> $label2id
#> $label2id$LABEL_0
#> [1] 0
#>
#> $label2id$LABEL_1
#> [1] 1
#>
#>
#> $tokenizer_class
#> NULL
#>
#> $prefix
#> NULL
#>
#> $bos_token_id
#> NULL
#>
#> $pad_token_id
#> [1] 0
#>
#> $eos_token_id
#> NULL
#>
#> $sep_token_id
#> NULL
#>
#> $decoder_start_token_id
#> NULL
#>
#> $task_specific_params
#> NULL
#>
#> $problem_type
#> NULL
#>
#> $`_name_or_path`
#> [1] "bert-base-uncased"
#>
#> $`_attn_implementation_autoset`
#> [1] TRUE
#>
#> $transformers_version
#> [1] "4.48.0"
#>
#> $gradient_checkpointing
#> [1] FALSE
#>
#> $model_type
#> [1] "bert"
#>
#> $vocab_size
#> [1] 30522
#>
#> $hidden_size
#> [1] 768
#>
#> $num_hidden_layers
#> [1] 12
#>
#> $num_attention_heads
#> [1] 12
#>
#> $hidden_act
#> [1] "gelu"
#>
#> $intermediate_size
#> [1] 3072
#>
#> $hidden_dropout_prob
#> [1] 0.1
#>
#> $attention_probs_dropout_prob
#> [1] 0.1
#>
#> $max_position_embeddings
#> [1] 512
#>
#> $type_vocab_size
#> [1] 2
#>
#> $initializer_range
#> [1] 0.02
#>
#> $layer_norm_eps
#> [1] 1e-12
#>
#> $position_embedding_type
#> [1] "absolute"
#>
#> $use_cache
#> [1] TRUE
#>
#> $classifier_dropout
#> NULL
#>
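Since the returned configuration is a regular R list, individual settings can be extracted directly; a short sketch using values that appear in the output above:

cfg <- masked_config(model = "bert-base-uncased")
# Vocabulary size and hidden dimensionality of "bert-base-uncased"
cfg$vocab_size
#> [1] 30522
cfg$hidden_size
#> [1] 768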