How to train a new language model from scratch using Transformers and Tokenizers
julien-c
• • 61

How to use julien-c/EsperBERTo-small with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("fill-mask", model="julien-c/EsperBERTo-small")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM
tokenizer = AutoTokenizer.from_pretrained("julien-c/EsperBERTo-small")
model = AutoModelForMaskedLM.from_pretrained("julien-c/EsperBERTo-small")

Companion model to blog post https://huggingface.co/blog/how-to-train 🔥
galinette

from transformers import pipeline
fill_mask = pipeline(
"fill-mask",
model="julien-c/EsperBERTo-small",
tokenizer="julien-c/EsperBERTo-small"
)
fill_mask("Jen la komenco de bela <mask>.")
# This is the beginning of a beautiful <mask>.
# =>
# {
# 'score':0.06502299010753632
# 'sequence':'<s> Jen la komenco de bela vivo.</s>'
# 'token':1099
# }
# {
# 'score':0.0421181358397007
# 'sequence':'<s> Jen la komenco de bela vespero.</s>'
# 'token':5100
# }
# {
# 'score':0.024884626269340515
# 'sequence':'<s> Jen la komenco de bela laboro.</s>'
# 'token':1570
# }
# {
# 'score':0.02324388362467289
# 'sequence':'<s> Jen la komenco de bela tago.</s>'
# 'token':1688
# }
# {
# 'score':0.020378097891807556
# 'sequence':'<s> Jen la komenco de bela festo.</s>'
# 'token':4580
# }