|
|
|
|
@@ -89,7 +89,7 @@ each other. The process is the following:
|
|
|
|
|
>>> import torch
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
|
|
|
|
|
>>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
|
|
|
|
|
>>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")
|
|
|
|
|
|
|
|
|
|
>>> classes = ["not paraphrase", "is paraphrase"]
|
|
|
|
|
|
|
|
|
|
@@ -122,7 +122,7 @@ each other. The process is the following:
|
|
|
|
|
>>> import tensorflow as tf
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
|
|
|
|
|
>>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")
|
|
|
|
|
|
|
|
|
|
>>> classes = ["not paraphrase", "is paraphrase"]
|
|
|
|
|
|
|
|
|
|
@@ -211,7 +211,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
|
|
|
|
|
>>> import torch
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
|
|
|
|
|
>>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
|
|
|
|
|
>>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
|
|
|
|
|
|
|
|
|
|
>>> text = r"""
|
|
|
|
|
... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
|
|
|
|
|
@@ -253,7 +253,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
|
|
|
|
|
>>> import tensorflow as tf
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
|
|
|
|
|
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
|
|
|
|
|
|
|
|
|
|
>>> text = r"""
|
|
|
|
|
... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
|
|
|
|
|
@@ -373,7 +373,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
|
|
|
|
|
>>> import torch
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased")
|
|
|
|
|
|
|
|
|
|
>>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
|
|
|
|
|
|
|
|
|
|
@@ -389,7 +389,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
|
|
|
|
|
>>> import tensorflow as tf
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased")
|
|
|
|
|
|
|
|
|
|
>>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
|
|
|
|
|
|
|
|
|
|
@@ -437,7 +437,7 @@ of tokens.
|
|
|
|
|
>>> from torch.nn import functional as F
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("gpt2")
|
|
|
|
|
|
|
|
|
|
>>> sequence = f"Hugging Face is based in DUMBO, New York City, and "
|
|
|
|
|
|
|
|
|
|
@@ -461,7 +461,7 @@ of tokens.
|
|
|
|
|
>>> import tensorflow as tf
|
|
|
|
|
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("gpt2")
|
|
|
|
|
|
|
|
|
|
>>> sequence = f"Hugging Face is based in DUMBO, New York City, and "
|
|
|
|
|
|
|
|
|
|
@@ -520,7 +520,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
|
|
|
|
|
>>> ## PYTORCH CODE
|
|
|
|
|
>>> from transformers import AutoModelWithLMHead, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
|
|
|
|
|
|
|
|
|
|
>>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
|
|
|
|
|
@@ -545,7 +545,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
|
|
|
|
|
>>> ## TENSORFLOW CODE
|
|
|
|
|
>>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
|
|
|
|
|
|
|
|
|
|
>>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
|
|
|
|
|
@@ -664,7 +664,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
|
|
|
|
|
>>> from transformers import AutoModelForTokenClassification, AutoTokenizer
|
|
|
|
|
>>> import torch
|
|
|
|
|
|
|
|
|
|
>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
|
|
|
|
|
>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
|
|
|
|
|
|
|
|
|
|
>>> label_list = [
|
|
|
|
|
@@ -692,7 +692,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
|
|
|
|
|
>>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer
|
|
|
|
|
>>> import tensorflow as tf
|
|
|
|
|
|
|
|
|
|
>>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
|
|
|
|
|
|
|
|
|
|
>>> label_list = [
|
|
|
|
|
@@ -790,7 +790,7 @@ CNN / Daily Mail), it yields very good results.
|
|
|
|
|
>>> ## PYTORCH CODE
|
|
|
|
|
>>> from transformers import AutoModelWithLMHead, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
|
|
|
|
|
|
|
|
|
>>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
|
|
|
|
|
@@ -799,7 +799,7 @@ CNN / Daily Mail), it yields very good results.
|
|
|
|
|
>>> ## TENSORFLOW CODE
|
|
|
|
|
>>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
|
|
|
|
|
|
|
|
|
>>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
|
|
|
|
|
@@ -843,7 +843,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
|
|
|
|
|
>>> ## PYTORCH CODE
|
|
|
|
|
>>> from transformers import AutoModelWithLMHead, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
|
|
|
|
|
>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
|
|
|
|
|
|
|
|
|
>>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="pt")
|
|
|
|
|
@@ -851,7 +851,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
|
|
|
|
|
>>> ## TENSORFLOW CODE
|
|
|
|
|
>>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
|
|
|
|
|
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
|
|
|
|
|
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
|
|
|
|
|
|
|
|
|
>>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="tf")
|
|
|
|
|
|