updating hub
This commit is contained in:
@@ -23,6 +23,9 @@ bert_docstring = """
|
||||
. `bert-base-multilingual-uncased`
|
||||
. `bert-base-multilingual-cased`
|
||||
. `bert-base-chinese`
|
||||
+ . `bert-base-german-cased`
|
||||
+ . `bert-large-uncased-whole-word-masking`
|
||||
+ . `bert-large-cased-whole-word-masking`
|
||||
- a path or url to a pretrained model archive containing:
|
||||
. `bert_config.json` a configuration file for the model
|
||||
. `pytorch_model.bin` a PyTorch dump of a BertForPreTraining
|
||||
@@ -81,6 +84,7 @@ def bertTokenizer(*args, **kwargs):
|
||||
Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
|
||||
|
||||
Example:
|
||||
+ >>> import torch
|
||||
>>> sentence = 'Hello, World!'
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
>>> toks = tokenizer.tokenize(sentence)
|
||||
@@ -101,6 +105,7 @@ def bertModel(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
@@ -129,6 +134,7 @@ def bertForNextSentencePrediction(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
@@ -158,6 +164,7 @@ def bertForPreTraining(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
@@ -181,6 +188,7 @@ def bertForMaskedLM(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
@@ -222,6 +230,7 @@ def bertForSequenceClassification(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
@@ -256,6 +265,7 @@ def bertForMultipleChoice(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
@@ -288,6 +298,7 @@ def bertForQuestionAnswering(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
@@ -326,6 +337,7 @@ def bertForTokenClassification(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
|
||||
# Prepare tokenized input
|
||||
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
|
||||
|
||||
@@ -11,7 +11,7 @@ gpt2_docstring = """
|
||||
Params:
|
||||
pretrained_model_name_or_path: either:
|
||||
- a str with the name of a pre-trained model to load selected in the list of:
|
||||
- . `gpt2`
|
||||
+ . `gpt2`, `gpt2-medium`
|
||||
- a path or url to a pretrained model archive containing:
|
||||
. `gpt2_config.json` a configuration file for the model
|
||||
. `pytorch_model.bin` a PyTorch dump of a GPT2Model instance
|
||||
@@ -147,10 +147,14 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
|
||||
|
||||
# Prepare tokenized input
|
||||
- >>> text = "Who was Jim Henson ?"
|
||||
- >>> indexed_tokens = tokenizer.encode(text)
|
||||
- >>> tokens_tensor = torch.tensor([indexed_tokens])
|
||||
- >>> mc_token_ids = torch.LongTensor([ [len(indexed_tokens)] ])
|
||||
+ >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
|
||||
+ >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
|
||||
+ >>> tokenized_text1 = tokenizer.tokenize(text1)
|
||||
+ >>> tokenized_text2 = tokenizer.tokenize(text2)
|
||||
+ >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
|
||||
+ >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
|
||||
+ >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
|
||||
+ >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
|
||||
|
||||
# Load gpt2DoubleHeadsModel
|
||||
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2')
|
||||
|
||||
@@ -126,7 +126,7 @@ def openAIGPTLMHeadModel(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
- >>> import torch
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt')
|
||||
|
||||
# Prepare tokenized input
|
||||
@@ -161,15 +161,18 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
|
||||
|
||||
Example:
|
||||
# Load the tokenizer
|
||||
- >>> import torch
|
||||
+ >>> import torch
|
||||
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt')
|
||||
|
||||
# Prepare tokenized input
|
||||
- >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
|
||||
- >>> tokenized_text = tokenizer.tokenize(text)
|
||||
- >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
|
||||
- >>> tokens_tensor = torch.tensor([indexed_tokens])
|
||||
- >>> mc_token_ids = torch.LongTensor([ [len(tokenized_text)] ])
|
||||
+ >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
|
||||
+ >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
|
||||
+ >>> tokenized_text1 = tokenizer.tokenize(text1)
|
||||
+ >>> tokenized_text2 = tokenizer.tokenize(text2)
|
||||
+ >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
|
||||
+ >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
|
||||
+ >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
|
||||
+ >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
|
||||
|
||||
# Load openAIGPTDoubleHeadsModel
|
||||
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
|
||||
|
||||
Reference in New Issue
Block a user