Fixed some typos and removed legacy url (#10989)
* Fixed typos * Removed legacy colab notebook from readme Co-authored-by: WybeKoper <WybeKoper@users.noreply.github.com>
This commit is contained in:
@@ -129,6 +129,3 @@ python ./examples/multiple-choice/run_tf_multiple_choice.py \
|
|||||||
--gradient_accumulation_steps 2 \
|
--gradient_accumulation_steps 2 \
|
||||||
--overwrite_output
|
--overwrite_output
|
||||||
```
|
```
|
||||||
|
|
||||||
# Run it in colab
|
|
||||||
[Open In Colab](https://colab.research.google.com/github/ViktorAlm/notebooks/blob/master/MPC_GPU_Demo_for_TF_and_PT.ipynb)
|
|
||||||
|
|||||||
@@ -1302,10 +1302,10 @@ class GenerationMixin:
|
|||||||
# argmax
|
# argmax
|
||||||
next_tokens = torch.argmax(next_tokens_scores, dim=-1)
|
next_tokens = torch.argmax(next_tokens_scores, dim=-1)
|
||||||
|
|
||||||
# add code that transfomers next_tokens to tokens_to_add
|
# add code that transforms next_tokens to tokens_to_add
|
||||||
if eos_token_id is not None:
|
if eos_token_id is not None:
|
||||||
assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined."
|
assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined."
|
||||||
next_tokens = next_tokens * unfinished_sequences + (pad_token_id) * (1 - unfinished_sequences)
|
next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences)
|
||||||
|
|
||||||
# add token and increase length by one
|
# add token and increase length by one
|
||||||
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
||||||
@@ -1526,10 +1526,10 @@ class GenerationMixin:
|
|||||||
|
|
||||||
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
|
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
|
||||||
|
|
||||||
# add code that transfomers next_tokens to tokens_to_add
|
# add code that transforms next_tokens to tokens_to_add
|
||||||
if eos_token_id is not None:
|
if eos_token_id is not None:
|
||||||
assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined."
|
assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined."
|
||||||
next_tokens = next_tokens * unfinished_sequences + (pad_token_id) * (1 - unfinished_sequences)
|
next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences)
|
||||||
|
|
||||||
# add token and increase length by one
|
# add token and increase length by one
|
||||||
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ def load_vocab(vocab_file):
|
|||||||
|
|
||||||
class XLMProphetNetTokenizer(PreTrainedTokenizer):
|
class XLMProphetNetTokenizer(PreTrainedTokenizer):
|
||||||
"""
|
"""
|
||||||
Adapted from :class:`~transfomers.RobertaTokenizer` and class:`~transfomers.XLNetTokenizer`. Based on
|
Adapted from :class:`~transformers.RobertaTokenizer` and :class:`~transformers.XLNetTokenizer`. Based on
|
||||||
`SentencePiece <https://github.com/google/sentencepiece>`__.
|
`SentencePiece <https://github.com/google/sentencepiece>`__.
|
||||||
|
|
||||||
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the main methods.
|
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the main methods.
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
|||||||
class XLMRobertaTokenizerFast(PreTrainedTokenizerFast):
|
class XLMRobertaTokenizerFast(PreTrainedTokenizerFast):
|
||||||
"""
|
"""
|
||||||
Construct a "fast" XLM-RoBERTa tokenizer (backed by HuggingFace's `tokenizers` library). Adapted from
|
Construct a "fast" XLM-RoBERTa tokenizer (backed by HuggingFace's `tokenizers` library). Adapted from
|
||||||
:class:`~transfomers.RobertaTokenizer` and class:`~transfomers.XLNetTokenizer`. Based on `BPE
|
:class:`~transformers.RobertaTokenizer` and :class:`~transformers.XLNetTokenizer`. Based on `BPE
|
||||||
<https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=BPE#models>`__.
|
<https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=BPE#models>`__.
|
||||||
|
|
||||||
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
|
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
|
||||||
|
|||||||
Reference in New Issue
Block a user