fix some typos
This commit is contained in:
committed by
Lysandre Debut
parent
84a0b522cf
commit
cb7b77a8a2
@@ -12,7 +12,7 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Tokenization classes for OpenAI GPT."""
|
"""Tokenization classes for XLM."""
|
||||||
from __future__ import (absolute_import, division, print_function,
|
from __future__ import (absolute_import, division, print_function,
|
||||||
unicode_literals)
|
unicode_literals)
|
||||||
|
|
||||||
@@ -758,9 +758,9 @@ class XLMTokenizer(PreTrainedTokenizer):
|
|||||||
"""
|
"""
|
||||||
Build model inputs from a sequence or a pair of sequences for sequence classification tasks
|
Build model inputs from a sequence or a pair of sequences for sequence classification tasks
|
||||||
by concatenating and adding special tokens.
|
by concatenating and adding special tokens.
|
||||||
A RoBERTa sequence has the following format:
|
An XLM sequence has the following format:
|
||||||
single sequence: <s> X </s>
|
single sequence: <s> X </s>
|
||||||
pair of sequences: <s> A </s></s> B </s>
|
pair of sequences: <s> A </s> B </s>
|
||||||
"""
|
"""
|
||||||
if token_ids_1 is None:
|
if token_ids_1 is None:
|
||||||
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
|
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
|
||||||
|
|||||||
Reference in New Issue
Block a user