docstrings

This commit is contained in:
thomwolf
2019-02-07 17:07:22 +01:00
parent 438db43d46
commit f99f2fb661
2 changed files with 87 additions and 19 deletions

View File

@@ -193,6 +193,7 @@ class OpenAIGPTTokenizer(object):
return word
def tokenize(self, text):
+""" Tokenize a string. """
split_tokens = []
text = self.nlp(text_standardize(self.fix_text(text)))
for token in text:
@@ -200,7 +201,7 @@ class OpenAIGPTTokenizer(object):
return split_tokens
def convert_tokens_to_ids(self, tokens):
-"""Converts a sequence of tokens into ids using the vocab."""
+""" Converts a sequence of tokens into ids using the vocab. """
ids = []
if isinstance(tokens, str) or (sys.version_info[0] == 2 and isinstance(tokens, unicode)):
if tokens in self.special_tokens: