directly load from TF checkpoints + code cleanup

This commit is contained in:
thomwolf
2019-01-28 16:50:23 +01:00
parent 9c35c132fa
commit d77dd62ff8
8 changed files with 225 additions and 178 deletions

View File

@@ -130,6 +130,9 @@ class OpenAIGPTTokenizer(object):
else:
self.special_tokens = dict((tok, len(self.encoder) + i) for i, tok in enumerate(special_tokens))
def __len__(self):
    """Return the total number of tokens known to the tokenizer.

    The count is the number of entries in ``self.encoder`` plus the
    number of entries in ``self.special_tokens``.
    """
    regular_count = len(self.encoder)
    special_count = len(self.special_tokens)
    return regular_count + special_count
def set_special_tokens(self, special_tokens):
    """Replace the special-token mapping with ids appended after the encoder.

    Each token in ``special_tokens`` is assigned the id
    ``len(self.encoder) + i``, where ``i`` is its position in the
    iterable. Any previously stored mapping is discarded. If a token
    appears more than once, its last position wins.

    Args:
        special_tokens: iterable of tokens to register, in id order.
            ``None`` or an empty iterable clears the mapping instead of
            raising ``TypeError``.
    """
    # `or ()` lets callers pass None to reset the mapping; enumerate(None)
    # would otherwise raise TypeError.
    first_id = len(self.encoder)
    self.special_tokens = {
        tok: idx for idx, tok in enumerate(special_tokens or (), start=first_id)
    }