From 5eab3cf6bce7b6f11793056d8772aeb6e761ac4f Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sat, 21 Dec 2019 18:03:57 +0100 Subject: [PATCH] Fix W605 flake8 warning (x5). --- examples/contrib/run_openai_gpt.py | 4 ++-- transformers/tokenization_xlm.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/contrib/run_openai_gpt.py b/examples/contrib/run_openai_gpt.py index e35f3d4fe9..80331f3402 100644 --- a/examples/contrib/run_openai_gpt.py +++ b/examples/contrib/run_openai_gpt.py @@ -22,8 +22,8 @@ --model_name openai-gpt \ --do_train \ --do_eval \ - --train_dataset $ROC_STORIES_DIR/cloze_test_val__spring2016\ -\ cloze_test_ALL_val.csv \ - --eval_dataset $ROC_STORIES_DIR/cloze_test_test__spring2016\ -\ cloze_test_ALL_test.csv \ + --train_dataset "$ROC_STORIES_DIR/cloze_test_val__spring2016 - cloze_test_ALL_val.csv" \ + --eval_dataset "$ROC_STORIES_DIR/cloze_test_test__spring2016 - cloze_test_ALL_test.csv" \ --output_dir ../log \ --train_batch_size 16 \ """ diff --git a/transformers/tokenization_xlm.py b/transformers/tokenization_xlm.py index 7ef53cf80a..4651629312 100644 --- a/transformers/tokenization_xlm.py +++ b/transformers/tokenization_xlm.py @@ -725,10 +725,10 @@ class XLMTokenizer(PreTrainedTokenizer): make && make install pip install kytea ``` - - [jieba](https://github.com/fxsjy/jieba): Chinese tokenizer * + - [jieba](https://github.com/fxsjy/jieba): Chinese tokenizer (*) - Install with `pip install jieba` - \* The original XLM used [Stanford Segmenter](https://nlp.stanford.edu/software/stanford-segmenter-2018-10-16.zip). + (*) The original XLM used [Stanford Segmenter](https://nlp.stanford.edu/software/stanford-segmenter-2018-10-16.zip). However, the wrapper (`nltk.tokenize.stanford_segmenter`) is slow due to JVM overhead, and it will be deprecated. Jieba is a lot faster and pip-installable. Note there is some mismatch with the Stanford Segmenter. It should be fine if you fine-tune the model with Chinese supervision. 
If you want the same exact behaviour, use the original XLM