From 2a358f45efd27f40134692229f1665ffbdc729f7 Mon Sep 17 00:00:00 2001
From: Sam Shleifer
Date: Thu, 1 Oct 2020 12:51:09 -0400
Subject: [PATCH] [s2s] fix nltk pytest race condition with FileLock (#7515)

---
 examples/seq2seq/sentence_splitter.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/seq2seq/sentence_splitter.py b/examples/seq2seq/sentence_splitter.py
index 197c4b250b..62fec13c3c 100644
--- a/examples/seq2seq/sentence_splitter.py
+++ b/examples/seq2seq/sentence_splitter.py
@@ -1,5 +1,7 @@
 import re
 
+from filelock import FileLock
+
 
 try:
     import nltk
@@ -9,13 +11,12 @@ except (ImportError, ModuleNotFoundError):
     NLTK_AVAILABLE = False
 
 if NLTK_AVAILABLE:
-    try:
+    with FileLock("a_random_string") as lock:
         nltk.download("punkt", quiet=True)
-    except FileExistsError:  # multiprocessing race condition
-        pass
 
 
 def add_newline_to_end_of_each_sentence(x: str) -> str:
+    """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS."""
     re.sub("<n>", "", x)  # remove pegasus newline char
-    assert NLTK_AVAILABLE, "nltk must be installed to separate newlines betwee sentences. (pip install nltk)"
+    assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)"
     return "\n".join(nltk.sent_tokenize(x))