[s2s] fix nltk pytest race condition with FileLock (#7515)

This commit is contained in:
Sam Shleifer
2020-10-01 12:51:09 -04:00
committed by GitHub
parent 72d363d979
commit 2a358f45ef

View File

@@ -1,5 +1,7 @@
import re
from filelock import FileLock
try:
import nltk
@@ -9,13 +11,12 @@ except (ImportError, ModuleNotFoundError):
NLTK_AVAILABLE = False
# Fetch the nltk "punkt" data at import time, but only when nltk was importable.
if NLTK_AVAILABLE:
    try:
        # Hold a file lock around the download so that concurrent processes
        # (the commit targets a pytest race condition) do not download at once.
        # NOTE(review): the lock name looks like a relative path, so the lock
        # file presumably lands in the current working directory -- confirm.
        with FileLock("a_random_string") as lock:
            nltk.download("punkt", quiet=True)
    except FileExistsError:  # multiprocessing race condition
        # Deliberate best-effort: this error is ignored rather than raised.
        pass
def add_newline_to_end_of_each_sentence(x: str) -> str:
    """Return ``x`` with every sentence placed on its own line.

    This was added to get rougeLsum scores matching published rougeL scores
    for BART and PEGASUS.

    Args:
        x: Text to split. Literal ``<n>`` markers (the pegasus newline char)
            are removed before sentence splitting.

    Returns:
        The sentences produced by ``nltk.sent_tokenize``, joined with a
        single newline character.

    Raises:
        AssertionError: If nltk is not installed.
    """
    # re.sub returns a new string; the result must be re-bound, otherwise the
    # "<n>" markers are silently left in the text.
    x = re.sub("<n>", "", x)  # remove pegasus newline char
    assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)"
    return "\n".join(nltk.sent_tokenize(x))