[s2s] rougeLSum expects \n between sentences (#7410)

Co-authored-by: Swetha Mandava <smandava@nvidia.com>
This commit is contained in:
Sam Shleifer
2020-09-27 16:27:19 -04:00
committed by GitHub
parent eab5f59682
commit 7296fea1d6
7 changed files with 176 additions and 14 deletions

View File

@@ -0,0 +1,21 @@
import re
# nltk is an optional dependency: NLTK_AVAILABLE records whether the import
# succeeded so that callers can check it before touching ``nltk``.
try:
    import nltk
except ImportError:  # also covers ModuleNotFoundError, which subclasses ImportError
    NLTK_AVAILABLE = False
else:
    NLTK_AVAILABLE = True
    # Fetch the "punkt" resource at import time, only when nltk is present.
    try:
        nltk.download("punkt", quiet=True)
    except FileExistsError:  # multiprocessing race condition
        pass
def add_newline_to_end_of_each_sentence(x: str) -> str:
    """Return ``x`` with its sentences separated by newlines.

    This is the layout rougeLsum expects: one sentence per line. Any literal
    ``<n>`` markers (the pegasus newline token) are stripped before the text
    is split into sentences with ``nltk.sent_tokenize``.

    Args:
        x: The text (e.g. a generated or reference summary) to reformat.

    Returns:
        The sentences of ``x`` joined with ``"\\n"``.

    Raises:
        AssertionError: If nltk could not be imported.
    """
    assert NLTK_AVAILABLE, "nltk must be installed to separate newlines betwee sentences. (pip install nltk)"
    # Strings are immutable, so the cleaned text must be rebound to ``x``;
    # previously the substitution result was discarded and "<n>" leaked
    # through into the tokenizer input.
    x = x.replace("<n>", "")  # remove pegasus newline char
    return "\n".join(nltk.sent_tokenize(x))