clean for release

2019-12-06 22:01:48 +01:00
parent 2a64107e44
commit f7eba09007
8 changed files with 49 additions and 376 deletions
--- a/examples/summarization/modeling_bertabs.py
+++ b/examples/summarization/modeling_bertabs.py
@@ -1,6 +1,6 @@
 # MIT License

-# Copyright (c) 2019 Yang Liu
+# Copyright (c) 2019 Yang Liu and the HuggingFace team

 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
--- a/examples/summarization/requirements.txt
+++ b/examples/summarization/requirements.txt
@@ -0,0 +1,9 @@
+# progress bars in model download and training scripts
+tqdm
+# Accessing files from S3 directly.
+boto3
+# Used for downloading models over HTTP
+requests
+# For ROUGE
+nltk
+py-rouge
--- a/examples/summarization/run_summarization.py
+++ b/examples/summarization/run_summarization.py
@@ -1,3 +1,4 @@
+#! /usr/bin/python3
 import argparse
 from collections import namedtuple
 import logging
@@ -97,6 +98,32 @@ def evaluate(args):
        print(str_scores)


+def save_summaries(summaries, path, original_document_name):
+    """ Write the summaries in files that are prefixed by the original
+    files' name with the `_summary` appended.
+
+    Attributes:
+        original_document_name: List[string]
+            Names of the documents that were summarized.
+        path: string
+            Path where the summaries will be written
+        summaries: List[string]
+            The summaries that we produced.
+    """
+    for summary, document_name in zip(summaries, original_document_name):
+        # Prepare the summary file's name
+        if "." in document_name:
+            bare_document_name = ".".join(document_name.split(".")[:-1])
+            extension = document_name.split(".")[-1]
+            name = bare_document_name + "_summary." + extension
+        else:
+            name = document_name + "_summary"
+
+        file_path = os.path.join(path, name)
+        with open(file_path, "w") as output:
+            output.write(summary)
+
+
 def format_summary(translation):
    """ Transforms the output of the `from_batch` function
    into nicely formatted summaries.
@@ -151,32 +178,6 @@ def save_rouge_scores(str_scores):
        output.write(str_scores)


-def save_summaries(summaries, path, original_document_name):
-    """ Write the summaries in fies that are prefixed by the original
-    files' name with the `_summary` appended.
-
-    Attributes:
-        original_document_names: List[string]
-            Name of the document that was summarized.
-        path: string
-            Path were the summaries will be written
-        summaries: List[string]
-            The summaries that we produced.
-    """
-    for summary, document_name in zip(summaries, original_document_name):
-        # Prepare the summary file's name
-        if "." in document_name:
-            bare_document_name = ".".join(document_name.split(".")[:-1])
-            extension = document_name.split(".")[-1]
-            name = bare_document_name + "_summary." + extension
-        else:
-            name = document_name + "_summary"
-
-        file_path = os.path.join(path, name)
-        with open(file_path, "w") as output:
-            output.write(summary)
-
-
 #
 # LOAD the dataset
 #
@@ -323,7 +324,7 @@ def main():
        raise FileNotFoundError(
            "We could not find the directory you specified for the documents to summarize, or it was empty. Please specify a valid path."
        )
-    maybe_create_output_dir(args.summaries_output_dir)
+    os.makedirs(args.summaries_output_dir, exist_ok=True)

    evaluate(args)

@@ -339,10 +340,5 @@ def documents_dir_is_valid(path):
    return True


-def maybe_create_output_dir(path):
-    if not os.path.exists(path):
-        os.makedirs(path)
-
-
 if __name__ == "__main__":
    main()