From 2421e54f8c354fc110a7f8819a9161163813f7ad Mon Sep 17 00:00:00 2001 From: Juha Kiili Date: Fri, 29 Nov 2019 15:39:28 +0200 Subject: [PATCH] Add link to original source and license to download_glue_data.py --- utils/download_glue_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/download_glue_data.py b/utils/download_glue_data.py index 86a4e8951f..f676a71c76 100644 --- a/utils/download_glue_data.py +++ b/utils/download_glue_data.py @@ -1,5 +1,8 @@ ''' Script for downloading all GLUE data. +Original source: https://github.com/kamalkraj/ALBERT-TF2.0/blob/fa90194e5fe729dbb19f32ac29c8d6d6372c0f93/download_glue_data.py +Original license: https://github.com/kamalkraj/ALBERT-TF2.0/blob/fa90194e5fe729dbb19f32ac29c8d6d6372c0f93/LICENSE (Apache-2.0) + Note: for legal reasons, we are unable to host MRPC. You can either use the version hosted by the SentEval team, which is already tokenized, or you can download the original data from (https://download.microsoft.com/download/D/4/6/D46FF87A-F6B9-4252-AA8B-3604ED519838/MSRParaphraseCorpus.msi) and extract the data from it manually.