From 297233fa92f4f47e77762430369a61d17ba57134 Mon Sep 17 00:00:00 2001 From: Sam Shleifer Date: Thu, 8 Oct 2020 21:22:22 -0400 Subject: [PATCH] [s2s] Switch README urls to cdn (#7670) --- examples/seq2seq/README.md | 8 ++++---- examples/seq2seq/finetune_bart_tiny.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/seq2seq/README.md b/examples/seq2seq/README.md index 2efde25c50..cc411da7a2 100644 --- a/examples/seq2seq/README.md +++ b/examples/seq2seq/README.md @@ -19,7 +19,7 @@ For `bertabs` instructions, see [`bertabs/README.md`](bertabs/README.md). #### XSUM: ```bash cd examples/seq2seq -wget https://s3.amazonaws.com/datasets.huggingface.co/summarization/xsum.tar.gz +wget https://cdn-datasets.huggingface.co/summarization/xsum.tar.gz tar -xzvf xsum.tar.gz export XSUM_DIR=${PWD}/xsum ``` @@ -29,7 +29,7 @@ To use your own data, copy that files format. Each article to be summarized is o #### CNN/DailyMail ```bash cd examples/seq2seq -wget https://s3.amazonaws.com/datasets.huggingface.co/summarization/cnn_dm_v2.tgz +wget https://cdn-datasets.huggingface.co/summarization/cnn_dm_v2.tgz tar -xzvf cnn_dm_v2.tgz # empty lines removed mv cnn_cln cnn_dm export CNN_DIR=${PWD}/cnn_dm @@ -39,7 +39,7 @@ this should make a directory called `cnn_dm/` with 6 files. #### WMT16 English-Romanian Translation Data: download with this command: ```bash -wget https://s3.amazonaws.com/datasets.huggingface.co/translation/wmt_en_ro.tar.gz +wget https://cdn-datasets.huggingface.co/translation/wmt_en_ro.tar.gz tar -xzvf wmt_en_ro.tar.gz export ENRO_DIR=${PWD}/wmt_en_ro ``` @@ -47,7 +47,7 @@ this should make a directory called `wmt_en_ro/` with 6 files. #### WMT English-German: ```bash -wget https://s3.amazonaws.com/datasets.huggingface.co/translation/wmt_en_de.tgz +wget https://cdn-datasets.huggingface.co/translation/wmt_en_de.tgz tar -xzvf wmt_en_de.tgz export DATA_DIR=${PWD}/wmt_en_de ``` diff --git a/examples/seq2seq/finetune_bart_tiny.sh b/examples/seq2seq/finetune_bart_tiny.sh index dcdb0db979..f0289b45ab 100755 --- a/examples/seq2seq/finetune_bart_tiny.sh +++ b/examples/seq2seq/finetune_bart_tiny.sh @@ -1,7 +1,7 @@ # Script for verifying that run_bart_sum can be invoked from its directory # Get tiny dataset with cnn_dm format (4 examples for train, val, test) -wget https://s3.amazonaws.com/datasets.huggingface.co/summarization/cnn_tiny.tgz +wget https://cdn-datasets.huggingface.co/summarization/cnn_tiny.tgz tar -xzvf cnn_tiny.tgz rm cnn_tiny.tgz