From ee9eae4e06386763f36f05a345d7470b7ba3464a Mon Sep 17 00:00:00 2001 From: Stefan Schweter Date: Fri, 18 Sep 2020 12:18:06 +0200 Subject: [PATCH] token-classification: update url of GermEval 2014 dataset (#6571) --- examples/token-classification/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/token-classification/README.md b/examples/token-classification/README.md index fb6291fc37..8b2c2335ac 100644 --- a/examples/token-classification/README.md +++ b/examples/token-classification/README.md @@ -19,11 +19,11 @@ Data can be obtained from the [GermEval 2014](https://sites.google.com/site/germ Here are the commands for downloading and pre-processing train, dev and test datasets. The original data format has four (tab-separated) columns, in a pre-processing step only the two relevant columns (token and outer span NER annotation) are extracted: ```bash -curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \ +curl -L 'https://drive.google.com/uc?export=download&id=1Jjhbal535VVz2ap4v4r_rN1UEHTdLK5P' \ | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp -curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \ +curl -L 'https://drive.google.com/uc?export=download&id=1ZfRcQThdtAR5PPRjIDtrVP7BtXSCUBbm' \ | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp -curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \ +curl -L 'https://drive.google.com/uc?export=download&id=1u9mb7kNJHWQCWyweMDRMuTFoOHOfeBTH' \ | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp ```