From e3ef62bce150e9200b70d46d3abbc094364330eb Mon Sep 17 00:00:00 2001 From: mdermentzi Date: Fri, 27 Nov 2020 14:34:57 +0100 Subject: [PATCH] Update README.md (#8815) The tokenizer called at the input_ids of example 2 is currently encoding text_1. I think this should be changed to text_2. --- model_cards/nlpaueb/bert-base-greek-uncased-v1/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model_cards/nlpaueb/bert-base-greek-uncased-v1/README.md b/model_cards/nlpaueb/bert-base-greek-uncased-v1/README.md index a8243a60b8..90e10fcb62 100644 --- a/model_cards/nlpaueb/bert-base-greek-uncased-v1/README.md +++ b/model_cards/nlpaueb/bert-base-greek-uncased-v1/README.md @@ -91,7 +91,7 @@ print(tokenizer_greek.convert_ids_to_tokens(outputs[0, 5].max(0)[1].item())) # ================ EXAMPLE 2 ================ text_2 = 'Είναι ένας [MASK] άνθρωπος.' # EN: 'He is a [MASK] person.' -input_ids = tokenizer_greek.encode(text_1) +input_ids = tokenizer_greek.encode(text_2) print(tokenizer_greek.convert_ids_to_tokens(input_ids)) # ['[CLS]', 'ειναι', 'ενας', '[MASK]', 'ανθρωπος', '.', '[SEP]'] outputs = lm_model_greek(torch.tensor([input_ids]))[0]