This commit is contained in:
Lysandre
2019-11-20 09:42:48 -05:00
parent f3386d9383
commit 454455c695

View File

@@ -240,6 +240,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
# The -3 accounts for [CLS], [SEP] and [SEP]
max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
assert max_tokens_for_doc > 0
# We can have documents that are longer than the maximum sequence length.
# To deal with this we do a sliding window approach, where we take chunks