Fixed bug in calculating xpath_subs_list in MarkupLMTokenizer (#22302)

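Fixed a bug in how xpath_subs_list is calculated in MarkupLMTokenizer and MarkupLMTokenizerFast. Previously, xpath_subs_list was assigned a truncated slice of xpath_tags_list (`xpath_tags_list[: self.max_depth]`), so it ended up the same as xpath_tags_list; it is now sliced from xpath_subs_list itself.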

Co-authored-by: dusejat <dusejat@amazon.com>
This commit is contained in:
silentghoul-spec
2023-03-22 17:37:49 +05:30
committed by GitHub
parent 4e94c6c008
commit 48bef3a734
3 changed files with 8 additions and 14 deletions

View File

@@ -301,7 +301,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
xpath_subs_list.append(min(self.max_width, sub))
xpath_tags_list = xpath_tags_list[: self.max_depth]
xpath_subs_list = xpath_tags_list[: self.max_depth]
xpath_subs_list = xpath_subs_list[: self.max_depth]
xpath_tags_list += [self.pad_tag_id] * (self.max_depth - len(xpath_tags_list))
xpath_subs_list += [self.pad_width] * (self.max_depth - len(xpath_subs_list))

View File

@@ -275,7 +275,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
xpath_subs_list.append(min(self.max_width, sub))
xpath_tags_list = xpath_tags_list[: self.max_depth]
xpath_subs_list = xpath_tags_list[: self.max_depth]
xpath_subs_list = xpath_subs_list[: self.max_depth]
xpath_tags_list += [self.pad_tag_id] * (self.max_depth - len(xpath_tags_list))
xpath_subs_list += [self.pad_width] * (self.max_depth - len(xpath_subs_list))

File diff suppressed because one or more lines are too long