@@ -23,8 +23,6 @@ import re
|
|||||||
# Original: https://github.com/google-research/tapas/master/wikisql_utils.py
|
# Original: https://github.com/google-research/tapas/master/wikisql_utils.py
|
||||||
from typing import Any, List, Text
|
from typing import Any, List, Text
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
|
|
||||||
EMPTY_ANSWER = "none"
|
EMPTY_ANSWER = "none"
|
||||||
EMPTY_ANSWER_AGG = "none"
|
EMPTY_ANSWER_AGG = "none"
|
||||||
@@ -49,7 +47,7 @@ def convert_to_float(value):
|
|||||||
return value
|
return value
|
||||||
if isinstance(value, int):
|
if isinstance(value, int):
|
||||||
return float(value)
|
return float(value)
|
||||||
if not isinstance(value, six.string_types):
|
if not isinstance(value, str):
|
||||||
raise ValueError("Argument value is not a string. Can't parse it as float")
|
raise ValueError("Argument value is not a string. Can't parse it as float")
|
||||||
sanitized = value
|
sanitized = value
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ import unicodedata
|
|||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import sentencepiece as sp
|
import sentencepiece as sp
|
||||||
import six
|
|
||||||
|
|
||||||
from ...tokenization_utils import PreTrainedTokenizer
|
from ...tokenization_utils import PreTrainedTokenizer
|
||||||
|
|
||||||
@@ -523,17 +522,9 @@ def _is_punctuation(char):
|
|||||||
|
|
||||||
def convert_to_unicode(text):
|
def convert_to_unicode(text):
|
||||||
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
|
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
|
||||||
if six.PY3:
|
|
||||||
if isinstance(text, str):
|
if isinstance(text, str):
|
||||||
return text
|
return text
|
||||||
elif isinstance(text, bytes):
|
elif isinstance(text, bytes):
|
||||||
return text.decode("utf-8", "ignore")
|
return text.decode("utf-8", "ignore")
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported string type: {type(text)}")
|
raise ValueError(f"Unsupported string type: {type(text)}")
|
||||||
elif six.PY2:
|
|
||||||
if isinstance(text, str):
|
|
||||||
return text.decode("utf-8", "ignore")
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported string type: {type(text)}")
|
|
||||||
else:
|
|
||||||
raise ValueError("Not running on Python2 or Python 3?")
|
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from ...utils import logging
|
from ...utils import logging
|
||||||
from ..xlm.tokenization_xlm import XLMTokenizer
|
from ..xlm.tokenization_xlm import XLMTokenizer
|
||||||
|
|
||||||
@@ -76,16 +74,16 @@ def convert_to_unicode(text):
|
|||||||
"""
|
"""
|
||||||
Converts `text` to Unicode (if it's not already), assuming UTF-8 input.
|
Converts `text` to Unicode (if it's not already), assuming UTF-8 input.
|
||||||
"""
|
"""
|
||||||
# six_ensure_text is copied from https://github.com/benjaminp/six
|
|
||||||
def six_ensure_text(s, encoding="utf-8", errors="strict"):
|
def ensure_text(s, encoding="utf-8", errors="strict"):
|
||||||
if isinstance(s, six.binary_type):
|
if isinstance(s, bytes):
|
||||||
return s.decode(encoding, errors)
|
return s.decode(encoding, errors)
|
||||||
elif isinstance(s, six.text_type):
|
elif isinstance(s, str):
|
||||||
return s
|
return s
|
||||||
else:
|
else:
|
||||||
raise TypeError(f"not expecting type '{type(s)}'")
|
raise TypeError(f"not expecting type '{type(s)}'")
|
||||||
|
|
||||||
return six_ensure_text(text, encoding="utf-8", errors="ignore")
|
return ensure_text(text, encoding="utf-8", errors="ignore")
|
||||||
|
|
||||||
|
|
||||||
class FlaubertTokenizer(XLMTokenizer):
|
class FlaubertTokenizer(XLMTokenizer):
|
||||||
|
|||||||
Reference in New Issue
Block a user