Change the import of kenlm from github to pypi (#19770)
* Change the import of kenlm from github to pypi
* Change the import of kenlm from github to pypi in circleci config
* Fix code quality issues
* Fix isort issue, add kenlm in extras for audio
* Add kenlm to deps
* Add kenlm to deps
* Commit 'make fixup' changes
* Remove version from kenlm deps
* commit make fixup changes
* Remove manual installation of kenlm
* Remove manual installation of kenlm
* Remove manual installation of kenlm
This commit is contained in:
@@ -127,7 +127,6 @@ torch_and_tf_job = CircleCIJob(
|
|||||||
"pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
|
"pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
|
||||||
TORCH_SCATTER_INSTALL,
|
TORCH_SCATTER_INSTALL,
|
||||||
"pip install tensorflow_probability",
|
"pip install tensorflow_probability",
|
||||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
|
||||||
"pip install git+https://github.com/huggingface/accelerate",
|
"pip install git+https://github.com/huggingface/accelerate",
|
||||||
],
|
],
|
||||||
marker="is_pt_tf_cross_test",
|
marker="is_pt_tf_cross_test",
|
||||||
@@ -143,7 +142,6 @@ torch_and_flax_job = CircleCIJob(
|
|||||||
"pip install --upgrade pip",
|
"pip install --upgrade pip",
|
||||||
"pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
|
"pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
|
||||||
TORCH_SCATTER_INSTALL,
|
TORCH_SCATTER_INSTALL,
|
||||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
|
||||||
"pip install git+https://github.com/huggingface/accelerate",
|
"pip install git+https://github.com/huggingface/accelerate",
|
||||||
],
|
],
|
||||||
marker="is_pt_flax_cross_test",
|
marker="is_pt_flax_cross_test",
|
||||||
@@ -158,7 +156,6 @@ torch_job = CircleCIJob(
|
|||||||
"pip install --upgrade pip",
|
"pip install --upgrade pip",
|
||||||
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
|
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
|
||||||
TORCH_SCATTER_INSTALL,
|
TORCH_SCATTER_INSTALL,
|
||||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
|
||||||
"pip install git+https://github.com/huggingface/accelerate",
|
"pip install git+https://github.com/huggingface/accelerate",
|
||||||
],
|
],
|
||||||
pytest_num_workers=3,
|
pytest_num_workers=3,
|
||||||
@@ -172,7 +169,6 @@ tf_job = CircleCIJob(
|
|||||||
"pip install --upgrade pip",
|
"pip install --upgrade pip",
|
||||||
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
|
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
|
||||||
"pip install tensorflow_probability",
|
"pip install tensorflow_probability",
|
||||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
|
||||||
],
|
],
|
||||||
pytest_options={"rA": None},
|
pytest_options={"rA": None},
|
||||||
)
|
)
|
||||||
@@ -184,7 +180,6 @@ flax_job = CircleCIJob(
|
|||||||
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
|
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
|
||||||
"pip install --upgrade pip",
|
"pip install --upgrade pip",
|
||||||
"pip install .[flax,testing,sentencepiece,flax-speech,vision]",
|
"pip install .[flax,testing,sentencepiece,flax-speech,vision]",
|
||||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
|
||||||
],
|
],
|
||||||
pytest_options={"rA": None},
|
pytest_options={"rA": None},
|
||||||
)
|
)
|
||||||
@@ -197,7 +192,6 @@ pipelines_torch_job = CircleCIJob(
|
|||||||
"pip install --upgrade pip",
|
"pip install --upgrade pip",
|
||||||
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
|
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
|
||||||
TORCH_SCATTER_INSTALL,
|
TORCH_SCATTER_INSTALL,
|
||||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
|
||||||
],
|
],
|
||||||
pytest_options={"rA": None},
|
pytest_options={"rA": None},
|
||||||
tests_to_run="tests/pipelines/"
|
tests_to_run="tests/pipelines/"
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/onnx/tensorflow
|
|||||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+$CUDA.html
|
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+$CUDA.html
|
||||||
RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT+cpu -f https://software.intel.com/ipex-whl-stable
|
RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT+cpu -f https://software.intel.com/ipex-whl-stable
|
||||||
|
|
||||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
|
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||||
|
|
||||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y te
|
|||||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
|
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
|
||||||
|
|
||||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python -c "from torch import version; print(version.__version__.split('+')[0])")+cpu.html
|
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python -c "from torch import version; print(version.__version__.split('+')[0])")+cpu.html
|
||||||
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
|
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||||
RUN python3 -m pip install --no-cache-dir pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com
|
RUN python3 -m pip install --no-cache-dir pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com
|
||||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' || VERSI
|
|||||||
RUN python3 -m pip uninstall -y tensorflow flax
|
RUN python3 -m pip uninstall -y tensorflow flax
|
||||||
|
|
||||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu113.html
|
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu113.html
|
||||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
|
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||||
|
|
||||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||||
|
|||||||
3
setup.py
3
setup.py
@@ -123,6 +123,7 @@ _deps = [
|
|||||||
"jax>=0.2.8,!=0.3.2,<=0.3.6",
|
"jax>=0.2.8,!=0.3.2,<=0.3.6",
|
||||||
"jaxlib>=0.1.65,<=0.3.6",
|
"jaxlib>=0.1.65,<=0.3.6",
|
||||||
"jieba",
|
"jieba",
|
||||||
|
"kenlm",
|
||||||
"nltk",
|
"nltk",
|
||||||
"numpy>=1.17",
|
"numpy>=1.17",
|
||||||
"onnxconverter-common",
|
"onnxconverter-common",
|
||||||
@@ -274,7 +275,7 @@ extras["sigopt"] = deps_list("sigopt")
|
|||||||
extras["integrations"] = extras["optuna"] + extras["ray"] + extras["sigopt"]
|
extras["integrations"] = extras["optuna"] + extras["ray"] + extras["sigopt"]
|
||||||
|
|
||||||
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
|
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
|
||||||
extras["audio"] = deps_list("librosa", "pyctcdecode", "phonemizer")
|
extras["audio"] = deps_list("librosa", "pyctcdecode", "phonemizer", "kenlm")
|
||||||
# `pip install ".[speech]"` is deprecated and `pip install ".[torch-speech]"` should be used instead
|
# `pip install ".[speech]"` is deprecated and `pip install ".[torch-speech]"` should be used instead
|
||||||
extras["speech"] = deps_list("torchaudio") + extras["audio"]
|
extras["speech"] = deps_list("torchaudio") + extras["audio"]
|
||||||
extras["torch-speech"] = deps_list("torchaudio") + extras["audio"]
|
extras["torch-speech"] = deps_list("torchaudio") + extras["audio"]
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ deps = {
|
|||||||
"jax": "jax>=0.2.8,!=0.3.2,<=0.3.6",
|
"jax": "jax>=0.2.8,!=0.3.2,<=0.3.6",
|
||||||
"jaxlib": "jaxlib>=0.1.65,<=0.3.6",
|
"jaxlib": "jaxlib>=0.1.65,<=0.3.6",
|
||||||
"jieba": "jieba",
|
"jieba": "jieba",
|
||||||
|
"kenlm": "kenlm",
|
||||||
"nltk": "nltk",
|
"nltk": "nltk",
|
||||||
"numpy": "numpy>=1.17",
|
"numpy": "numpy>=1.17",
|
||||||
"onnxconverter-common": "onnxconverter-common",
|
"onnxconverter-common": "onnxconverter-common",
|
||||||
|
|||||||
@@ -36,7 +36,14 @@ from ..models.auto.modeling_auto import AutoModelForDepthEstimation
|
|||||||
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
||||||
from ..tokenization_utils import PreTrainedTokenizer
|
from ..tokenization_utils import PreTrainedTokenizer
|
||||||
from ..tokenization_utils_fast import PreTrainedTokenizerFast
|
from ..tokenization_utils_fast import PreTrainedTokenizerFast
|
||||||
from ..utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT, is_tf_available, is_torch_available, logging
|
from ..utils import (
|
||||||
|
HUGGINGFACE_CO_RESOLVE_ENDPOINT,
|
||||||
|
is_kenlm_available,
|
||||||
|
is_pyctcdecode_available,
|
||||||
|
is_tf_available,
|
||||||
|
is_torch_available,
|
||||||
|
logging,
|
||||||
|
)
|
||||||
from .audio_classification import AudioClassificationPipeline
|
from .audio_classification import AudioClassificationPipeline
|
||||||
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
|
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
|
||||||
from .base import (
|
from .base import (
|
||||||
@@ -837,11 +844,12 @@ def pipeline(
|
|||||||
|
|
||||||
kwargs["decoder"] = decoder
|
kwargs["decoder"] = decoder
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
logger.warning(
|
logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}")
|
||||||
f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Try to install"
|
if not is_kenlm_available():
|
||||||
" `pyctcdecode` and `kenlm`: (`pip install pyctcdecode`, `pip install"
|
logger.warning("Try to install `kenlm`: `pip install kenlm")
|
||||||
f" https://github.com/kpu/kenlm/archive/master.zip`): Error: {e}"
|
|
||||||
)
|
if not is_pyctcdecode_available():
|
||||||
|
logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode")
|
||||||
|
|
||||||
if task == "translation" and model.config.task_specific_params:
|
if task == "translation" and model.config.task_specific_params:
|
||||||
for key in model.config.task_specific_params:
|
for key in model.config.task_specific_params:
|
||||||
|
|||||||
@@ -108,6 +108,7 @@ from .import_utils import (
|
|||||||
is_in_notebook,
|
is_in_notebook,
|
||||||
is_ipex_available,
|
is_ipex_available,
|
||||||
is_jumanpp_available,
|
is_jumanpp_available,
|
||||||
|
is_kenlm_available,
|
||||||
is_librosa_available,
|
is_librosa_available,
|
||||||
is_more_itertools_available,
|
is_more_itertools_available,
|
||||||
is_ninja_available,
|
is_ninja_available,
|
||||||
|
|||||||
@@ -271,6 +271,10 @@ TORCH_FX_REQUIRED_VERSION = version.parse("1.10")
|
|||||||
TORCH_ONNX_DICT_INPUTS_MINIMUM_VERSION = version.parse("1.8")
|
TORCH_ONNX_DICT_INPUTS_MINIMUM_VERSION = version.parse("1.8")
|
||||||
|
|
||||||
|
|
||||||
|
def is_kenlm_available():
|
||||||
|
return importlib.util.find_spec("kenlm") is not None
|
||||||
|
|
||||||
|
|
||||||
def is_torch_available():
|
def is_torch_available():
|
||||||
return _torch_available
|
return _torch_available
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user