Change the import of kenlm from github to pypi (#19770)
* Change the import of kenlm from github to pypi
* Change the import of kenlm from github to pypi in circleci config
* Fix code quality issues
* Fix isort issue, add kenlm in extras for audio
* Add kenlm to deps
* Add kenlm to deps
* Commit 'make fixup' changes
* Remove version from kenlm deps
* commit make fixup changes
* Remove manual installation of kenlm
* Remove manual installation of kenlm
* Remove manual installation of kenlm
This commit is contained in:
@@ -127,7 +127,6 @@ torch_and_tf_job = CircleCIJob(
|
||||
"pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
|
||||
TORCH_SCATTER_INSTALL,
|
||||
"pip install tensorflow_probability",
|
||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
||||
"pip install git+https://github.com/huggingface/accelerate",
|
||||
],
|
||||
marker="is_pt_tf_cross_test",
|
||||
@@ -143,7 +142,6 @@ torch_and_flax_job = CircleCIJob(
|
||||
"pip install --upgrade pip",
|
||||
"pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
|
||||
TORCH_SCATTER_INSTALL,
|
||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
||||
"pip install git+https://github.com/huggingface/accelerate",
|
||||
],
|
||||
marker="is_pt_flax_cross_test",
|
||||
@@ -158,7 +156,6 @@ torch_job = CircleCIJob(
|
||||
"pip install --upgrade pip",
|
||||
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
|
||||
TORCH_SCATTER_INSTALL,
|
||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
||||
"pip install git+https://github.com/huggingface/accelerate",
|
||||
],
|
||||
pytest_num_workers=3,
|
||||
@@ -172,7 +169,6 @@ tf_job = CircleCIJob(
|
||||
"pip install --upgrade pip",
|
||||
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
|
||||
"pip install tensorflow_probability",
|
||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
||||
],
|
||||
pytest_options={"rA": None},
|
||||
)
|
||||
@@ -184,7 +180,6 @@ flax_job = CircleCIJob(
|
||||
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
|
||||
"pip install --upgrade pip",
|
||||
"pip install .[flax,testing,sentencepiece,flax-speech,vision]",
|
||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
||||
],
|
||||
pytest_options={"rA": None},
|
||||
)
|
||||
@@ -197,7 +192,6 @@ pipelines_torch_job = CircleCIJob(
|
||||
"pip install --upgrade pip",
|
||||
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
|
||||
TORCH_SCATTER_INSTALL,
|
||||
"pip install https://github.com/kpu/kenlm/archive/master.zip",
|
||||
],
|
||||
pytest_options={"rA": None},
|
||||
tests_to_run="tests/pipelines/"
|
||||
|
||||
@@ -46,7 +46,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/onnx/tensorflow
|
||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+$CUDA.html
|
||||
RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT+cpu -f https://software.intel.com/ipex-whl-stable
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
@@ -11,7 +11,7 @@ RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y te
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python -c "from torch import version; print(version.__version__.split('+')[0])")+cpu.html
|
||||
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
|
||||
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install --no-cache-dir pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' || VERSI
|
||||
RUN python3 -m pip uninstall -y tensorflow flax
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu113.html
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
|
||||
3
setup.py
3
setup.py
@@ -123,6 +123,7 @@ _deps = [
|
||||
"jax>=0.2.8,!=0.3.2,<=0.3.6",
|
||||
"jaxlib>=0.1.65,<=0.3.6",
|
||||
"jieba",
|
||||
"kenlm",
|
||||
"nltk",
|
||||
"numpy>=1.17",
|
||||
"onnxconverter-common",
|
||||
@@ -274,7 +275,7 @@ extras["sigopt"] = deps_list("sigopt")
|
||||
extras["integrations"] = extras["optuna"] + extras["ray"] + extras["sigopt"]
|
||||
|
||||
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
|
||||
extras["audio"] = deps_list("librosa", "pyctcdecode", "phonemizer")
|
||||
extras["audio"] = deps_list("librosa", "pyctcdecode", "phonemizer", "kenlm")
|
||||
# `pip install ".[speech]"` is deprecated and `pip install ".[torch-speech]"` should be used instead
|
||||
extras["speech"] = deps_list("torchaudio") + extras["audio"]
|
||||
extras["torch-speech"] = deps_list("torchaudio") + extras["audio"]
|
||||
|
||||
@@ -29,6 +29,7 @@ deps = {
|
||||
"jax": "jax>=0.2.8,!=0.3.2,<=0.3.6",
|
||||
"jaxlib": "jaxlib>=0.1.65,<=0.3.6",
|
||||
"jieba": "jieba",
|
||||
"kenlm": "kenlm",
|
||||
"nltk": "nltk",
|
||||
"numpy": "numpy>=1.17",
|
||||
"onnxconverter-common": "onnxconverter-common",
|
||||
|
||||
@@ -36,7 +36,14 @@ from ..models.auto.modeling_auto import AutoModelForDepthEstimation
|
||||
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
||||
from ..tokenization_utils import PreTrainedTokenizer
|
||||
from ..tokenization_utils_fast import PreTrainedTokenizerFast
|
||||
from ..utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT, is_tf_available, is_torch_available, logging
|
||||
from ..utils import (
|
||||
HUGGINGFACE_CO_RESOLVE_ENDPOINT,
|
||||
is_kenlm_available,
|
||||
is_pyctcdecode_available,
|
||||
is_tf_available,
|
||||
is_torch_available,
|
||||
logging,
|
||||
)
|
||||
from .audio_classification import AudioClassificationPipeline
|
||||
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
|
||||
from .base import (
|
||||
@@ -837,11 +844,12 @@ def pipeline(
|
||||
|
||||
kwargs["decoder"] = decoder
|
||||
except ImportError as e:
|
||||
logger.warning(
|
||||
f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Try to install"
|
||||
" `pyctcdecode` and `kenlm`: (`pip install pyctcdecode`, `pip install"
|
||||
f" https://github.com/kpu/kenlm/archive/master.zip`): Error: {e}"
|
||||
)
|
||||
logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}")
|
||||
if not is_kenlm_available():
|
||||
logger.warning("Try to install `kenlm`: `pip install kenlm")
|
||||
|
||||
if not is_pyctcdecode_available():
|
||||
logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode")
|
||||
|
||||
if task == "translation" and model.config.task_specific_params:
|
||||
for key in model.config.task_specific_params:
|
||||
|
||||
@@ -108,6 +108,7 @@ from .import_utils import (
|
||||
is_in_notebook,
|
||||
is_ipex_available,
|
||||
is_jumanpp_available,
|
||||
is_kenlm_available,
|
||||
is_librosa_available,
|
||||
is_more_itertools_available,
|
||||
is_ninja_available,
|
||||
|
||||
@@ -271,6 +271,10 @@ TORCH_FX_REQUIRED_VERSION = version.parse("1.10")
|
||||
TORCH_ONNX_DICT_INPUTS_MINIMUM_VERSION = version.parse("1.8")
|
||||
|
||||
|
||||
def is_kenlm_available():
    """
    Report whether the `kenlm` package can be located in the current environment.

    The check asks the import machinery for a module spec via `importlib.util.find_spec` for the top-level name
    `kenlm`; it does not import the package itself.

    Returns:
        `bool`: `True` if a spec for `kenlm` was found, `False` otherwise.
    """
    return importlib.util.find_spec("kenlm") is not None
|
||||
|
||||
|
||||
def is_torch_available():
    """
    Report whether PyTorch was detected as available.

    Returns the module-level `_torch_available` flag unchanged. The flag is computed outside this function (its
    definition is not part of this excerpt), so no detection work happens at call time.

    Returns:
        The current value of `_torch_available`.
    """
    return _torch_available
|
||||
|
||||
|
||||
Reference in New Issue
Block a user