[Wav2Vec2] PyCTCDecode Integration to support language model boosted decoding (#14339)

* up

* up

* up

* make it cleaner

* correct

* make styhahalal

* add more tests

* finish

* small fix

* make style

* up

* tryout to solve cicrle ci

* up

* fix more tests

* fix more tests

* apply sylvains suggestions

* fix import

* correct docs

* add pyctcdecode only to speech tests

* fix more tests

* add tf, flax and pt tests

* add pt

* fix last tests

* fix more tests

* Apply suggestions from code review

* change lines

* Apply suggestions from code review

Co-authored-by: Anton Lozhkov <aglozhkov@gmail.com>

* correct tests

* correct tests

* add doc string

Co-authored-by: Anton Lozhkov <aglozhkov@gmail.com>
This commit is contained in:
Patrick von Platen
2021-12-08 12:07:54 +01:00
committed by GitHub
parent 2e12d90b9e
commit 961732c276
16 changed files with 831 additions and 19 deletions

View File

@@ -83,6 +83,7 @@ jobs:
- run: pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]
- run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
- run: pip install tensorflow_probability
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-{{ checksum "setup.py" }}
paths:
@@ -151,6 +152,7 @@ jobs:
- run: pip install --upgrade pip
- run: pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]
- run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-{{ checksum "setup.py" }}
paths:
@@ -187,6 +189,7 @@ jobs:
- run: pip install --upgrade pip
- run: pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]
- run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-{{ checksum "setup.py" }}
paths:
@@ -217,6 +220,7 @@ jobs:
- run: pip install --upgrade pip
- run: pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]
- run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-torch-{{ checksum "setup.py" }}
paths:
@@ -252,6 +256,7 @@ jobs:
- run: pip install --upgrade pip
- run: pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]
- run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-torch-{{ checksum "setup.py" }}
paths:
@@ -278,9 +283,11 @@ jobs:
keys:
- v0.4-tf-{{ checksum "setup.py" }}
- v0.4-{{ checksum "setup.py" }}
- run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
- run: pip install --upgrade pip
- run: pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]
- run: pip install tensorflow_probability
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-tf-{{ checksum "setup.py" }}
paths:
@@ -312,9 +319,11 @@ jobs:
keys:
- v0.4-tf-{{ checksum "setup.py" }}
- v0.4-{{ checksum "setup.py" }}
- run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
- run: pip install --upgrade pip
- run: pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]
- run: pip install tensorflow_probability
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-tf-{{ checksum "setup.py" }}
paths:
@@ -341,8 +350,10 @@ jobs:
keys:
- v0.4-flax-{{ checksum "setup.py" }}
- v0.4-{{ checksum "setup.py" }}
- run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
- run: pip install --upgrade pip
- run: sudo pip install .[flax,testing,sentencepiece,flax-speech,vision]
- run: pip install .[flax,testing,sentencepiece,flax-speech,vision]
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-flax-{{ checksum "setup.py" }}
paths:
@@ -374,8 +385,10 @@ jobs:
keys:
- v0.4-flax-{{ checksum "setup.py" }}
- v0.4-{{ checksum "setup.py" }}
- run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
- run: pip install --upgrade pip
- run: sudo pip install .[flax,testing,sentencepiece,vision,flax-speech]
- run: pip install .[flax,testing,sentencepiece,vision,flax-speech]
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-flax-{{ checksum "setup.py" }}
paths:
@@ -407,6 +420,7 @@ jobs:
- run: pip install --upgrade pip
- run: pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]
- run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-torch-{{ checksum "setup.py" }}
paths:
@@ -443,6 +457,7 @@ jobs:
- run: pip install --upgrade pip
- run: pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]
- run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
- run: pip install https://github.com/kpu/kenlm/archive/master.zip
- save_cache:
key: v0.4-torch-{{ checksum "setup.py" }}
paths:
@@ -582,7 +597,7 @@ jobs:
path: ~/transformers/examples_output.txt
- store_artifacts:
path: ~/transformers/reports
run_examples_torch_all:
working_directory: ~/transformers
docker: