Improve BERT-like models performance with better self attention (#9124)

* Improve BERT-like models attention layers
* Apply style
* Put back error raising instead of assert
* Update template
* Fix copies
* Apply raising ValueError in MPNet
* Restore the copy check for the Intermediate layer in Longformer
* Update Longformer
2020-12-21 13:10:15 +01:00
parent 6b034309ca
commit 5a8a4eb187
8 changed files with 348 additions and 271 deletions
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -40,8 +40,8 @@ deps = {
    "sphinx-rtd-theme": "sphinx-rtd-theme==0.4.3",
    "sphinx": "sphinx==3.2.1",
    "starlette": "starlette",
-    "tensorflow-cpu": "tensorflow-cpu>=2.0",
-    "tensorflow": "tensorflow>=2.0",
+    "tensorflow-cpu": "tensorflow-cpu>=2.3",
+    "tensorflow": "tensorflow>=2.3",
    "timeout-decorator": "timeout-decorator",
    "tokenizers": "tokenizers==0.9.4",
    "torch": "torch>=1.0",