Fix natten (#22229)
* Add kernel size to NATTEN's QK arguments.
  NATTEN 0.14.5 adds support for PyTorch 2.0, but it also adds a new argument (the kernel size) to the QK operation so that relative positional biases (RPBs) can be optional. Calls that use the old signature therefore fail, which breaks the NATTEN tests. This commit re-enables the NATTEN install in the CircleCI config and passes the new argument so the tests work again.
* Force NATTEN >= 0.14.5
This commit is contained in:
@@ -374,8 +374,7 @@ exotic_models_job = CircleCIJob(
|
|||||||
"pip install 'git+https://github.com/facebookresearch/detectron2.git'",
|
"pip install 'git+https://github.com/facebookresearch/detectron2.git'",
|
||||||
"sudo apt install tesseract-ocr",
|
"sudo apt install tesseract-ocr",
|
||||||
"pip install pytesseract",
|
"pip install pytesseract",
|
||||||
# wait until natten is ready for torch 2.0.0
|
"pip install natten",
|
||||||
# "pip install natten",
|
|
||||||
],
|
],
|
||||||
tests_to_run=[
|
tests_to_run=[
|
||||||
"tests/models/*layoutlmv*",
|
"tests/models/*layoutlmv*",
|
||||||
|
|||||||
2
setup.py
2
setup.py
@@ -129,7 +129,7 @@ _deps = [
|
|||||||
"keras-nlp>=0.3.1",
|
"keras-nlp>=0.3.1",
|
||||||
"librosa",
|
"librosa",
|
||||||
"nltk",
|
"nltk",
|
||||||
"natten>=0.14.4",
|
"natten>=0.14.5",
|
||||||
"numpy>=1.17",
|
"numpy>=1.17",
|
||||||
"onnxconverter-common",
|
"onnxconverter-common",
|
||||||
"onnxruntime-tools>=1.4.2",
|
"onnxruntime-tools>=1.4.2",
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ deps = {
|
|||||||
"keras-nlp": "keras-nlp>=0.3.1",
|
"keras-nlp": "keras-nlp>=0.3.1",
|
||||||
"librosa": "librosa",
|
"librosa": "librosa",
|
||||||
"nltk": "nltk",
|
"nltk": "nltk",
|
||||||
"natten": "natten>=0.14.4",
|
"natten": "natten>=0.14.5",
|
||||||
"numpy": "numpy>=1.17",
|
"numpy": "numpy>=1.17",
|
||||||
"onnxconverter-common": "onnxconverter-common",
|
"onnxconverter-common": "onnxconverter-common",
|
||||||
"onnxruntime-tools": "onnxruntime-tools>=1.4.2",
|
"onnxruntime-tools": "onnxruntime-tools>=1.4.2",
|
||||||
|
|||||||
@@ -347,7 +347,7 @@ class NeighborhoodAttention(nn.Module):
|
|||||||
query_layer = query_layer / math.sqrt(self.attention_head_size)
|
query_layer = query_layer / math.sqrt(self.attention_head_size)
|
||||||
|
|
||||||
# Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
|
# Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
|
||||||
attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.dilation)
|
attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, self.dilation)
|
||||||
|
|
||||||
# Normalize the attention scores to probabilities.
|
# Normalize the attention scores to probabilities.
|
||||||
attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
||||||
|
|||||||
@@ -339,7 +339,7 @@ class NeighborhoodAttention(nn.Module):
|
|||||||
query_layer = query_layer / math.sqrt(self.attention_head_size)
|
query_layer = query_layer / math.sqrt(self.attention_head_size)
|
||||||
|
|
||||||
# Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
|
# Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
|
||||||
attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, 1)
|
attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, 1)
|
||||||
|
|
||||||
# Normalize the attention scores to probabilities.
|
# Normalize the attention scores to probabilities.
|
||||||
attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
||||||
|
|||||||
Reference in New Issue
Block a user