From 5f8f2d849a450dadb4e7889ef5b847f88c60d9cb Mon Sep 17 00:00:00 2001 From: Victor SANH Date: Wed, 27 May 2020 18:25:17 -0400 Subject: [PATCH] add floppy bert model notebok --- .../movement-pruning/Saving_PruneBERT.ipynb | 1866 +++++++++++++++++ 1 file changed, 1866 insertions(+) create mode 100644 examples/movement-pruning/Saving_PruneBERT.ipynb diff --git a/examples/movement-pruning/Saving_PruneBERT.ipynb b/examples/movement-pruning/Saving_PruneBERT.ipynb new file mode 100644 index 0000000000..f0c68433f8 --- /dev/null +++ b/examples/movement-pruning/Saving_PruneBERT.ipynb @@ -0,0 +1,1866 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Saving PruneBERT\n", + "\n", + "\n", + "This notebook aims at showcasing how we can leverage standard tools to save (and load) an extremely sparse model fine-pruned with [movement pruning](https://arxiv.org/abs/2005.07683) (or any other unstructured pruning method).\n", + "\n", + "In this example, we used BERT (base-uncased), but the procedure described here is not specific to BERT and can be applied to a large variety of models.\n", + "\n", + "We first obtain an extremely sparse model by fine-pruning with movement pruning on SQuAD v1.1. We then used the following combination of standard tools:\n", + "- We reduce the precision of the model with Int8 dynamic quantization using [PyTorch implementation](https://pytorch.org/tutorials/intermediate/dynamic_quantization_bert_tutorial.html). We only quantized the Fully Connected Layers.\n", + "- Sparse quantized matrices are converted into the [Compressed Sparse Row format](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html).\n", + "- We use HDF5 with `gzip` compression to store the weights.\n", + "\n", + "We experiment with a question answering model with only 6% of total remaining weights in the encoder (previously obtained with movement pruning). 
**We are able to reduce the memory size of the encoder from 340MB (original dense BERT) to 11MB**, which fits on a [91' floppy disk](https://en.wikipedia.org/wiki/Floptical)!\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Includes\n", + "\n", + "import h5py\n", + "import os\n", + "import json\n", + "from collections import OrderedDict\n", + "\n", + "from scipy import sparse\n", + "import numpy as np\n", + "\n", + "import torch\n", + "from torch import nn\n", + "\n", + "from transformers import *\n", + "\n", + "os.chdir('../../')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dynamic quantization induces little or no loss of performance while significantly reducing the memory footprint." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BertForQuestionAnswering(\n", + " (bert): BertModel(\n", + " (embeddings): BertEmbeddings(\n", + " (word_embeddings): Embedding(30522, 768, padding_idx=0)\n", + " (position_embeddings): Embedding(512, 768)\n", + " (token_type_embeddings): Embedding(2, 768)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (encoder): BertEncoder(\n", + " (layer): ModuleList(\n", + " (0): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): 
LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " 
)\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " 
in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", 
+ " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): 
BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, 
out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " 
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (9): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " 
(_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " )\n", + " (output): BertOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=3072, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (key): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (value): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=768\n", + " (_packed_params): LinearPackedParams()\n", + " )\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): DynamicQuantizedLinear(\n", + " in_features=768, out_features=3072\n", + " (_packed_params): 
# Load fine-pruned model and quantize the model
#
# Reads a fine-pruned BERT question-answering checkpoint from disk, applies
# Int8 dynamic quantization to every `torch.nn.Linear` module and keeps the
# quantized state_dict in `qtz_st` for the decomposition step that follows.

model_path = "serialization_dir/bert-base-uncased/92/squad/l1"
model_name = "bertarized_l1_with_distil_0._0.1_1_2_l1_1100._3e-5_1e-2_sigmoied_threshold_constant_0._10_epochs"

model = BertForQuestionAnswering.from_pretrained(os.path.join(model_path, model_name))
model.to('cpu')

quantized_model = torch.quantization.quantize_dynamic(
    model=model,
    qconfig_spec={
        # Only the fully connected layers are quantized.
        torch.nn.Linear: torch.quantization.default_dynamic_qconfig,
    },
    dtype=torch.qint8,
)
print(quantized_model)

qtz_st = quantized_model.state_dict()


# Saving the original (encoder + classifier) in the standard torch.save format
#
# Embeddings and the pooler are filtered out by name so that the file size
# measured below covers the encoder and the QA head only.

# The output directory is not guaranteed to exist on a fresh checkout;
# without this, torch.save raises FileNotFoundError.
os.makedirs('dbg', exist_ok=True)

dense_st = {
    name: param
    for name, param in model.state_dict().items()
    if "embedding" not in name and "pooler" not in name
}
torch.save(dense_st, 'dbg/dense_squad.pt')

# NOTE: despite its name this is a size in *bytes* (os.path.getsize); the
# reporting code divides it by 1e6 to print megabytes. The name is kept
# because other cells read it.
dense_mb_size = os.path.getsize("dbg/dense_squad.pt")
"metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decompose quantization for bert.encoder.layer.0.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.0.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.0.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.0.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.0.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.0.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.1.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.1.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.1.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.1.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.1.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.1.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.2.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.2.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.2.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.2.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.2.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.2.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.3.attention.self.query._packed_params.weight\n", + "Decompose quantization for 
bert.encoder.layer.3.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.3.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.3.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.3.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.3.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.4.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.4.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.4.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.4.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.4.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.4.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.5.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.5.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.5.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.5.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.5.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.5.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.6.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.6.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.6.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.6.attention.output.dense._packed_params.weight\n", + 
"Decompose quantization for bert.encoder.layer.6.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.6.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.7.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.7.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.7.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.7.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.7.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.7.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.8.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.8.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.8.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.8.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.8.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.8.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.9.attention.self.query._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.9.attention.self.key._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.9.attention.self.value._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.9.attention.output.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.9.intermediate.dense._packed_params.weight\n", + "Decompose quantization for bert.encoder.layer.9.output.dense._packed_params.weight\n", + "Decompose quantization for 
# Elementary representation: we decompose the quantized tensors into (scale, zero_point, int_repr).
# See https://pytorch.org/docs/stable/quantization.html
#
# We further leverage the fact that int_repr is a sparse matrix to optimize the storage: we decompose
# int_repr into its CSR representation (data, indptr, indices).


def _compact_csr_indices(indices):
    """Store CSR column indices as uint16 when every index fits, else leave them untouched.

    Args:
        indices: 1-D numpy integer array of column indices (`csr_matrix.indices`).

    Returns:
        A uint16 copy of `indices` if its largest value is representable in
        uint16 (an empty array trivially is), otherwise `indices` unchanged
        in the wider dtype scipy produced.
    """
    # An all-zero (fully pruned) matrix has no stored indices; calling max()
    # on it would raise, so treat it as "largest index is 0".
    largest_index = int(indices.max()) if indices.size else 0
    if largest_index <= np.iinfo(np.uint16).max:
        return indices.astype(np.uint16)
    # Fall back to scipy's native index dtype instead of silently wrapping.
    return indices


def decompose_quantized_tensor(name, param):
    """Split one quantized tensor into plain, storable components.

    Args:
        name: state_dict key of the tensor; used as prefix for the new keys.
        param: a quantized 2-D `torch.Tensor` (must support `q_scale()`,
            `q_zero_point()` and `int_repr()`).

    Returns:
        A dict with, in this insertion order, the keys
        `{name}.scale` (float), `{name}.zero_point` (int),
        `{name}.int_repr.data`, `{name}.int_repr.indptr`,
        `{name}.int_repr.indices` (numpy arrays of the CSR form) and
        `{name}.int_repr.shape` (tuple of two ints).
    """
    components = {}

    # Quantization parameters of the tensor: a Python float and a Python int.
    components[f"{name}.scale"] = param.q_scale()
    components[f"{name}.zero_point"] = param.q_zero_point()

    # We assume the int_repr is sparse and compute its CSR representation.
    # Only the FCs in the encoder are actually sparse.
    int_repr = param.int_repr()                 # torch.tensor(nb_rows, nb_columns) - int8
    int_repr_cs = sparse.csr_matrix(int_repr)   # scipy CSR matrix

    components[f"{name}.int_repr.data"] = int_repr_cs.data      # np.array int8
    components[f"{name}.int_repr.indptr"] = int_repr_cs.indptr  # np.array, scipy's index dtype
    components[f"{name}.int_repr.indices"] = _compact_csr_indices(int_repr_cs.indices)
    components[f"{name}.int_repr.shape"] = int_repr_cs.shape    # tuple(int, int)
    return components


elementary_qtz_st = {}
for name, param in qtz_st.items():
    # Entries that are not quantized tensors are passed through untouched.
    if isinstance(param, torch.Tensor) and param.is_quantized:
        print("Decompose quantization for", name)
        elementary_qtz_st.update(decompose_quantized_tensor(name, param))
    else:
        elementary_qtz_st[name] = param


# Saving the pruned (encoder + classifier) in the standard torch.save format
#
# Same name-based filter as for the dense baseline: embeddings and the pooler
# are left out so the reported size covers the encoder and the QA head only.

# Make sure the output directory exists before writing into it.
os.makedirs('dbg', exist_ok=True)

dense_optimized_st = {
    name: param
    for name, param in elementary_qtz_st.items()
    if "embedding" not in name and "pooler" not in name
}
torch.save(dense_optimized_st, 'dbg/dense_squad_optimized.pt')
print("Encoder Size (MB) - Sparse & Quantized - `torch.save`:",
      round(os.path.getsize("dbg/dense_squad_optimized.pt")/1e6, 2))
def save_state_dict_to_hdf5(state_dict, h5_path, skip_substrings=()):
    """Write a decomposed state_dict to an HDF5 file with gzip compression.

    Storage rules, applied per entry:
      * `torch.Tensor` with a single element (module scale / module
        zero_point) -> HDF5 attribute.
      * any other `torch.Tensor` -> gzip-compressed dataset; tensors that
        require grad (e.g. LayerNorm) are detached and converted to numpy first.
      * `float` / `int` / `tuple` (tensor scale, tensor zero_point, int_repr
        shape) -> HDF5 attribute.
      * everything else (the numpy arrays of the CSR decomposition)
        -> gzip-compressed dataset.

    Args:
        state_dict: mapping from parameter name to value, as produced by the
            decomposition step (`elementary_qtz_st`).
        h5_path: destination file; it is created or overwritten.
        skip_substrings: entries whose name contains any of these substrings
            are not written, and a "Skip <name>" line is printed for each.
    """
    with h5py.File(h5_path, 'w') as hf:
        for name, param in state_dict.items():
            if any(pattern in name for pattern in skip_substrings):
                print(f"Skip {name}")
                continue

            if isinstance(param, torch.Tensor):
                if param.numel() == 1:
                    # module scale
                    # module zero_point
                    hf.attrs[name] = param
                    continue

                if param.requires_grad:
                    # LayerNorm
                    param = param.detach().numpy()
                hf.create_dataset(name, data=param, compression="gzip", compression_opts=9)

            elif isinstance(param, (float, int, tuple)):
                # float - tensor _packed_params.weight.scale
                # int   - tensor _packed_params.weight.zero_point
                # tuple - tensor _packed_params.weight.shape
                hf.attrs[name] = param

            else:
                hf.create_dataset(name, data=param, compression="gzip", compression_opts=9)


# Make sure the output directory exists before writing into it.
os.makedirs('dbg', exist_ok=True)


# Save the decomposed state_dict with an HDF5 file
# Saving only the encoder + QA Head

save_state_dict_to_hdf5(
    elementary_qtz_st,
    'dbg/squad_sparse.h5',
    skip_substrings=("embedding", "pooler"),
)

# The state_dict metadata is stored next to the weights as JSON.
with open('dbg/metadata.json', 'w') as f:
    f.write(json.dumps(qtz_st._metadata))

size = os.path.getsize("dbg/squad_sparse.h5") + os.path.getsize("dbg/metadata.json")
print("")
# `dense_mb_size` and `size` are byte counts; dividing by 1e6 reports MB.
print("Encoder Size (MB) - Dense: ", round(dense_mb_size/1e6, 2))
print("Encoder Size (MB) - Sparse & Quantized:", round(size/1e6, 2))


# Save the decomposed state_dict to HDF5 storage
# Save everything in the architecture (embedding + encoder + QA Head)

save_state_dict_to_hdf5(elementary_qtz_st, 'dbg/squad_sparse_with_embs.h5')

with open('dbg/metadata.json', 'w') as f:
    f.write(json.dumps(qtz_st._metadata))

size = os.path.getsize("dbg/squad_sparse_with_embs.h5") + os.path.getsize("dbg/metadata.json")
+ "print('\\nSize (MB):', round(size/1e6, 2))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unpack bert.encoder.layer.0.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.0.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.0.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.0.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.0.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.0.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.0.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.0.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.0.attention.self.key.scale\n", + "Unpack bert.encoder.layer.0.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.0.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.0.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.0.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.0.attention.self.query.scale\n", + "Unpack bert.encoder.layer.0.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.0.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.0.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.0.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.0.attention.self.value.scale\n", + "Unpack bert.encoder.layer.0.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.0.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.0.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.0.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.0.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.0.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.0.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.0.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.0.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.0.output.dense.scale\n", + "Unpack bert.encoder.layer.0.output.dense.zero_point\n", + "Unpack bert.encoder.layer.1.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.1.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.1.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.1.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.1.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.1.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.1.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.1.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.1.attention.self.key.scale\n", + "Unpack bert.encoder.layer.1.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.1.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.1.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.1.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.1.attention.self.query.scale\n", + "Unpack bert.encoder.layer.1.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.1.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.1.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.1.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.1.attention.self.value.scale\n", + "Unpack bert.encoder.layer.1.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.1.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.1.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.1.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.1.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.1.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.1.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.1.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.1.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.1.output.dense.scale\n", + "Unpack bert.encoder.layer.1.output.dense.zero_point\n", + "Unpack bert.encoder.layer.10.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.10.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.10.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.10.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.10.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.10.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.10.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.10.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.10.attention.self.key.scale\n", + "Unpack bert.encoder.layer.10.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.10.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.10.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.10.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.10.attention.self.query.scale\n", + "Unpack bert.encoder.layer.10.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.10.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.10.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.10.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.10.attention.self.value.scale\n", + "Unpack bert.encoder.layer.10.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.10.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.10.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.10.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.10.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.10.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.10.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.10.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.10.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.10.output.dense.scale\n", + "Unpack bert.encoder.layer.10.output.dense.zero_point\n", + "Unpack bert.encoder.layer.11.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.11.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.11.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.11.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.11.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.11.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.11.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.11.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.11.attention.self.key.scale\n", + "Unpack bert.encoder.layer.11.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.11.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.11.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.11.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.11.attention.self.query.scale\n", + "Unpack bert.encoder.layer.11.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.11.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.11.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.11.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.11.attention.self.value.scale\n", + "Unpack bert.encoder.layer.11.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.11.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.11.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.11.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.11.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.11.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.11.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.11.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.11.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.11.output.dense.scale\n", + "Unpack bert.encoder.layer.11.output.dense.zero_point\n", + "Unpack bert.encoder.layer.2.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.2.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.2.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.2.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.2.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.2.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.2.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.2.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.2.attention.self.key.scale\n", + "Unpack bert.encoder.layer.2.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.2.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.2.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.2.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.2.attention.self.query.scale\n", + "Unpack bert.encoder.layer.2.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.2.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.2.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.2.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.2.attention.self.value.scale\n", + "Unpack bert.encoder.layer.2.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.2.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.2.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.2.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.2.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.2.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.2.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.2.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.2.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.2.output.dense.scale\n", + "Unpack bert.encoder.layer.2.output.dense.zero_point\n", + "Unpack bert.encoder.layer.3.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.3.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.3.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.3.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.3.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.3.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.3.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.3.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.3.attention.self.key.scale\n", + "Unpack bert.encoder.layer.3.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.3.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.3.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.3.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.3.attention.self.query.scale\n", + "Unpack bert.encoder.layer.3.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.3.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.3.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.3.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.3.attention.self.value.scale\n", + "Unpack bert.encoder.layer.3.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.3.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.3.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.3.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.3.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.3.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.3.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.3.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.3.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.3.output.dense.scale\n", + "Unpack bert.encoder.layer.3.output.dense.zero_point\n", + "Unpack bert.encoder.layer.4.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.4.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.4.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.4.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.4.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.4.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.4.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.4.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.4.attention.self.key.scale\n", + "Unpack bert.encoder.layer.4.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.4.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.4.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.4.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.4.attention.self.query.scale\n", + "Unpack bert.encoder.layer.4.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.4.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack 
bert.encoder.layer.4.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.4.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.4.attention.self.value.scale\n", + "Unpack bert.encoder.layer.4.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.4.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.4.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.4.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.4.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.4.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.4.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.4.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.4.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.4.output.dense.scale\n", + "Unpack bert.encoder.layer.4.output.dense.zero_point\n", + "Unpack bert.encoder.layer.5.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.5.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.5.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.5.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.5.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.5.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.5.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.5.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.5.attention.self.key.scale\n", + "Unpack bert.encoder.layer.5.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.5.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.5.attention.self.query._packed_params.weight.scale\n", 
+ "Unpack bert.encoder.layer.5.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.5.attention.self.query.scale\n", + "Unpack bert.encoder.layer.5.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.5.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.5.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.5.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.5.attention.self.value.scale\n", + "Unpack bert.encoder.layer.5.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.5.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.5.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.5.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.5.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.5.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.5.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.5.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.5.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.5.output.dense.scale\n", + "Unpack bert.encoder.layer.5.output.dense.zero_point\n", + "Unpack bert.encoder.layer.6.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.6.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.6.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.6.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.6.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.6.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.6.attention.self.key._packed_params.weight.scale\n", + "Unpack 
bert.encoder.layer.6.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.6.attention.self.key.scale\n", + "Unpack bert.encoder.layer.6.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.6.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.6.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.6.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.6.attention.self.query.scale\n", + "Unpack bert.encoder.layer.6.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.6.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.6.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.6.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.6.attention.self.value.scale\n", + "Unpack bert.encoder.layer.6.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.6.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.6.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.6.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.6.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.6.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.6.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.6.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.6.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.6.output.dense.scale\n", + "Unpack bert.encoder.layer.6.output.dense.zero_point\n", + "Unpack bert.encoder.layer.7.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.7.attention.output.dense._packed_params.weight.scale\n", + "Unpack 
bert.encoder.layer.7.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.7.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.7.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.7.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.7.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.7.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.7.attention.self.key.scale\n", + "Unpack bert.encoder.layer.7.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.7.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.7.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.7.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.7.attention.self.query.scale\n", + "Unpack bert.encoder.layer.7.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.7.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.7.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.7.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.7.attention.self.value.scale\n", + "Unpack bert.encoder.layer.7.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.7.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.7.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.7.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.7.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.7.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.7.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.7.output.dense._packed_params.weight.scale\n", + "Unpack 
bert.encoder.layer.7.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.7.output.dense.scale\n", + "Unpack bert.encoder.layer.7.output.dense.zero_point\n", + "Unpack bert.encoder.layer.8.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.8.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.8.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.8.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.8.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.8.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.8.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.8.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.8.attention.self.key.scale\n", + "Unpack bert.encoder.layer.8.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.8.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.8.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.8.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.8.attention.self.query.scale\n", + "Unpack bert.encoder.layer.8.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.8.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.8.attention.self.value._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.8.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.8.attention.self.value.scale\n", + "Unpack bert.encoder.layer.8.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.8.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.8.intermediate.dense._packed_params.weight.scale\n", + "Unpack 
bert.encoder.layer.8.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.8.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.8.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.8.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.8.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.8.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.8.output.dense.scale\n", + "Unpack bert.encoder.layer.8.output.dense.zero_point\n", + "Unpack bert.encoder.layer.9.attention.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.9.attention.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.9.attention.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.9.attention.output.dense.scale\n", + "Unpack bert.encoder.layer.9.attention.output.dense.zero_point\n", + "Unpack bert.encoder.layer.9.attention.self.key._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.9.attention.self.key._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.9.attention.self.key._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.9.attention.self.key.scale\n", + "Unpack bert.encoder.layer.9.attention.self.key.zero_point\n", + "Unpack bert.encoder.layer.9.attention.self.query._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.9.attention.self.query._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.9.attention.self.query._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.9.attention.self.query.scale\n", + "Unpack bert.encoder.layer.9.attention.self.query.zero_point\n", + "Unpack bert.encoder.layer.9.attention.self.value._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.9.attention.self.value._packed_params.weight.scale\n", + "Unpack 
bert.encoder.layer.9.attention.self.value._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.9.attention.self.value.scale\n", + "Unpack bert.encoder.layer.9.attention.self.value.zero_point\n", + "Unpack bert.encoder.layer.9.intermediate.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.9.intermediate.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.9.intermediate.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.9.intermediate.dense.scale\n", + "Unpack bert.encoder.layer.9.intermediate.dense.zero_point\n", + "Unpack bert.encoder.layer.9.output.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.encoder.layer.9.output.dense._packed_params.weight.scale\n", + "Unpack bert.encoder.layer.9.output.dense._packed_params.weight.zero_point\n", + "Unpack bert.encoder.layer.9.output.dense.scale\n", + "Unpack bert.encoder.layer.9.output.dense.zero_point\n", + "Unpack bert.pooler.dense._packed_params.weight.int_repr.shape\n", + "Unpack bert.pooler.dense._packed_params.weight.scale\n", + "Unpack bert.pooler.dense._packed_params.weight.zero_point\n", + "Unpack bert.pooler.dense.scale\n", + "Unpack bert.pooler.dense.zero_point\n", + "Unpack qa_outputs._packed_params.weight.int_repr.shape\n", + "Unpack qa_outputs._packed_params.weight.scale\n", + "Unpack qa_outputs._packed_params.weight.zero_point\n", + "Unpack qa_outputs.scale\n", + "Unpack qa_outputs.zero_point\n", + "Unpack bert.embeddings.LayerNorm.bias\n", + "Unpack bert.embeddings.LayerNorm.weight\n", + "Unpack bert.embeddings.position_embeddings.weight\n", + "Unpack bert.embeddings.token_type_embeddings.weight\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unpack bert.embeddings.word_embeddings.weight\n", + "Unpack bert.encoder.layer.0.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.0.attention.output.LayerNorm.weight\n", + "Unpack 
bert.encoder.layer.0.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.0.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.0.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.0.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.0.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.0.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.0.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.0.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.0.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.0.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.0.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.0.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.0.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.0.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.0.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.0.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.0.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.0.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.0.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.0.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.0.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.0.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.0.output.dense._packed_params.bias\n", + "Unpack 
bert.encoder.layer.0.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.0.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.0.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.1.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.1.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.1.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.1.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.1.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.1.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.1.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.1.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.1.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.1.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.1.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.1.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.1.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.1.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.1.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.1.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.1.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.1.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.1.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.1.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack 
bert.encoder.layer.1.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.1.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.1.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.1.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.1.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.1.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.1.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.1.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.10.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.10.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.10.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.10.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.10.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.10.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.10.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.10.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.10.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.10.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.10.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.10.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.10.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.10.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.10.attention.self.value._packed_params.bias\n", + "Unpack 
bert.encoder.layer.10.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.10.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.10.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.10.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.10.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.10.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.10.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.10.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.10.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.10.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.10.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.10.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.10.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.11.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.11.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.11.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.11.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.11.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.11.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.11.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.11.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.11.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.11.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack 
bert.encoder.layer.11.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.11.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.11.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.11.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.11.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.11.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.11.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.11.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.11.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.11.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.11.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.11.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.11.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.11.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.11.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.11.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.11.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.11.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.2.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.2.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.2.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.2.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.2.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack 
bert.encoder.layer.2.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.2.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.2.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.2.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.2.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.2.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.2.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.2.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.2.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.2.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.2.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.2.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.2.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.2.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.2.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.2.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.2.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.2.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.2.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.2.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.2.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.2.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.2.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack 
bert.encoder.layer.3.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.3.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.3.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.3.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.3.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.3.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.3.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.3.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.3.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.3.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.3.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.3.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.3.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.3.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.3.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.3.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.3.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.3.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.3.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.3.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.3.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.3.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.3.output.LayerNorm.bias\n", + "Unpack 
bert.encoder.layer.3.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.3.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.3.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.3.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.3.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.4.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.4.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.4.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.4.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.4.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.4.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.4.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.4.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.4.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.4.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.4.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.4.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.4.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.4.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.4.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.4.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.4.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.4.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack 
bert.encoder.layer.4.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.4.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.4.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.4.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.4.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.4.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.4.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.4.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.4.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.4.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.5.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.5.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.5.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.5.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.5.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.5.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.5.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.5.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.5.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.5.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.5.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.5.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.5.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.5.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack 
bert.encoder.layer.5.attention.self.value._packed_params.bias\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unpack bert.encoder.layer.5.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.5.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.5.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.5.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.5.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.5.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.5.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.5.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.5.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.5.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.5.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.5.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.5.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.6.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.6.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.6.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.6.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.6.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.6.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.6.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.6.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.6.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack 
bert.encoder.layer.6.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.6.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.6.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.6.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.6.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.6.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.6.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.6.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.6.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.6.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.6.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.6.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.6.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.6.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.6.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.6.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.6.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.6.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.6.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.7.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.7.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.7.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.7.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.7.attention.output.dense._packed_params.weight.int_repr.indices\n", + 
"Unpack bert.encoder.layer.7.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.7.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.7.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.7.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.7.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.7.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.7.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.7.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.7.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.7.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.7.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.7.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.7.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.7.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.7.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.7.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.7.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.7.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.7.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.7.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.7.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.7.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.7.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack 
bert.encoder.layer.8.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.8.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.8.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.8.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.8.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.8.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.8.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.8.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.8.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.8.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.8.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.8.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.8.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.8.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.8.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.8.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.8.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.8.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.8.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.8.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.8.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.8.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.8.output.LayerNorm.bias\n", + "Unpack 
bert.encoder.layer.8.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.8.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.8.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.8.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.8.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.9.attention.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.9.attention.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.9.attention.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.9.attention.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.9.attention.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.9.attention.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.9.attention.self.key._packed_params.bias\n", + "Unpack bert.encoder.layer.9.attention.self.key._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.9.attention.self.key._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.9.attention.self.key._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.9.attention.self.query._packed_params.bias\n", + "Unpack bert.encoder.layer.9.attention.self.query._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.9.attention.self.query._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.9.attention.self.query._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.9.attention.self.value._packed_params.bias\n", + "Unpack bert.encoder.layer.9.attention.self.value._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.9.attention.self.value._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.9.attention.self.value._packed_params.weight.int_repr.indptr\n", + "Unpack 
bert.encoder.layer.9.intermediate.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.9.intermediate.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.9.intermediate.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.9.intermediate.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.encoder.layer.9.output.LayerNorm.bias\n", + "Unpack bert.encoder.layer.9.output.LayerNorm.weight\n", + "Unpack bert.encoder.layer.9.output.dense._packed_params.bias\n", + "Unpack bert.encoder.layer.9.output.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.encoder.layer.9.output.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.encoder.layer.9.output.dense._packed_params.weight.int_repr.indptr\n", + "Unpack bert.pooler.dense._packed_params.bias\n", + "Unpack bert.pooler.dense._packed_params.weight.int_repr.data\n", + "Unpack bert.pooler.dense._packed_params.weight.int_repr.indices\n", + "Unpack bert.pooler.dense._packed_params.weight.int_repr.indptr\n", + "Unpack qa_outputs._packed_params.bias\n", + "Unpack qa_outputs._packed_params.weight.int_repr.data\n", + "Unpack qa_outputs._packed_params.weight.int_repr.indices\n", + "Unpack qa_outputs._packed_params.weight.int_repr.indptr\n" + ] + } + ], + "source": [ + "# Reconstruct the elementary state dict\n", + "\n", + "reconstructed_elementary_qtz_st = {}\n", + "\n", + "hf = h5py.File('dbg/squad_sparse_with_embs.h5','r')\n", + "\n", + "for attr_name, attr_param in hf.attrs.items():\n", + " if 'shape' in attr_name:\n", + " attr_param = tuple(attr_param)\n", + " elif \".scale\" in attr_name:\n", + " if \"_packed_params\" in attr_name:\n", + " attr_param = float(attr_param)\n", + " else:\n", + " attr_param = torch.tensor(attr_param)\n", + " elif \".zero_point\" in attr_name:\n", + " if \"_packed_params\" in attr_name:\n", + " attr_param = int(attr_param)\n", + " else:\n", + " attr_param = torch.tensor(attr_param)\n", + " 
reconstructed_elementary_qtz_st[attr_name] = attr_param\n", + " print(f\"Unpack {attr_name}\")\n", + " \n", + "# Get the tensors/arrays\n", + "for data_name, data_param in hf.items():\n", + " if \"LayerNorm\" in data_name or \"_packed_params.bias\" in data_name:\n", + " reconstructed_elementary_qtz_st[data_name] = torch.from_numpy(np.array(data_param))\n", + " elif \"embedding\" in data_name:\n", + " reconstructed_elementary_qtz_st[data_name] = torch.from_numpy(np.array(data_param))\n", + " else: # _packed_params.weight.int_repr.data, _packed_params.weight.int_repr.indices and _packed_params.weight.int_repr.indptr\n", + " data_param = np.array(data_param)\n", + " if \"indices\" in data_name:\n", + " data_param = np.array(data_param, dtype=np.int32)\n", + " reconstructed_elementary_qtz_st[data_name] = data_param\n", + " print(f\"Unpack {data_name}\")\n", + " \n", + "\n", + "hf.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Sanity checks\n", + "\n", + "for name, param in reconstructed_elementary_qtz_st.items():\n", + " assert name in elementary_qtz_st\n", + "for name, param in elementary_qtz_st.items():\n", + " assert name in reconstructed_elementary_qtz_st, name\n", + "\n", + "for name, param in reconstructed_elementary_qtz_st.items():\n", + " assert type(param) == type(elementary_qtz_st[name]), name\n", + " if type(param) == torch.Tensor:\n", + " assert torch.all(torch.eq(param, elementary_qtz_st[name])), name\n", + " elif type(param) == np.ndarray:\n", + " assert (param == elementary_qtz_st[name]).all(), name\n", + " else:\n", + " assert param == elementary_qtz_st[name], name" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Re-assemble the sparse int_repr from the CSR format\n", + "\n", + "reconstructed_qtz_st = {}\n", + "\n", + "for name, param in reconstructed_elementary_qtz_st.items():\n", + " if 
\"weight.int_repr.indptr\" in name:\n", + " prefix_ = name[:-16]\n", + " data = reconstructed_elementary_qtz_st[f\"{prefix_}.int_repr.data\"]\n", + " indptr = reconstructed_elementary_qtz_st[f\"{prefix_}.int_repr.indptr\"]\n", + " indices = reconstructed_elementary_qtz_st[f\"{prefix_}.int_repr.indices\"]\n", + " shape = reconstructed_elementary_qtz_st[f\"{prefix_}.int_repr.shape\"]\n", + "\n", + " int_repr = sparse.csr_matrix(arg1=(data, indices, indptr),\n", + " shape=shape)\n", + " int_repr = torch.tensor(int_repr.todense())\n", + "\n", + " scale = reconstructed_elementary_qtz_st[f\"{prefix_}.scale\"]\n", + " zero_point = reconstructed_elementary_qtz_st[f\"{prefix_}.zero_point\"]\n", + " weight = torch._make_per_tensor_quantized_tensor(int_repr,\n", + " scale,\n", + " zero_point)\n", + "\n", + " reconstructed_qtz_st[f\"{prefix_}\"] = weight\n", + " elif \"int_repr.data\" in name or \"int_repr.shape\" in name or \"int_repr.indices\" in name or \\\n", + " \"weight.scale\" in name or \"weight.zero_point\" in name:\n", + " continue\n", + " else:\n", + " reconstructed_qtz_st[name] = param\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Sanity checks\n", + "\n", + "for name, param in reconstructed_qtz_st.items():\n", + " assert name in qtz_st\n", + "for name, param in qtz_st.items():\n", + " assert name in reconstructed_qtz_st, name\n", + "\n", + "for name, param in reconstructed_qtz_st.items():\n", + " assert type(param) == type(qtz_st[name]), name\n", + " if type(param) == torch.Tensor:\n", + " assert torch.all(torch.eq(param, qtz_st[name])), name\n", + " elif type(param) == np.ndarray:\n", + " assert (param == qtz_st[name]).all(), name\n", + " else:\n", + " assert param == qtz_st[name], name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sanity checks" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "" ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the re-constructed state dict into a model\n", + "\n", + "dummy_model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')\n", + "dummy_model.to('cpu')\n", + "\n", + "reconstructed_qtz_model = torch.quantization.quantize_dynamic(\n", + " model=dummy_model,\n", + " qconfig_spec = None,\n", + " dtype=torch.qint8,\n", + " )\n", + "\n", + "reconstructed_qtz_st = OrderedDict(reconstructed_qtz_st)\n", + "with open('dbg/metadata.json', 'r') as read_file:\n", + " metadata = json.loads(read_file.read())\n", + "reconstructed_qtz_st._metadata = metadata\n", + "\n", + "reconstructed_qtz_model.load_state_dict(reconstructed_qtz_st)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Sanity checks on the inference\n", + "\n", + "N = 32\n", + "\n", + "for _ in range(25):\n", + " inputs = torch.randint(low=0, high=30000, size=(N, 128))\n", + " mask = torch.ones(size=(N, 128))\n", + "\n", + " y_reconstructed = reconstructed_qtz_model(input_ids=inputs, attention_mask=mask)[0]\n", + " y = quantized_model(input_ids=inputs, attention_mask=mask)[0]\n", + " \n", + " assert torch.all(torch.eq(y, y_reconstructed))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}