Remove nested lxmert (#9440)

2021-01-07 04:10:41 -05:00
parent b8462b5b2a
commit 3ec40299c1
8 changed files with 0 additions and 3647 deletions
--- a/examples/research_projects/movement-pruning/lxmert/README.md
+++ b/examples/research_projects/movement-pruning/lxmert/README.md
@@ -1,5 +0,0 @@
 # LXMERT DEMO
 1. make a virtualenv: ``virtualenv venv`` and activate ``source venv/bin/activate``
 2. install reqs: ``pip install -r ./requirements.txt``
 3. usage is as shown in demo.ipynb
--- a/examples/research_projects/movement-pruning/lxmert/demo.ipynb
+++ b/examples/research_projects/movement-pruning/lxmert/demo.ipynb
--- a/examples/research_projects/movement-pruning/lxmert/extracting_data.py
+++ b/examples/research_projects/movement-pruning/lxmert/extracting_data.py
@@ -1,149 +0,0 @@
 import getopt
 import json
 import os
 # import numpy as np
 import sys
 from collections import OrderedDict
 import datasets
 import numpy as np
 import torch
 from modeling_frcnn import GeneralizedRCNN
 from processing_image import Preprocess
 from utils import Config
 """
 USAGE:
 ``python extracting_data.py -i <img_dir> -o <dataset_file>.datasets <batch_size>``
 """
 TEST = False
 CONFIG = Config.from_pretrained("unc-nlp/frcnn-vg-finetuned")
 DEFAULT_SCHEMA = datasets.Features(
    OrderedDict(
        {
            "attr_ids": datasets.Sequence(length=CONFIG.MAX_DETECTIONS, feature=datasets.Value("float32")),
            "attr_probs": datasets.Sequence(length=CONFIG.MAX_DETECTIONS, feature=datasets.Value("float32")),
            "boxes": datasets.Array2D((CONFIG.MAX_DETECTIONS, 4), dtype="float32"),
            "img_id": datasets.Value("int32"),
            "obj_ids": datasets.Sequence(length=CONFIG.MAX_DETECTIONS, feature=datasets.Value("float32")),
            "obj_probs": datasets.Sequence(length=CONFIG.MAX_DETECTIONS, feature=datasets.Value("float32")),
            "roi_features": datasets.Array2D((CONFIG.MAX_DETECTIONS, 2048), dtype="float32"),
            "sizes": datasets.Sequence(length=2, feature=datasets.Value("float32")),
            "preds_per_image": datasets.Value(dtype="int32"),
        }
    )
 )
 class Extract:
    def __init__(self, argv=sys.argv[1:]):
        inputdir = None
        outputfile = None
        subset_list = None
        batch_size = 1
        opts, args = getopt.getopt(argv, "i:o:b:s", ["inputdir=", "outfile=", "batch_size=", "subset_list="])
        for opt, arg in opts:
            if opt in ("-i", "--inputdir"):
                inputdir = arg
            elif opt in ("-o", "--outfile"):
                outputfile = arg
            elif opt in ("-b", "--batch_size"):
                batch_size = int(arg)
            elif opt in ("-s", "--subset_list"):
                subset_list = arg
        assert inputdir is not None  # and os.path.isdir(inputdir), f"{inputdir}"
        assert outputfile is not None and not os.path.isfile(outputfile), f"{outputfile}"
        if subset_list is not None:
            with open(os.path.realpath(subset_list)) as f:
                self.subset_list = set(map(lambda x: self._vqa_file_split()[0], tryload(f)))
        else:
            self.subset_list = None
        self.config = CONFIG
        if torch.cuda.is_available():
            self.config.model.device = "cuda"
        self.inputdir = os.path.realpath(inputdir)
        self.outputfile = os.path.realpath(outputfile)
        self.preprocess = Preprocess(self.config)
        self.model = GeneralizedRCNN.from_pretrained("unc-nlp/frcnn-vg-finetuned", config=self.config)
        self.batch = batch_size if batch_size != 0 else 1
        self.schema = DEFAULT_SCHEMA
    def _vqa_file_split(self, file):
        img_id = int(file.split(".")[0].split("_")[-1])
        filepath = os.path.join(self.inputdir, file)
        return (img_id, filepath)
    @property
    def file_generator(self):
        batch = []
        for i, file in enumerate(os.listdir(self.inputdir)):
            if self.subset_list is not None and i not in self.subset_list:
                continue
            batch.append(self._vqa_file_split(file))
            if len(batch) == self.batch:
                temp = batch
                batch = []
                yield list(map(list, zip(*temp)))
        for i in range(1):
            yield list(map(list, zip(*batch)))
    def __call__(self):
        # make writer
        if not TEST:
            writer = datasets.ArrowWriter(features=self.schema, path=self.outputfile)
        # do file generator
        for i, (img_ids, filepaths) in enumerate(self.file_generator):
            images, sizes, scales_yx = self.preprocess(filepaths)
            output_dict = self.model(
                images,
                sizes,
                scales_yx=scales_yx,
                padding="max_detections",
                max_detections=self.config.MAX_DETECTIONS,
                pad_value=0,
                return_tensors="np",
                location="cpu",
            )
            output_dict["boxes"] = output_dict.pop("normalized_boxes")
            if not TEST:
                output_dict["img_id"] = np.array(img_ids)
                batch = self.schema.encode_batch(output_dict)
                writer.write_batch(batch)
            if TEST:
                break
            # finalizer the writer
        if not TEST:
            num_examples, num_bytes = writer.finalize()
            print(f"Success! You wrote {num_examples} entry(s) and {num_bytes >> 20} mb")
 def tryload(stream):
    try:
        data = json.load(stream)
        try:
            data = list(data.keys())
        except Exception:
            data = [d["img_id"] for d in data]
    except Exception:
        try:
            data = eval(stream.read())
        except Exception:
            data = stream.read().split("\n")
    return data
 if __name__ == "__main__":
    extract = Extract(sys.argv[1:])
    extract()
    if not TEST:
        dataset = datasets.Dataset.from_file(extract.outputfile)
        # wala!
        # print(np.array(dataset[0:2]["roi_features"]).shape)
--- a/examples/research_projects/movement-pruning/lxmert/modeling_frcnn.py
+++ b/examples/research_projects/movement-pruning/lxmert/modeling_frcnn.py
--- a/examples/research_projects/movement-pruning/lxmert/processing_image.py
+++ b/examples/research_projects/movement-pruning/lxmert/processing_image.py
@@ -1,147 +0,0 @@
 """
 coding=utf-8
 Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal
 Adapted From Facebook Inc, Detectron2
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.import copy
 """
 import sys
 from typing import Tuple
 import numpy as np
 import torch
 import torch.nn.functional as F
 from PIL import Image
 from utils import img_tensorize
 class ResizeShortestEdge:
    def __init__(self, short_edge_length, max_size=sys.maxsize):
        """
        Args:
            short_edge_length (list[min, max])
            max_size (int): maximum allowed longest edge length.
        """
        self.interp_method = "bilinear"
        self.max_size = max_size
        self.short_edge_length = short_edge_length
    def __call__(self, imgs):
        img_augs = []
        for img in imgs:
            h, w = img.shape[:2]
            # later: provide list and randomly choose index for resize
            size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1)
            if size == 0:
                return img
            scale = size * 1.0 / min(h, w)
            if h < w:
                newh, neww = size, scale * w
            else:
                newh, neww = scale * h, size
            if max(newh, neww) > self.max_size:
                scale = self.max_size * 1.0 / max(newh, neww)
                newh = newh * scale
                neww = neww * scale
            neww = int(neww + 0.5)
            newh = int(newh + 0.5)
            if img.dtype == np.uint8:
                pil_image = Image.fromarray(img)
                pil_image = pil_image.resize((neww, newh), Image.BILINEAR)
                img = np.asarray(pil_image)
            else:
                img = img.permute(2, 0, 1).unsqueeze(0)  # 3, 0, 1)  # hw(c) -> nchw
                img = F.interpolate(img, (newh, neww), mode=self.interp_method, align_corners=False).squeeze(0)
            img_augs.append(img)
        return img_augs
 class Preprocess:
    def __init__(self, cfg):
        self.aug = ResizeShortestEdge([cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST)
        self.input_format = cfg.INPUT.FORMAT
        self.size_divisibility = cfg.SIZE_DIVISIBILITY
        self.pad_value = cfg.PAD_VALUE
        self.max_image_size = cfg.INPUT.MAX_SIZE_TEST
        self.device = cfg.MODEL.DEVICE
        self.pixel_std = torch.tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(len(cfg.MODEL.PIXEL_STD), 1, 1)
        self.pixel_mean = torch.tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(len(cfg.MODEL.PIXEL_STD), 1, 1)
        self.normalizer = lambda x: (x - self.pixel_mean) / self.pixel_std
    def pad(self, images):
        max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
        image_sizes = [im.shape[-2:] for im in images]
        images = [
            F.pad(
                im,
                [0, max_size[-1] - size[1], 0, max_size[-2] - size[0]],
                value=self.pad_value,
            )
            for size, im in zip(image_sizes, images)
        ]
        return torch.stack(images), torch.tensor(image_sizes)
    def __call__(self, images, single_image=False):
        with torch.no_grad():
            if not isinstance(images, list):
                images = [images]
            if single_image:
                assert len(images) == 1
            for i in range(len(images)):
                if isinstance(images[i], torch.Tensor):
                    images.insert(i, images.pop(i).to(self.device).float())
                elif not isinstance(images[i], torch.Tensor):
                    images.insert(
                        i,
                        torch.as_tensor(img_tensorize(images.pop(i), input_format=self.input_format))
                        .to(self.device)
                        .float(),
                    )
            # resize smallest edge
            raw_sizes = torch.tensor([im.shape[:2] for im in images])
            images = self.aug(images)
            # transpose images and convert to torch tensors
            # images = [torch.as_tensor(i.astype("float32")).permute(2, 0, 1).to(self.device) for i in images]
            # now normalize before pad to avoid useless arithmetic
            images = [self.normalizer(x) for x in images]
            # now pad them to do the following operations
            images, sizes = self.pad(images)
            # Normalize
            if self.size_divisibility > 0:
                raise NotImplementedError()
            # pad
            scales_yx = torch.true_divide(raw_sizes, sizes)
            if single_image:
                return images[0], sizes[0], scales_yx[0]
            else:
                return images, sizes, scales_yx
 def _scale_box(boxes, scale_yx):
    boxes[:, 0::2] *= scale_yx[:, 1]
    boxes[:, 1::2] *= scale_yx[:, 0]
    return boxes
 def _clip_box(tensor, box_size: Tuple[int, int]):
    assert torch.isfinite(tensor).all(), "Box tensor contains infinite or NaN!"
    h, w = box_size
    tensor[:, 0].clamp_(min=0, max=w)
    tensor[:, 1].clamp_(min=0, max=h)
    tensor[:, 2].clamp_(min=0, max=w)
    tensor[:, 3].clamp_(min=0, max=h)
--- a/examples/research_projects/movement-pruning/lxmert/requirements.txt
+++ b/examples/research_projects/movement-pruning/lxmert/requirements.txt
@@ -1,99 +0,0 @@
 appdirs==1.4.3
 argon2-cffi==20.1.0
 async-generator==1.10
 attrs==20.2.0
 backcall==0.2.0
 bleach==3.1.5
 CacheControl==0.12.6
 certifi==2020.6.20
 cffi==1.14.2
 chardet==3.0.4
 click==7.1.2
 colorama==0.4.3
 contextlib2==0.6.0
 cycler==0.10.0
 datasets==1.0.0
 decorator==4.4.2
 defusedxml==0.6.0
 dill==0.3.2
 distlib==0.3.0
 distro==1.4.0
 entrypoints==0.3
 filelock==3.0.12
 future==0.18.2
 html5lib==1.0.1
 idna==2.8
 ipaddr==2.2.0
 ipykernel==5.3.4
 ipython
 ipython-genutils==0.2.0
 ipywidgets==7.5.1
 jedi==0.17.2
 Jinja2==2.11.2
 joblib==0.16.0
 jsonschema==3.2.0
 jupyter==1.0.0
 jupyter-client==6.1.7
 jupyter-console==6.2.0
 jupyter-core==4.6.3
 jupyterlab-pygments==0.1.1
 kiwisolver==1.2.0
 lockfile==0.12.2
 MarkupSafe==1.1.1
 matplotlib==3.3.1
 mistune==0.8.4
 msgpack==0.6.2
 nbclient==0.5.0
 nbconvert==6.0.1
 nbformat==5.0.7
 nest-asyncio==1.4.0
 notebook==6.1.5
 numpy==1.19.2
 opencv-python==4.4.0.42
 packaging==20.3
 pandas==1.1.2
 pandocfilters==1.4.2
 parso==0.7.1
 pep517==0.8.2
 pexpect==4.8.0
 pickleshare==0.7.5
 Pillow==7.2.0
 progress==1.5
 prometheus-client==0.8.0
 prompt-toolkit==3.0.7
 ptyprocess==0.6.0
 pyaml==20.4.0
 pyarrow==1.0.1
 pycparser==2.20
 Pygments==2.6.1
 pyparsing==2.4.6
 pyrsistent==0.16.0
 python-dateutil==2.8.1
 pytoml==0.1.21
 pytz==2020.1
 PyYAML==5.3.1
 pyzmq==19.0.2
 qtconsole==4.7.7
 QtPy==1.9.0
 regex==2020.7.14
 requests==2.22.0
 retrying==1.3.3
 sacremoses==0.0.43
 Send2Trash==1.5.0
 sentencepiece==0.1.91
 six==1.14.0
 terminado==0.8.3
 testpath==0.4.4
 tokenizers==0.8.1rc2
 torch==1.6.0
 torchvision==0.7.0
 tornado==6.0.4
 tqdm==4.48.2
 traitlets
 transformers==3.5.1
 urllib3==1.25.8
 wcwidth==0.2.5
 webencodings==0.5.1
 wget==3.2
 widgetsnbextension==3.5.1
 xxhash==2.0.0
--- a/examples/research_projects/movement-pruning/lxmert/utils.py
+++ b/examples/research_projects/movement-pruning/lxmert/utils.py
@@ -1,559 +0,0 @@
 """
 coding=utf-8
 Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal, Huggingface team :)
 Adapted From Facebook Inc, Detectron2
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.import copy
 """
 import copy
 import fnmatch
 import json
 import os
 import pickle as pkl
 import shutil
 import sys
 import tarfile
 import tempfile
 from collections import OrderedDict
 from contextlib import contextmanager
 from functools import partial
 from hashlib import sha256
 from io import BytesIO
 from pathlib import Path
 from urllib.parse import urlparse
 from zipfile import ZipFile, is_zipfile
 import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
 import cv2
 import requests
 import wget
 from filelock import FileLock
 from yaml import Loader, dump, load
 try:
    import torch
    _torch_available = True
 except ImportError:
    _torch_available = False
 try:
    from torch.hub import _get_torch_home
    torch_cache_home = _get_torch_home()
 except ImportError:
    torch_cache_home = os.path.expanduser(
        os.getenv("TORCH_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "torch"))
    )
 default_cache_path = os.path.join(torch_cache_home, "transformers")
 CLOUDFRONT_DISTRIB_PREFIX = "https://cdn.huggingface.co"
 S3_BUCKET_PREFIX = "https://s3.amazonaws.com/models.huggingface.co/bert"
 PATH = "/".join(str(Path(__file__).resolve()).split("/")[:-1])
 CONFIG = os.path.join(PATH, "config.yaml")
 ATTRIBUTES = os.path.join(PATH, "attributes.txt")
 OBJECTS = os.path.join(PATH, "objects.txt")
 PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path)
 PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)
 TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE)
 WEIGHTS_NAME = "pytorch_model.bin"
 CONFIG_NAME = "config.yaml"
 def load_labels(objs=OBJECTS, attrs=ATTRIBUTES):
    vg_classes = []
    with open(objs) as f:
        for object in f.readlines():
            vg_classes.append(object.split(",")[0].lower().strip())
    vg_attrs = []
    with open(attrs) as f:
        for object in f.readlines():
            vg_attrs.append(object.split(",")[0].lower().strip())
    return vg_classes, vg_attrs
 def load_checkpoint(ckp):
    r = OrderedDict()
    with open(ckp, "rb") as f:
        ckp = pkl.load(f)["model"]
    for k in copy.deepcopy(list(ckp.keys())):
        v = ckp.pop(k)
        if isinstance(v, np.ndarray):
            v = torch.tensor(v)
        else:
            assert isinstance(v, torch.tensor), type(v)
        r[k] = v
    return r
 class Config:
    _pointer = {}
    def __init__(self, dictionary: dict, name: str = "root", level=0):
        self._name = name
        self._level = level
        d = {}
        for k, v in dictionary.items():
            if v is None:
                raise ValueError()
            k = copy.deepcopy(k)
            v = copy.deepcopy(v)
            if isinstance(v, dict):
                v = Config(v, name=k, level=level + 1)
            d[k] = v
            setattr(self, k, v)
        self._pointer = d
    def __repr__(self):
        return str(list((self._pointer.keys())))
    def __setattr__(self, key, val):
        self.__dict__[key] = val
        self.__dict__[key.upper()] = val
        levels = key.split(".")
        last_level = len(levels) - 1
        pointer = self._pointer
        if len(levels) > 1:
            for i, l in enumerate(levels):
                if hasattr(self, l) and isinstance(getattr(self, l), Config):
                    setattr(getattr(self, l), ".".join(levels[i:]), val)
                if l == last_level:
                    pointer[l] = val
                else:
                    pointer = pointer[l]
    def to_dict(self):
        return self._pointer
    def dump_yaml(self, data, file_name):
        with open(f"{file_name}", "w") as stream:
            dump(data, stream)
    def dump_json(self, data, file_name):
        with open(f"{file_name}", "w") as stream:
            json.dump(data, stream)
    @staticmethod
    def load_yaml(config):
        with open(config) as stream:
            data = load(stream, Loader=Loader)
        return data
    def __str__(self):
        t = "    "
        if self._name != "root":
            r = f"{t * (self._level-1)}{self._name}:\n"
        else:
            r = ""
        level = self._level
        for i, (k, v) in enumerate(self._pointer.items()):
            if isinstance(v, Config):
                r += f"{t * (self._level)}{v}\n"
                self._level += 1
            else:
                r += f"{t * (self._level)}{k}: {v} ({type(v).__name__})\n"
            self._level = level
        return r[:-1]
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
        config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
        return cls(config_dict)
    @classmethod
    def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs):
        cache_dir = kwargs.pop("cache_dir", None)
        force_download = kwargs.pop("force_download", False)
        resume_download = kwargs.pop("resume_download", False)
        proxies = kwargs.pop("proxies", None)
        local_files_only = kwargs.pop("local_files_only", False)
        if os.path.isdir(pretrained_model_name_or_path):
            config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
        elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
            config_file = pretrained_model_name_or_path
        else:
            config_file = hf_bucket_url(pretrained_model_name_or_path, filename=CONFIG_NAME, use_cdn=False)
        try:
            # Load from URL or cache if already cached
            resolved_config_file = cached_path(
                config_file,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                local_files_only=local_files_only,
            )
            # Load config dict
            if resolved_config_file is None:
                raise EnvironmentError
            config_file = Config.load_yaml(resolved_config_file)
        except EnvironmentError:
            msg = "Can't load config for"
            raise EnvironmentError(msg)
        if resolved_config_file == config_file:
            print("loading configuration file from path")
        else:
            print("loading configuration file cache")
        return Config.load_yaml(resolved_config_file), kwargs
 # quick compare tensors
 def compare(in_tensor):
    out_tensor = torch.load("dump.pt", map_location=in_tensor.device)
    n1 = in_tensor.numpy()
    n2 = out_tensor.numpy()[0]
    print(n1.shape, n1[0, 0, :5])
    print(n2.shape, n2[0, 0, :5])
    assert np.allclose(
        n1, n2, rtol=0.01, atol=0.1
    ), f"{sum([1 for x in np.isclose(n1, n2, rtol=0.01, atol=0.1).flatten() if x == False])/len(n1.flatten())*100:.4f} % element-wise mismatch"
    raise Exception("tensors are all good")
    # Hugging face functions below
 def is_remote_url(url_or_filename):
    parsed = urlparse(url_or_filename)
    return parsed.scheme in ("http", "https")
 def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
    endpoint = CLOUDFRONT_DISTRIB_PREFIX if use_cdn else S3_BUCKET_PREFIX
    legacy_format = "/" not in model_id
    if legacy_format:
        return f"{endpoint}/{model_id}-{filename}"
    else:
        return f"{endpoint}/{model_id}/{filename}"
 def http_get(
    url,
    temp_file,
    proxies=None,
    resume_size=0,
    user_agent=None,
 ):
    ua = "python/{}".format(sys.version.split()[0])
    if _torch_available:
        ua += "; torch/{}".format(torch.__version__)
    if isinstance(user_agent, dict):
        ua += "; " + "; ".join("{}/{}".format(k, v) for k, v in user_agent.items())
    elif isinstance(user_agent, str):
        ua += "; " + user_agent
    headers = {"user-agent": ua}
    if resume_size > 0:
        headers["Range"] = "bytes=%d-" % (resume_size,)
    response = requests.get(url, stream=True, proxies=proxies, headers=headers)
    if response.status_code == 416:  # Range not satisfiable
        return
    content_length = response.headers.get("Content-Length")
    total = resume_size + int(content_length) if content_length is not None else None
    progress = tqdm(
        unit="B",
        unit_scale=True,
        total=total,
        initial=resume_size,
        desc="Downloading",
    )
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:  # filter out keep-alive new chunks
            progress.update(len(chunk))
            temp_file.write(chunk)
    progress.close()
 def get_from_cache(
    url,
    cache_dir=None,
    force_download=False,
    proxies=None,
    etag_timeout=10,
    resume_download=False,
    user_agent=None,
    local_files_only=False,
 ):
    if cache_dir is None:
        cache_dir = TRANSFORMERS_CACHE
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)
    os.makedirs(cache_dir, exist_ok=True)
    etag = None
    if not local_files_only:
        try:
            response = requests.head(url, allow_redirects=True, proxies=proxies, timeout=etag_timeout)
            if response.status_code == 200:
                etag = response.headers.get("ETag")
        except (EnvironmentError, requests.exceptions.Timeout):
            # etag is already None
            pass
    filename = url_to_filename(url, etag)
    # get cache path to put the file
    cache_path = os.path.join(cache_dir, filename)
    # etag is None = we don't have a connection, or url doesn't exist, or is otherwise inaccessible.
    # try to get the last downloaded one
    if etag is None:
        if os.path.exists(cache_path):
            return cache_path
        else:
            matching_files = [
                file
                for file in fnmatch.filter(os.listdir(cache_dir), filename + ".*")
                if not file.endswith(".json") and not file.endswith(".lock")
            ]
            if len(matching_files) > 0:
                return os.path.join(cache_dir, matching_files[-1])
            else:
                # If files cannot be found and local_files_only=True,
                # the models might've been found if local_files_only=False
                # Notify the user about that
                if local_files_only:
                    raise ValueError(
                        "Cannot find the requested files in the cached path and outgoing traffic has been"
                        " disabled. To enable model look-ups and downloads online, set 'local_files_only'"
                        " to False."
                    )
                return None
    # From now on, etag is not None.
    if os.path.exists(cache_path) and not force_download:
        return cache_path
    # Prevent parallel downloads of the same file with a lock.
    lock_path = cache_path + ".lock"
    with FileLock(lock_path):
        # If the download just completed while the lock was activated.
        if os.path.exists(cache_path) and not force_download:
            # Even if returning early like here, the lock will be released.
            return cache_path
        if resume_download:
            incomplete_path = cache_path + ".incomplete"
            @contextmanager
            def _resumable_file_manager():
                with open(incomplete_path, "a+b") as f:
                    yield f
            temp_file_manager = _resumable_file_manager
            if os.path.exists(incomplete_path):
                resume_size = os.stat(incomplete_path).st_size
            else:
                resume_size = 0
        else:
            temp_file_manager = partial(tempfile.NamedTemporaryFile, dir=cache_dir, delete=False)
            resume_size = 0
        # Download to temporary file, then copy to cache dir once finished.
        # Otherwise you get corrupt cache entries if the download gets interrupted.
        with temp_file_manager() as temp_file:
            print(
                "%s not found in cache or force_download set to True, downloading to %s",
                url,
                temp_file.name,
            )
            http_get(
                url,
                temp_file,
                proxies=proxies,
                resume_size=resume_size,
                user_agent=user_agent,
            )
        os.replace(temp_file.name, cache_path)
        meta = {"url": url, "etag": etag}
        meta_path = cache_path + ".json"
        with open(meta_path, "w") as meta_file:
            json.dump(meta, meta_file)
    return cache_path
 def url_to_filename(url, etag=None):
    url_bytes = url.encode("utf-8")
    url_hash = sha256(url_bytes)
    filename = url_hash.hexdigest()
    if etag:
        etag_bytes = etag.encode("utf-8")
        etag_hash = sha256(etag_bytes)
        filename += "." + etag_hash.hexdigest()
    if url.endswith(".h5"):
        filename += ".h5"
    return filename
 def cached_path(
    url_or_filename,
    cache_dir=None,
    force_download=False,
    proxies=None,
    resume_download=False,
    user_agent=None,
    extract_compressed_file=False,
    force_extract=False,
    local_files_only=False,
 ):
    if cache_dir is None:
        cache_dir = TRANSFORMERS_CACHE
    if isinstance(url_or_filename, Path):
        url_or_filename = str(url_or_filename)
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)
    if is_remote_url(url_or_filename):
        # URL, so get it from the cache (downloading if necessary)
        output_path = get_from_cache(
            url_or_filename,
            cache_dir=cache_dir,
            force_download=force_download,
            proxies=proxies,
            resume_download=resume_download,
            user_agent=user_agent,
            local_files_only=local_files_only,
        )
    elif os.path.exists(url_or_filename):
        # File, and it exists.
        output_path = url_or_filename
    elif urlparse(url_or_filename).scheme == "":
        # File, but it doesn't exist.
        raise EnvironmentError("file {} not found".format(url_or_filename))
    else:
        # Something unknown
        raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename))
    if extract_compressed_file:
        if not is_zipfile(output_path) and not tarfile.is_tarfile(output_path):
            return output_path
        # Path where we extract compressed archives
        # We avoid '.' in dir name and add "-extracted" at the end: "./model.zip" => "./model-zip-extracted/"
        output_dir, output_file = os.path.split(output_path)
        output_extract_dir_name = output_file.replace(".", "-") + "-extracted"
        output_path_extracted = os.path.join(output_dir, output_extract_dir_name)
        if os.path.isdir(output_path_extracted) and os.listdir(output_path_extracted) and not force_extract:
            return output_path_extracted
        # Prevent parallel extractions
        lock_path = output_path + ".lock"
        with FileLock(lock_path):
            shutil.rmtree(output_path_extracted, ignore_errors=True)
            os.makedirs(output_path_extracted)
            if is_zipfile(output_path):
                with ZipFile(output_path, "r") as zip_file:
                    zip_file.extractall(output_path_extracted)
                    zip_file.close()
            elif tarfile.is_tarfile(output_path):
                tar_file = tarfile.open(output_path)
                tar_file.extractall(output_path_extracted)
                tar_file.close()
            else:
                raise EnvironmentError("Archive format of {} could not be identified".format(output_path))
        return output_path_extracted
    return output_path
 def get_data(query, delim=","):
    assert isinstance(query, str)
    if os.path.isfile(query):
        with open(query) as f:
            data = eval(f.read())
    else:
        req = requests.get(query)
        try:
            data = requests.json()
        except Exception:
            data = req.content.decode()
            assert data is not None, "could not connect"
            try:
                data = eval(data)
            except Exception:
                data = data.split("\n")
        req.close()
    return data
 def get_image_from_url(url):
    response = requests.get(url)
    img = np.array(Image.open(BytesIO(response.content)))
    return img
 # to load legacy frcnn checkpoint from detectron
 def load_frcnn_pkl_from_url(url):
    fn = url.split("/")[-1]
    if fn not in os.listdir(os.getcwd()):
        wget.download(url)
    with open(fn, "rb") as stream:
        weights = pkl.load(stream)
    model = weights.pop("model")
    new = {}
    for k, v in model.items():
        new[k] = torch.from_numpy(v)
        if "running_var" in k:
            zero = torch.Tensor([0])
            k2 = k.replace("running_var", "num_batches_tracked")
            new[k2] = zero
    return new
 def get_demo_path():
    print(f"{os.path.abspath(os.path.join(PATH, os.pardir))}/demo.ipynb")
 def img_tensorize(im, input_format="RGB"):
    assert isinstance(im, str)
    if os.path.isfile(im):
        img = cv2.imread(im)
    else:
        img = get_image_from_url(im)
        assert img is not None, f"could not connect to: {im}"
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    if input_format == "RGB":
        img = img[:, :, ::-1]
    return img
 def chunk(images, batch=1):
    return (images[i : i + batch] for i in range(0, len(images), batch))
--- a/examples/research_projects/movement-pruning/lxmert/visualizing_image.py
+++ b/examples/research_projects/movement-pruning/lxmert/visualizing_image.py
@@ -1,499 +0,0 @@
 """
 coding=utf-8
 Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal
 Adapted From Facebook Inc, Detectron2
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.import copy
 """
 import colorsys
 import io
 import matplotlib as mpl
 import matplotlib.colors as mplc
 import matplotlib.figure as mplfigure
 import numpy as np
 import torch
 from matplotlib.backends.backend_agg import FigureCanvasAgg
 import cv2
 from utils import img_tensorize
 _SMALL_OBJ = 1000
 class SingleImageViz:
    def __init__(
        self,
        img,
        scale=1.2,
        edgecolor="g",
        alpha=0.5,
        linestyle="-",
        saveas="test_out.jpg",
        rgb=True,
        pynb=False,
        id2obj=None,
        id2attr=None,
        pad=0.7,
    ):
        """
        img: an RGB image of shape (H, W, 3).
        """
        if isinstance(img, torch.Tensor):
            img = img.numpy().astype("np.uint8")
        if isinstance(img, str):
            img = img_tensorize(img)
        assert isinstance(img, np.ndarray)
        width, height = img.shape[1], img.shape[0]
        fig = mplfigure.Figure(frameon=False)
        dpi = fig.get_dpi()
        width_in = (width * scale + 1e-2) / dpi
        height_in = (height * scale + 1e-2) / dpi
        fig.set_size_inches(width_in, height_in)
        ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
        ax.axis("off")
        ax.set_xlim(0.0, width)
        ax.set_ylim(height)
        self.saveas = saveas
        self.rgb = rgb
        self.pynb = pynb
        self.img = img
        self.edgecolor = edgecolor
        self.alpha = 0.5
        self.linestyle = linestyle
        self.font_size = int(np.sqrt(min(height, width)) * scale // 3)
        self.width = width
        self.height = height
        self.scale = scale
        self.fig = fig
        self.ax = ax
        self.pad = pad
        self.id2obj = id2obj
        self.id2attr = id2attr
        self.canvas = FigureCanvasAgg(fig)
    def add_box(self, box, color=None):
        if color is None:
            color = self.edgecolor
        (x0, y0, x1, y1) = box
        width = x1 - x0
        height = y1 - y0
        self.ax.add_patch(
            mpl.patches.Rectangle(
                (x0, y0),
                width,
                height,
                fill=False,
                edgecolor=color,
                linewidth=self.font_size // 3,
                alpha=self.alpha,
                linestyle=self.linestyle,
            )
        )
    def draw_boxes(self, boxes, obj_ids=None, obj_scores=None, attr_ids=None, attr_scores=None):
        if len(boxes.shape) > 2:
            boxes = boxes[0]
        if len(obj_ids.shape) > 1:
            obj_ids = obj_ids[0]
        if len(obj_scores.shape) > 1:
            obj_scores = obj_scores[0]
        if len(attr_ids.shape) > 1:
            attr_ids = attr_ids[0]
        if len(attr_scores.shape) > 1:
            attr_scores = attr_scores[0]
        if isinstance(boxes, torch.Tensor):
            boxes = boxes.numpy()
        if isinstance(boxes, list):
            boxes = np.array(boxes)
        assert isinstance(boxes, np.ndarray)
        areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1)
        sorted_idxs = np.argsort(-areas).tolist()
        boxes = boxes[sorted_idxs] if boxes is not None else None
        obj_ids = obj_ids[sorted_idxs] if obj_ids is not None else None
        obj_scores = obj_scores[sorted_idxs] if obj_scores is not None else None
        attr_ids = attr_ids[sorted_idxs] if attr_ids is not None else None
        attr_scores = attr_scores[sorted_idxs] if attr_scores is not None else None
        assigned_colors = [self._random_color(maximum=1) for _ in range(len(boxes))]
        assigned_colors = [assigned_colors[idx] for idx in sorted_idxs]
        if obj_ids is not None:
            labels = self._create_text_labels_attr(obj_ids, obj_scores, attr_ids, attr_scores)
            for i in range(len(boxes)):
                color = assigned_colors[i]
                self.add_box(boxes[i], color)
                self.draw_labels(labels[i], boxes[i], color)
    def draw_labels(self, label, box, color):
        x0, y0, x1, y1 = box
        text_pos = (x0, y0)
        instance_area = (y1 - y0) * (x1 - x0)
        small = _SMALL_OBJ * self.scale
        if instance_area < small or y1 - y0 < 40 * self.scale:
            if y1 >= self.height - 5:
                text_pos = (x1, y0)
            else:
                text_pos = (x0, y1)
        height_ratio = (y1 - y0) / np.sqrt(self.height * self.width)
        lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
        font_size = np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
        font_size *= 0.75 * self.font_size
        self.draw_text(
            text=label,
            position=text_pos,
            color=lighter_color,
        )
    def draw_text(
        self,
        text,
        position,
        color="g",
        ha="left",
    ):
        rotation = 0
        font_size = self.font_size
        color = np.maximum(list(mplc.to_rgb(color)), 0.2)
        color[np.argmax(color)] = max(0.8, np.max(color))
        bbox = {
            "facecolor": "black",
            "alpha": self.alpha,
            "pad": self.pad,
            "edgecolor": "none",
        }
        x, y = position
        self.ax.text(
            x,
            y,
            text,
            size=font_size * self.scale,
            family="sans-serif",
            bbox=bbox,
            verticalalignment="top",
            horizontalalignment=ha,
            color=color,
            zorder=10,
            rotation=rotation,
        )
    def save(self, saveas=None):
        if saveas is None:
            saveas = self.saveas
        if saveas.lower().endswith(".jpg") or saveas.lower().endswith(".png"):
            cv2.imwrite(
                saveas,
                self._get_buffer()[:, :, ::-1],
            )
        else:
            self.fig.savefig(saveas)
    def _create_text_labels_attr(self, classes, scores, attr_classes, attr_scores):
        labels = [self.id2obj[i] for i in classes]
        attr_labels = [self.id2attr[i] for i in attr_classes]
        labels = [
            f"{label} {score:.2f} {attr} {attr_score:.2f}"
            for label, score, attr, attr_score in zip(labels, scores, attr_labels, attr_scores)
        ]
        return labels
    def _create_text_labels(self, classes, scores):
        labels = [self.id2obj[i] for i in classes]
        if scores is not None:
            if labels is None:
                labels = ["{:.0f}%".format(s * 100) for s in scores]
            else:
                labels = ["{} {:.0f}%".format(li, s * 100) for li, s in zip(labels, scores)]
        return labels
    def _random_color(self, maximum=255):
        idx = np.random.randint(0, len(_COLORS))
        ret = _COLORS[idx] * maximum
        if not self.rgb:
            ret = ret[::-1]
        return ret
    def _get_buffer(self):
        if not self.pynb:
            s, (width, height) = self.canvas.print_to_buffer()
            if (width, height) != (self.width, self.height):
                img = cv2.resize(self.img, (width, height))
            else:
                img = self.img
        else:
            buf = io.BytesIO()  # works for cairo backend
            self.canvas.print_rgba(buf)
            width, height = self.width, self.height
            s = buf.getvalue()
            img = self.img
        buffer = np.frombuffer(s, dtype="uint8")
        img_rgba = buffer.reshape(height, width, 4)
        rgb, alpha = np.split(img_rgba, [3], axis=2)
        try:
            import numexpr as ne  # fuse them with numexpr
            visualized_image = ne.evaluate("img * (1 - alpha / 255.0) + rgb * (alpha / 255.0)")
        except ImportError:
            alpha = alpha.astype("float32") / 255.0
            visualized_image = img * (1 - alpha) + rgb * alpha
        return visualized_image.astype("uint8")
    def _change_color_brightness(self, color, brightness_factor):
        assert brightness_factor >= -1.0 and brightness_factor <= 1.0
        color = mplc.to_rgb(color)
        polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color))
        modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1])
        modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness
        modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness
        modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2])
        return modified_color
 # Color map
 _COLORS = (
    np.array(
        [
            0.000,
            0.447,
            0.741,
            0.850,
            0.325,
            0.098,
            0.929,
            0.694,
            0.125,
            0.494,
            0.184,
            0.556,
            0.466,
            0.674,
            0.188,
            0.301,
            0.745,
            0.933,
            0.635,
            0.078,
            0.184,
            0.300,
            0.300,
            0.300,
            0.600,
            0.600,
            0.600,
            1.000,
            0.000,
            0.000,
            1.000,
            0.500,
            0.000,
            0.749,
            0.749,
            0.000,
            0.000,
            1.000,
            0.000,
            0.000,
            0.000,
            1.000,
            0.667,
            0.000,
            1.000,
            0.333,
            0.333,
            0.000,
            0.333,
            0.667,
            0.000,
            0.333,
            1.000,
            0.000,
            0.667,
            0.333,
            0.000,
            0.667,
            0.667,
            0.000,
            0.667,
            1.000,
            0.000,
            1.000,
            0.333,
            0.000,
            1.000,
            0.667,
            0.000,
            1.000,
            1.000,
            0.000,
            0.000,
            0.333,
            0.500,
            0.000,
            0.667,
            0.500,
            0.000,
            1.000,
            0.500,
            0.333,
            0.000,
            0.500,
            0.333,
            0.333,
            0.500,
            0.333,
            0.667,
            0.500,
            0.333,
            1.000,
            0.500,
            0.667,
            0.000,
            0.500,
            0.667,
            0.333,
            0.500,
            0.667,
            0.667,
            0.500,
            0.667,
            1.000,
            0.500,
            1.000,
            0.000,
            0.500,
            1.000,
            0.333,
            0.500,
            1.000,
            0.667,
            0.500,
            1.000,
            1.000,
            0.500,
            0.000,
            0.333,
            1.000,
            0.000,
            0.667,
            1.000,
            0.000,
            1.000,
            1.000,
            0.333,
            0.000,
            1.000,
            0.333,
            0.333,
            1.000,
            0.333,
            0.667,
            1.000,
            0.333,
            1.000,
            1.000,
            0.667,
            0.000,
            1.000,
            0.667,
            0.333,
            1.000,
            0.667,
            0.667,
            1.000,
            0.667,
            1.000,
            1.000,
            1.000,
            0.000,
            1.000,
            1.000,
            0.333,
            1.000,
            1.000,
            0.667,
            1.000,
            0.333,
            0.000,
            0.000,
            0.500,
            0.000,
            0.000,
            0.667,
            0.000,
            0.000,
            0.833,
            0.000,
            0.000,
            1.000,
            0.000,
            0.000,
            0.000,
            0.167,
            0.000,
            0.000,
            0.333,
            0.000,
            0.000,
            0.500,
            0.000,
            0.000,
            0.667,
            0.000,
            0.000,
            0.833,
            0.000,
            0.000,
            1.000,
            0.000,
            0.000,
            0.000,
            0.167,
            0.000,
            0.000,
            0.333,
            0.000,
            0.000,
            0.500,
            0.000,
            0.000,
            0.667,
            0.000,
            0.000,
            0.833,
            0.000,
            0.000,
            1.000,
            0.000,
            0.000,
            0.000,
            0.143,
            0.143,
            0.143,
            0.857,
            0.857,
            0.857,
            1.000,
            1.000,
            1.000,
        ]
    )
    .astype(np.float32)
    .reshape(-1, 3)
 )