Reorganize examples (#9010)
* Reorganize example folder * Continue reorganization * Change requirements for tests * Final cleanup * Finish regroup with tests all passing * Copyright * Requirements and readme * Make a full link for the documentation * Address review comments * Apply suggestions from code review Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Add symlink * Reorg again * Apply suggestions from code review Co-authored-by: Thomas Wolf <thomwolf@users.noreply.github.com> * Adapt title * Update to new strucutre * Remove test * Update READMEs Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Thomas Wolf <thomwolf@users.noreply.github.com>
This commit is contained in:
133
examples/research_projects/distillation/utils.py
Normal file
133
examples/research_projects/distillation/utils.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2019-present, the HuggingFace Inc. team and Facebook, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" Utils to train DistilBERT
|
||||
adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM)
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
|
||||
import git
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s - %(levelname)s - %(name)s - PID: %(process)d - %(message)s",
|
||||
datefmt="%m/%d/%Y %H:%M:%S",
|
||||
level=logging.INFO,
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def git_log(folder_path: str):
|
||||
"""
|
||||
Log commit info.
|
||||
"""
|
||||
repo = git.Repo(search_parent_directories=True)
|
||||
repo_infos = {
|
||||
"repo_id": str(repo),
|
||||
"repo_sha": str(repo.head.object.hexsha),
|
||||
"repo_branch": str(repo.active_branch),
|
||||
}
|
||||
|
||||
with open(os.path.join(folder_path, "git_log.json"), "w") as f:
|
||||
json.dump(repo_infos, f, indent=4)
|
||||
|
||||
|
||||
def init_gpu_params(params):
|
||||
"""
|
||||
Handle single and multi-GPU / multi-node.
|
||||
"""
|
||||
if params.n_gpu <= 0:
|
||||
params.local_rank = 0
|
||||
params.master_port = -1
|
||||
params.is_master = True
|
||||
params.multi_gpu = False
|
||||
return
|
||||
|
||||
assert torch.cuda.is_available()
|
||||
|
||||
logger.info("Initializing GPUs")
|
||||
if params.n_gpu > 1:
|
||||
assert params.local_rank != -1
|
||||
|
||||
params.world_size = int(os.environ["WORLD_SIZE"])
|
||||
params.n_gpu_per_node = int(os.environ["N_GPU_NODE"])
|
||||
params.global_rank = int(os.environ["RANK"])
|
||||
|
||||
# number of nodes / node ID
|
||||
params.n_nodes = params.world_size // params.n_gpu_per_node
|
||||
params.node_id = params.global_rank // params.n_gpu_per_node
|
||||
params.multi_gpu = True
|
||||
|
||||
assert params.n_nodes == int(os.environ["N_NODES"])
|
||||
assert params.node_id == int(os.environ["NODE_RANK"])
|
||||
|
||||
# local job (single GPU)
|
||||
else:
|
||||
assert params.local_rank == -1
|
||||
|
||||
params.n_nodes = 1
|
||||
params.node_id = 0
|
||||
params.local_rank = 0
|
||||
params.global_rank = 0
|
||||
params.world_size = 1
|
||||
params.n_gpu_per_node = 1
|
||||
params.multi_gpu = False
|
||||
|
||||
# sanity checks
|
||||
assert params.n_nodes >= 1
|
||||
assert 0 <= params.node_id < params.n_nodes
|
||||
assert 0 <= params.local_rank <= params.global_rank < params.world_size
|
||||
assert params.world_size == params.n_nodes * params.n_gpu_per_node
|
||||
|
||||
# define whether this is the master process / if we are in multi-node distributed mode
|
||||
params.is_master = params.node_id == 0 and params.local_rank == 0
|
||||
params.multi_node = params.n_nodes > 1
|
||||
|
||||
# summary
|
||||
PREFIX = f"--- Global rank: {params.global_rank} - "
|
||||
logger.info(PREFIX + "Number of nodes: %i" % params.n_nodes)
|
||||
logger.info(PREFIX + "Node ID : %i" % params.node_id)
|
||||
logger.info(PREFIX + "Local rank : %i" % params.local_rank)
|
||||
logger.info(PREFIX + "World size : %i" % params.world_size)
|
||||
logger.info(PREFIX + "GPUs per node : %i" % params.n_gpu_per_node)
|
||||
logger.info(PREFIX + "Master : %s" % str(params.is_master))
|
||||
logger.info(PREFIX + "Multi-node : %s" % str(params.multi_node))
|
||||
logger.info(PREFIX + "Multi-GPU : %s" % str(params.multi_gpu))
|
||||
logger.info(PREFIX + "Hostname : %s" % socket.gethostname())
|
||||
|
||||
# set GPU device
|
||||
torch.cuda.set_device(params.local_rank)
|
||||
|
||||
# initialize multi-GPU
|
||||
if params.multi_gpu:
|
||||
logger.info("Initializing PyTorch distributed")
|
||||
torch.distributed.init_process_group(
|
||||
init_method="env://",
|
||||
backend="nccl",
|
||||
)
|
||||
|
||||
|
||||
def set_seed(args):
|
||||
"""
|
||||
Set the random seed.
|
||||
"""
|
||||
np.random.seed(args.seed)
|
||||
torch.manual_seed(args.seed)
|
||||
if args.n_gpu > 0:
|
||||
torch.cuda.manual_seed_all(args.seed)
|
||||
Reference in New Issue
Block a user