diff --git a/examples/README.md b/examples/README.md index 0a5ec752d3..c1cddd2e47 100644 --- a/examples/README.md +++ b/examples/README.md @@ -94,3 +94,41 @@ Alternatively, you can switch your cloned 🤗 Transformers to a specific versio git checkout tags/v3.5.1 ``` and run the example command as usual afterward. + +## Running the Examples on Remote Hardware with Auto-Setup + +[run_on_remote.py](./run_on_remote.py) is a script that launches any example on remote self-hosted hardware, +with automatic hardware and environment setup. It uses [Runhouse](https://github.com/run-house/runhouse) to launch +on self-hosted hardware (e.g. in your own cloud account or on-premise cluster), but there are other options +for running remotely as well. You can easily customize the example used, command line arguments, dependencies, +and type of compute hardware, and then run the script to automatically launch the example. + +You can refer to +[hardware setup](https://runhouse-docs.readthedocs-hosted.com/en/main/rh_primitives/cluster.html#hardware-setup) +for more information about hardware and dependency setup with Runhouse, or this +[Colab tutorial](https://colab.research.google.com/drive/1sh_aNQzJX5BKAdNeXthTNGxKz7sM9VPc) for a more in-depth +walkthrough. + +You can run the script with the following commands: + +```bash +# First install runhouse: +pip install runhouse + +# For an on-demand V100 with whichever cloud provider you have configured: +python run_on_remote.py \ + --example pytorch/text-generation/run_generation.py \ + --model_type=gpt2 \ + --model_name_or_path=gpt2 \ + --prompt "I am a language model and" + +# For a BYO (bring your own) cluster: +python run_on_remote.py --host <cluster_ip> --user <ssh_user> --key_path <ssh_key_path> \ + --example <example> <args> + +# For on-demand instances: +python run_on_remote.py --instance <instance> --provider <provider> \ + --example <example> <args> +``` + +You can also adapt the script to your own needs.
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Launch one of the examples on remote hardware using Runhouse.

The script parses its own cluster-selection options, treats every other
command line argument as an argument for the example script, builds a Runhouse
cluster, installs dependencies on it and finally runs the example there.
"""

import argparse
import shlex

import runhouse as rh


# These defaults double as sentinels: a value different from the default is how
# the script detects that the user asked for a BYO or an on-demand cluster.
DEFAULT_HOST = "localhost"
DEFAULT_INSTANCE = "V100:1"
DEFAULT_PROVIDER = "cheapest"

_TRUE_STRINGS = {"true", "t", "yes", "y", "1"}
_FALSE_STRINGS = {"false", "f", "no", "n", "0"}


def _str_to_bool(value):
    """Convert a command line string such as ``"True"`` or ``"0"`` to a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean
            spelling, so that argparse reports a regular usage error.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in _TRUE_STRINGS:
        return True
    if lowered in _FALSE_STRINGS:
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value (e.g. True or False), got {value!r}")


def _parse_args(argv=None):
    """Parse the launcher options.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        A ``(namespace, extra)`` tuple where ``extra`` is the list of arguments
        not recognised by this launcher. They are forwarded to the example.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--user", type=str, default="ubuntu")
    parser.add_argument("--host", type=str, default=DEFAULT_HOST)
    parser.add_argument("--key_path", type=str, default=None)
    parser.add_argument("--instance", type=str, default=DEFAULT_INSTANCE)
    parser.add_argument("--provider", type=str, default=DEFAULT_PROVIDER)
    parser.add_argument("--use_spot", type=_str_to_bool, default=False)
    parser.add_argument("--example", type=str, default="pytorch/text-generation/run_generation.py")
    return parser.parse_known_args(argv)


def _build_cluster(args):
    """Create the Runhouse cluster described by the parsed arguments.

    Raises:
        ValueError: if both a BYO host and on-demand options were supplied.
    """
    if args.host != DEFAULT_HOST:
        if args.instance != DEFAULT_INSTANCE or args.provider != DEFAULT_PROVIDER:
            raise ValueError("Cannot specify both BYO and on-demand cluster args")
        return rh.cluster(
            name="rh-cluster", ips=[args.host], ssh_creds={"ssh_user": args.user, "ssh_private_key": args.key_path}
        )
    return rh.cluster(
        name="rh-cluster", instance_type=args.instance, provider=args.provider, use_spot=args.use_spot
    )


def main(argv=None):
    """Set up the remote environment and run the selected example on it."""
    # Refer to https://runhouse-docs.readthedocs-hosted.com/en/main/rh_primitives/cluster.html#hardware-setup for cloud
    # access setup instructions, if using on-demand hardware

    # If user passes --user <user> --host <host> --key_path <key_path>, fill them in as BYO cluster
    # If user passes --instance <instance> --provider <provider>, fill them in as on-demand cluster
    # Throw an error if user passes both BYO and on-demand cluster args
    # Otherwise, use default values
    args, example_args = _parse_args(argv)
    cluster = _build_cluster(args)
    example_dir = args.example.rsplit("/", 1)[0]

    # Set up remote environment
    cluster.install_packages(["pip:./"])  # Installs transformers from local source
    # Note transformers is copied into the home directory on the remote machine, so we can install from there.
    # Paths are shell-quoted like the forwarded arguments below; ordinary paths are left unchanged by shlex.quote.
    requirements_path = shlex.quote(f"transformers/examples/{example_dir}/requirements.txt")
    cluster.run([f"pip install -r {requirements_path}"])
    cluster.run(["pip install torch --upgrade --extra-index-url https://download.pytorch.org/whl/cu117"])

    # Run example. You can bypass the CLI wrapper and paste your own code here.
    script_path = shlex.quote(f"transformers/examples/{args.example}")
    forwarded_args = " ".join(shlex.quote(arg) for arg in example_args)
    cluster.run([f"python {script_path} {forwarded_args}"])

    # Alternatively, we can just import and run a training function (especially if there's no wrapper CLI):
    # from my_script... import train
    # reqs = ['pip:./', 'torch', 'datasets', 'accelerate', 'evaluate', 'tqdm', 'scipy', 'scikit-learn', 'tensorboard']
    # launch_train_gpu = rh.function(fn=train,
    #                                system=cluster,
    #                                reqs=reqs,
    #                                name='train_bert_glue')
    #
    # We can pass in arguments just like we would to a function:
    # launch_train_gpu(num_epochs=3, lr=2e-5, seed=42, batch_size=16,
    #                  stream_logs=True)


if __name__ == "__main__":
    main()