Model versioning (#8324)

* fix typo

* rm use_cdn & references, and implement new hf_bucket_url

* I'm pretty sure we don't need to `read` this file

* same here

* [BIG] file_utils.networking: do not gobble up errors anymore

* Fix CI 😇

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Tiny doc tweak

* Add doc + pass kwarg everywhere

* Add more tests and explain

cc @sshleifer let me know if better

Co-Authored-By: Sam Shleifer <sshleifer@gmail.com>

* Also implement revision in pipelines

In the case where we're passing a task name or a string model identifier

* Fix CI 😇

* Fix CI

* [hf_api] new methods + command line implem

* make style

* Final endpoints post-migration

* Fix post-migration

* Py3.6 compat

cc @stefan-it

Thank you @stas00

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
This commit is contained in:
Julien Chaumond
2020-11-10 13:11:02 +01:00
committed by GitHub
parent 4185b115d4
commit 70f622fab4
23 changed files with 472 additions and 210 deletions

View File

@@ -1,4 +1,5 @@
import os
import subprocess
import sys
from argparse import ArgumentParser
from getpass import getpass
@@ -21,8 +22,10 @@ class UserCommands(BaseTransformersCLICommand):
whoami_parser.set_defaults(func=lambda args: WhoamiCommand(args))
logout_parser = parser.add_parser("logout", help="Log out")
logout_parser.set_defaults(func=lambda args: LogoutCommand(args))
# s3
s3_parser = parser.add_parser("s3", help="{ls, rm} Commands to interact with the files you upload on S3.")
# s3_datasets (s3-based system)
s3_parser = parser.add_parser(
"s3_datasets", help="{ls, rm} Commands to interact with the files you upload on S3."
)
s3_subparsers = s3_parser.add_subparsers(help="s3 related commands")
ls_parser = s3_subparsers.add_parser("ls")
ls_parser.add_argument("--organization", type=str, help="Optional: organization namespace.")
@@ -31,17 +34,42 @@ class UserCommands(BaseTransformersCLICommand):
rm_parser.add_argument("filename", type=str, help="individual object filename to delete from S3.")
rm_parser.add_argument("--organization", type=str, help="Optional: organization namespace.")
rm_parser.set_defaults(func=lambda args: DeleteObjCommand(args))
# upload
upload_parser = parser.add_parser("upload", help="Upload a model to S3.")
upload_parser.add_argument(
"path", type=str, help="Local path of the model folder or individual file to upload."
)
upload_parser = s3_subparsers.add_parser("upload", help="Upload a file to S3.")
upload_parser.add_argument("path", type=str, help="Local path of the folder or individual file to upload.")
upload_parser.add_argument("--organization", type=str, help="Optional: organization namespace.")
upload_parser.add_argument(
"--filename", type=str, default=None, help="Optional: override individual object filename on S3."
)
upload_parser.add_argument("-y", "--yes", action="store_true", help="Optional: answer Yes to the prompt")
upload_parser.set_defaults(func=lambda args: UploadCommand(args))
# deprecated model upload
upload_parser = parser.add_parser(
"upload",
help=(
"Deprecated: used to be the way to upload a model to S3."
" We now use a git-based system for storing models and other artifacts."
" Use the `repo create` command instead."
),
)
upload_parser.set_defaults(func=lambda args: DeprecatedUploadCommand(args))
# new system: git-based repo system
repo_parser = parser.add_parser(
"repo", help="{create, ls-files} Commands to interact with your huggingface.co repos."
)
repo_subparsers = repo_parser.add_subparsers(help="huggingface.co repos related commands")
ls_parser = repo_subparsers.add_parser("ls-files", help="List all your files on huggingface.co")
ls_parser.add_argument("--organization", type=str, help="Optional: organization namespace.")
ls_parser.set_defaults(func=lambda args: ListReposObjsCommand(args))
repo_create_parser = repo_subparsers.add_parser("create", help="Create a new repo on huggingface.co")
repo_create_parser.add_argument(
"name",
type=str,
help="Name for your model's repo. Will be namespaced under your username to build the model id.",
)
repo_create_parser.add_argument("--organization", type=str, help="Optional: organization namespace.")
repo_create_parser.add_argument("-y", "--yes", action="store_true", help="Optional: answer Yes to the prompt")
repo_create_parser.set_defaults(func=lambda args: RepoCreateCommand(args))
class ANSI:
@@ -51,6 +79,7 @@ class ANSI:
_bold = "\u001b[1m"
_red = "\u001b[31m"
_gray = "\u001b[90m"
_reset = "\u001b[0m"
@classmethod
@@ -61,6 +90,27 @@ class ANSI:
def red(cls, s):
return "{}{}{}".format(cls._bold + cls._red, s, cls._reset)
@classmethod
def gray(cls, s):
return "{}{}{}".format(cls._gray, s, cls._reset)
def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
"""
Inspired by:
- stackoverflow.com/a/8356620/593036
- stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
"""
col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
lines = []
lines.append(row_format.format(*headers))
lines.append(row_format.format(*["-" * w for w in col_widths]))
for row in rows:
lines.append(row_format.format(*row))
return "\n".join(lines)
class BaseUserCommand:
def __init__(self, args):
@@ -124,22 +174,6 @@ class LogoutCommand(BaseUserCommand):
class ListObjsCommand(BaseUserCommand):
def tabulate(self, rows: List[List[Union[str, int]]], headers: List[str]) -> str:
"""
Inspired by:
- stackoverflow.com/a/8356620/593036
- stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
"""
col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
lines = []
lines.append(row_format.format(*headers))
lines.append(row_format.format(*["-" * w for w in col_widths]))
for row in rows:
lines.append(row_format.format(*row))
return "\n".join(lines)
def run(self):
token = HfFolder.get_token()
if token is None:
@@ -155,7 +189,7 @@ class ListObjsCommand(BaseUserCommand):
print("No shared file yet")
exit()
rows = [[obj.filename, obj.LastModified, obj.ETag, obj.Size] for obj in objs]
print(self.tabulate(rows, headers=["Filename", "LastModified", "ETag", "Size"]))
print(tabulate(rows, headers=["Filename", "LastModified", "ETag", "Size"]))
class DeleteObjCommand(BaseUserCommand):
@@ -173,6 +207,85 @@ class DeleteObjCommand(BaseUserCommand):
print("Done")
class ListReposObjsCommand(BaseUserCommand):
def run(self):
token = HfFolder.get_token()
if token is None:
print("Not logged in")
exit(1)
try:
objs = self._api.list_repos_objs(token, organization=self.args.organization)
except HTTPError as e:
print(e)
print(ANSI.red(e.response.text))
exit(1)
if len(objs) == 0:
print("No shared file yet")
exit()
rows = [[obj.filename, obj.lastModified, obj.commit, obj.size] for obj in objs]
print(tabulate(rows, headers=["Filename", "LastModified", "Commit-Sha", "Size"]))
class RepoCreateCommand(BaseUserCommand):
def run(self):
token = HfFolder.get_token()
if token is None:
print("Not logged in")
exit(1)
try:
stdout = subprocess.check_output(["git", "--version"]).decode("utf-8")
print(ANSI.gray(stdout.strip()))
except FileNotFoundError:
print("Looks like you do not have git installed, please install.")
try:
stdout = subprocess.check_output(["git-lfs", "--version"]).decode("utf-8")
print(ANSI.gray(stdout.strip()))
except FileNotFoundError:
print(
ANSI.red(
"Looks like you do not have git-lfs installed, please install."
" You can install from https://git-lfs.github.com/."
" Then run `git lfs install` (you only have to do this once)."
)
)
print("")
user, _ = self._api.whoami(token)
namespace = self.args.organization if self.args.organization is not None else user
print("You are about to create {}".format(ANSI.bold(namespace + "/" + self.args.name)))
if not self.args.yes:
choice = input("Proceed? [Y/n] ").lower()
if not (choice == "" or choice == "y" or choice == "yes"):
print("Abort")
exit()
try:
url = self._api.create_repo(token, name=self.args.name, organization=self.args.organization)
except HTTPError as e:
print(e)
print(ANSI.red(e.response.text))
exit(1)
print("\nYour repo now lives at:")
print(" {}".format(ANSI.bold(url)))
print("\nYou can clone it locally with the command below," " and commit/push as usual.")
print(f"\n git clone {url}")
print("")
class DeprecatedUploadCommand(BaseUserCommand):
def run(self):
print(
ANSI.red(
"Deprecated: used to be the way to upload a model to S3."
" We now use a git-based system for storing models and other artifacts."
" Use the `repo create` command instead."
)
)
exit(1)
class UploadCommand(BaseUserCommand):
def walk_dir(self, rel_path):
"""