From f4bf0dea46b607e80ad61dacef13bd37f7270a3a Mon Sep 17 00:00:00 2001
From: jncasey <31020859+jncasey@users.noreply.github.com>
Date: Wed, 27 Jan 2021 03:48:18 -0500
Subject: [PATCH] Fix auto-resume training from checkpoint (#9822)

* Fix auto-resume training from checkpoint

* style fixes
---
 src/transformers/trainer_utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py
index 2f11cda193..aa371d4524 100644
--- a/src/transformers/trainer_utils.py
+++ b/src/transformers/trainer_utils.py
@@ -77,15 +77,19 @@ class TrainOutput(NamedTuple):
 
 
 PREFIX_CHECKPOINT_DIR = "checkpoint"
-_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d)+$")
+_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d+)$")
 
 
 def get_last_checkpoint(folder):
     content = os.listdir(folder)
-    checkpoints = [path for path in content if _re_checkpoint.search(path) is not None and os.path.isdir(path)]
+    checkpoints = [
+        path
+        for path in content
+        if _re_checkpoint.search(path) is not None and os.path.isdir(os.path.join(folder, path))
+    ]
     if len(checkpoints) == 0:
         return
-    return max(checkpoints, key=lambda x: int(_re_checkpoint.search(x).groups()[0]))
+    return os.path.join(folder, max(checkpoints, key=lambda x: int(_re_checkpoint.search(x).groups()[0])))
 
 
 class EvaluationStrategy(ExplicitEnum):