[FLAX] Minor fixes in CLM example (#12914)
* readme: fix retrieval of vocab size for flax clm example
* examples: fix flax clm example when using training/evaluation files
This commit is contained in:
@@ -211,7 +211,7 @@ from transformers import GPT2Config
|
|||||||
|
|
||||||
model_dir = "./norwegian-gpt2" # ${MODEL_DIR}
|
model_dir = "./norwegian-gpt2" # ${MODEL_DIR}
|
||||||
|
|
||||||
config = GPT2Config.from_pretrained("gpt2", resid_pdrop=0.0, embd_pdrop=0.0, attn_pdrop=0.0, vocab_size=tokenizer.vocab_size)
|
config = GPT2Config.from_pretrained("gpt2", resid_pdrop=0.0, embd_pdrop=0.0, attn_pdrop=0.0, vocab_size=tokenizer.get_vocab_size())
|
||||||
config.save_pretrained(model_dir)
|
config.save_pretrained(model_dir)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -308,14 +308,14 @@ def main():
|
|||||||
extension = "text"
|
extension = "text"
|
||||||
dataset = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)
|
dataset = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)
|
||||||
|
|
||||||
if "validation" not in datasets.keys():
|
if "validation" not in dataset.keys():
|
||||||
datasets["validation"] = load_dataset(
|
dataset["validation"] = load_dataset(
|
||||||
extension,
|
extension,
|
||||||
data_files=data_files,
|
data_files=data_files,
|
||||||
split=f"train[:{data_args.validation_split_percentage}%]",
|
split=f"train[:{data_args.validation_split_percentage}%]",
|
||||||
cache_dir=model_args.cache_dir,
|
cache_dir=model_args.cache_dir,
|
||||||
)
|
)
|
||||||
datasets["train"] = load_dataset(
|
dataset["train"] = load_dataset(
|
||||||
extension,
|
extension,
|
||||||
data_files=data_files,
|
data_files=data_files,
|
||||||
split=f"train[{data_args.validation_split_percentage}%:]",
|
split=f"train[{data_args.validation_split_percentage}%:]",
|
||||||
|
|||||||
Reference in New Issue
Block a user