Skip to content

Commit ca3becf

Browse files
committed
Fixed how the tokenizer path is resolved
1 parent eacf7dd commit ca3becf

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

scripts/push_to_hf.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,11 @@ def find_saved_tokenizer(output_dir: Path) -> Path | None:
180180
if tokenizer_path.exists():
181181
return tokenizer_path
182182

183-
# If not found, return None
183+
# If not found, fall back to the known default tokenizer (relative to the current working directory), if that file exists
184+
known_default = Path("simple_stories_train/tokenizer/simplestories-tokenizer.json")
185+
if known_default.is_file():
186+
return known_default.resolve()
187+
184188
return None
185189

186190

@@ -280,7 +284,8 @@ def main() -> None:
280284
model_max_len = config.block_size
281285

282286
# Convert and upload tokenizer
283-
output_dir = args.checkpoint_path.parent
287+
# The models are stored inside the checkpoints folder, while the tokenizer is saved one level above it
288+
output_dir = args.checkpoint_path.parent.parent
284289
convert_and_upload_tokenizer(
285290
repo_id=args.repo_id,
286291
token=args.token,

0 commit comments

Comments
 (0)