File tree 4 files changed +38
-23
lines changed
4 files changed +38
-23
lines changed Original file line number Diff line number Diff line change 17
17
- " .github/workflows/*.yml"
18
18
19
19
jobs :
20
- check_code_quality :
20
+ tests :
21
21
runs-on : ubuntu-latest
22
22
steps :
23
23
- uses : actions/checkout@v4
34
34
- name : Check quality
35
35
run : |
36
36
make style && make quality
37
-
38
- pytest :
39
- needs : check_code_quality
40
- strategy :
41
- matrix :
42
- python-version :
43
- - " 3.8"
44
- os :
45
- - " ubuntu-latest"
46
- runs-on : ${{ matrix.os }}
47
- steps :
48
- - uses : actions/checkout@v4
49
- - name : Set up Python ${{ matrix.python-version }}
50
- uses : actions/setup-python@v5
51
- with :
52
- python-version : ${{ matrix.python-version }}
53
- cache : " pip"
54
- cache-dependency-path : " setup.py"
55
- - name : Install dependencies
56
- run : |
57
- python -m pip install --upgrade pip
58
- python -m pip install .[torch,dev]
59
37
- name : Test with pytest
60
38
run : |
61
39
make test
Original file line number Diff line number Diff line change @@ -214,6 +214,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
214
214
- [ Wikipedia (zh)] ( https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered )
215
215
- [ Pile (en)] ( https://huggingface.co/datasets/EleutherAI/pile )
216
216
- [ SkyPile (zh)] ( https://huggingface.co/datasets/Skywork/SkyPile-150B )
217
+ - [ FineWeb (en)] ( https://huggingface.co/datasets/HuggingFaceFW/fineweb )
218
+ - [ FineWeb-Edu (en)] ( https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu )
217
219
- [ The Stack (en)] ( https://huggingface.co/datasets/bigcode/the-stack )
218
220
- [ StarCoder (en)] ( https://huggingface.co/datasets/bigcode/starcoderdata )
219
221
@@ -273,6 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
273
275
<details ><summary >Preference datasets</summary >
274
276
275
277
- [ DPO mixed (en&zh)] ( https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k )
278
+ - [ UltraFeedback (en)] ( https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized )
276
279
- [ Orca DPO Pairs (en)] ( https://huggingface.co/datasets/Intel/orca_dpo_pairs )
277
280
- [ HH-RLHF (en)] ( https://huggingface.co/datasets/Anthropic/hh-rlhf )
278
281
- [ Nectar (en)] ( https://huggingface.co/datasets/berkeley-nest/Nectar )
Original file line number Diff line number Diff line change @@ -214,6 +214,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
214
214
- [ Wikipedia (zh)] ( https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered )
215
215
- [ Pile (en)] ( https://huggingface.co/datasets/EleutherAI/pile )
216
216
- [ SkyPile (zh)] ( https://huggingface.co/datasets/Skywork/SkyPile-150B )
217
+ - [ FineWeb (en)] ( https://huggingface.co/datasets/HuggingFaceFW/fineweb )
218
+ - [ FineWeb-Edu (en)] ( https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu )
217
219
- [ The Stack (en)] ( https://huggingface.co/datasets/bigcode/the-stack )
218
220
- [ StarCoder (en)] ( https://huggingface.co/datasets/bigcode/starcoderdata )
219
221
@@ -273,6 +275,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
273
275
<details ><summary >偏好数据集</summary >
274
276
275
277
- [ DPO mixed (en&zh)] ( https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k )
278
+ - [ UltraFeedback (en)] ( https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized )
276
279
- [ Orca DPO Pairs (en)] ( https://huggingface.co/datasets/Intel/orca_dpo_pairs )
277
280
- [ HH-RLHF (en)] ( https://huggingface.co/datasets/Anthropic/hh-rlhf )
278
281
- [ Nectar (en)] ( https://huggingface.co/datasets/berkeley-nest/Nectar )
Original file line number Diff line number Diff line change 391
391
"rejected" : " rejected"
392
392
}
393
393
},
394
+ "ultrafeedback" : {
395
+ "hf_hub_url" : " llamafactory/ultrafeedback_binarized" ,
396
+ "ms_hub_url" : " llamafactory/ultrafeedback_binarized" ,
397
+ "ranking" : true ,
398
+ "columns" : {
399
+ "prompt" : " instruction" ,
400
+ "chosen" : " chosen" ,
401
+ "rejected" : " rejected"
402
+ }
403
+ },
394
404
"orca_pairs" : {
395
405
"hf_hub_url" : " Intel/orca_dpo_pairs" ,
396
406
"ranking" : true ,
448
458
"assistant_tag" : " assistant"
449
459
}
450
460
},
461
+ "ultrafeedback_kto" : {
462
+ "hf_hub_url" : " argilla/ultrafeedback-binarized-preferences-cleaned-kto" ,
463
+ "ms_hub_url" : " AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto" ,
464
+ "columns" : {
465
+ "prompt" : " prompt" ,
466
+ "response" : " completion" ,
467
+ "kto_tag" : " label"
468
+ }
469
+ },
451
470
"wiki_demo" : {
452
471
"file_name" : " wiki_demo.txt" ,
453
472
"columns" : {
501
520
"prompt" : " text"
502
521
}
503
522
},
523
+ "fineweb" : {
524
+ "hf_hub_url" : " HuggingFaceFW/fineweb" ,
525
+ "columns" : {
526
+ "prompt" : " text"
527
+ }
528
+ },
529
+ "fineweb_edu" : {
530
+ "hf_hub_url" : " HuggingFaceFW/fineweb-edu" ,
531
+ "columns" : {
532
+ "prompt" : " text"
533
+ }
534
+ },
504
535
"the_stack" : {
505
536
"hf_hub_url" : " bigcode/the-stack" ,
506
537
"ms_hub_url" : " AI-ModelScope/the-stack" ,
You can’t perform that action at this time.
0 commit comments