import requests
import os

def model_on_modelscope(proj: str, fn: str) -> dict:
    # Build a download descriptor (file name + direct URL) for a file hosted
    # in one of judd2024's quantized-model repos on ModelScope.
    url = f"https://modelscope.cn/api/v1/models/judd2024/{proj}/repo?Revision=master&FilePath={fn}"
    return {'fn': fn, 'url': url}

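# For illustration (values taken from the registry below), the helper returns, e.g.:
#   model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-1.5b.bin')
#   == {'fn': 'qwen2-1.5b.bin',
#       'url': 'https://modelscope.cn/api/v1/models/judd2024/chatllm_quantized_qwen2/repo?Revision=master&FilePath=qwen2-1.5b.bin'}
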
# Model registry: model name -> default variant, one-line description, and
# per-variant quantized download descriptors.
all_models = {
    'qwen2': {
        'default': '1.5b',
        'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
        'variants': {
            '7b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-7b.bin')
                }
            },
            '1.5b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-1.5b.bin')
                }
            },
            '0.5b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-0.5b.bin')
                }
            },
        }
    },
    'gemma': {
        'default': '2b',
        'brief': 'Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1.',
        'variants': {
            '2b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_models', 'gemma-1.1-2b.bin')
                }
            },
        }
    },
    'llama3': {
        'default': '8b',
        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
        'variants': {
            '8b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'llama3-8b-q4_1.bin')
                }
            },
        }
    },
    'minicpm': {
        'default': '2b-sft',
        'brief': 'MiniCPM is an end-side large language model developed by ModelBest Inc. and TsinghuaNLP.',
        'variants': {
            '2b-sft': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_models', 'minicpm_sft_q8.bin')
                }
            },
            '2b-dpo': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'minicpm-dpo-q4_1.bin')
                }
            },
        }
    },
    'qwen1.5': {
        'default': 'moe',
        'brief': 'Qwen1.5 is the beta version of Qwen2 from Alibaba group.',
        'variants': {
            '1.8b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-1.8b.bin')
                }
            },
            'moe': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-moe-q4_1.bin')
                }
            },
        }
    },
    'qanything': {
        'default': '7b',
        'brief': 'QAnything is a local knowledge base question-answering system based on QwenLM.',
        'variants': {
            '7b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen-qany-7b-q4_1.bin')
                }
            },
        }
    },
    'starling-lm': {
        'default': '7b',
        'brief': 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.',
        'variants': {
            '7b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'starling-7b-q4_1.bin')
                }
            },
        }
    },
    'yi-1': {
        'default': '34b',
        'brief': 'Yi (v1) is a high-performing, bilingual language model.',
        'variants': {
            '34b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'yi-34b-q4.bin')
                }
            },
        }
    },
}

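# The registry is navigated name -> variant -> quantization, e.g.:
#   all_models['llama3']['variants']['8b']['quantized']['q4_1']
# yields the descriptor for llama3-8b-q4_1.bin built by model_on_modelscope above.
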
def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=60, fill='█', printEnd="\r", auto_nl=True):
    # Render a one-line textual progress bar, redrawn in place via the leading '\r'.
    total = max(total, 1)  # avoid division by zero when the size is unknown (reported as 0)
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=printEnd)
    if (iteration == total) and auto_nl:
        print()

def download_file(url: str, fn: str, prefix: str):
    # Stream `url` to the local file `fn`, showing download progress.
    flag = False
    print(f"downloading {prefix}")
    with open(fn, 'wb') as f:
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            total = int(r.headers.get('content-length', 0))

            progress = 0

            for chunk in r.iter_content(chunk_size=8192):
                progress += len(chunk)
                f.write(chunk)
                print_progress_bar(progress, total)

            # Treat a missing content-length header (total == 0) as success,
            # since progress can never match an unknown size.
            flag = (total == 0) or (progress == total)
    return flag

def show():
    # List every model with its brief description, available variants, and default.
    def show_variants(m, info, default):
        variants = [m + ":" + s for s in info.keys()]
        all_var = ', '.join(variants)
        print(f"Available: {all_var}")
        if len(variants) > 1:
            print(f"Default : {m + ':' + default}")

    def show_model(m):
        info = all_models[m]
        print(f"**{m}**: {info['brief']}")
        show_variants(m, info['variants'], info['default'])
        print()

    for m in all_models.keys():
        show_model(m)

def parse_model_id(model_id: str):
    # A model id is "name" or "name:variant", e.g. "qwen2:7b"; the default
    # variant (and its default quantization) is used when parts are omitted.
    parts = model_id.split(':')
    model = all_models[parts[0]]
    variants = model['variants']
    var = variants[parts[1]] if len(parts) >= 2 else variants[model['default']]
    return var['quantized'][var['default']]

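# For example, parse_model_id('qwen2') resolves the default variant '1.5b' at its
# default quantization 'q8', i.e. the descriptor for qwen2-1.5b.bin.
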
def get_model(model_id, storage_dir):
    # Return a local path to the model file, downloading it first
    # if it is not already cached in `storage_dir`.
    if not os.path.isdir(storage_dir):
        os.mkdir(storage_dir)
        assert os.path.isdir(storage_dir), f"{storage_dir} is invalid"

    info = parse_model_id(model_id)
    fn = os.path.join(storage_dir, info['fn'])
    if os.path.isfile(fn):
        return fn

    assert download_file(info['url'], fn, model_id), f"failed to download {model_id}"

    return fn

def find_index(l: list, x) -> int:
    # Return the index of x in l, or -1 if absent.
    if x in l:
        return l.index(x)
    else:
        return -1

def preprocess_args(args: list[str], storage_dir) -> list[str]:
    # Find the value following `-m`/`--model`; if it is prefixed with ':', treat
    # it as a model id and replace it with the path of the (downloaded) local file.
    i = find_index(args, '-m')
    if i < 0:
        i = find_index(args, '--model')
    if (i < 0) or (i + 1 >= len(args)):  # no model flag, or flag given without a value
        return args
    if args[i + 1].startswith(':'):
        args[i + 1] = get_model(args[i + 1][1:], storage_dir)

    return args

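# A small usage sketch (the cache directory '.' is illustrative): with the
# registry above,
#   preprocess_args(['-m', ':qwen2'], '.')
# downloads qwen2-1.5b.bin on first use and returns ['-m', './qwen2-1.5b.bin'].
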
if __name__ == '__main__':
    show()
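
# How a launcher might use this module (a sketch; the binary name './chatllm'
# and the 'quantized' cache directory are assumptions, not part of this script):
#
#   import subprocess, sys
#   args = preprocess_args(sys.argv[1:], 'quantized')
#   subprocess.run(['./chatllm'] + args)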