Commit 8bdacf8

Author: Judd

    downloader; quick start.

1 parent dc581d7

5 files changed: +267 −3 lines

README.md: +5 −1

```diff
@@ -9,7 +9,7 @@
 Inference of a bunch of models from less than 1B to more than 300B, for real-time chatting with [RAG](./docs/rag.md) on your computer (CPU),
 pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [ggml](https://github.com/ggerganov/ggml).
 
-| [Supported Models](./docs/models.md) | [Download Quantized Models](https://modelscope.cn/models/judd2024/chatllm_quantized_models) |
+| [Supported Models](./docs/models.md) | [Download Quantized Models](./docs/quick_start.md#download-quantized-models) |
 
 **What's New:**
 
@@ -44,6 +44,10 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 * [x] [LoRA](./docs/models.md#lora-models);
 * [x] Python/JavaScript/C [Bindings](./docs/binding.md), web demo, and more possibilities.
 
+## Quick Start
+
+As simple as `python chatllm.py -i -m :model_id`. [Check it out](./docs/quick_start.md).
+
 ## Usage
 
 ### Preparation
```

README_zh.md: +5 −1

```diff
@@ -8,7 +8,7 @@
 
 Real-time chatting on your computer (CPU), with [RAG](./docs/rag.md). Supports inference of a range of models from under 1B to over 300B. Pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [ggml](https://github.com/ggerganov/ggml).
 
-| [Supported Models](./docs/models.md) | [Download Quantized Models](https://modelscope.cn/models/judd2024/chatllm_quantized_models) |
+| [Supported Models](./docs/models.md) | [Download Quantized Models](./docs/quick_start.md#download-quantized-models) |
 
 ## Features
 
@@ -24,6 +24,10 @@
 - [x] LoRA
 - [x] Python/JavaScript/C [bindings](./docs/binding.md), web demo, and more possibilities.
 
+## Quick Start
+
+Just one line: `python chatllm.py -i -m :model_id`. See the [details](./docs/quick_start.md).
+
 ## Usage
 
 #### Preparation
```

bindings/chatllm.py: +13 −1

```diff
@@ -4,6 +4,12 @@
 import threading
 from typing import Any, Iterable, List, Union
 
+try:
+    import model_downloader
+except ImportError:
+    this_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
+    sys.path.append(os.path.join(this_dir, '..', 'scripts'))
+    import model_downloader
 
 class PrintType(IntEnum):
     PRINT_CHAT_CHUNK = 0,
@@ -20,10 +26,13 @@ class LibChatLLM:
     _obj2id = {}
     _id2obj = {}
 
-    def __init__(self, lib: str = '') -> None:
+    def __init__(self, lib: str = '', model_storage: str = '') -> None:
 
         if lib == '':
             lib = os.path.dirname(os.path.abspath(sys.argv[0]))
+        self._lib_path = lib
+        self.model_storage = os.path.abspath(model_storage if model_storage != '' else os.path.join(lib, '..', 'quantized'))
+
         lib = os.path.join(lib, 'libchatllm.')
         if sys.platform == 'win32':
             lib = lib + 'dll'
@@ -120,6 +129,9 @@ def alloc_id_for_obj(self, obj: Any) -> int:
     def append_param(self, obj: c_void_p, param: Union[str, List[str]]) -> None:
         if isinstance(param, str):
             param = [param]
+
+        # resolve ':model_id' arguments to local files, downloading on demand
+        param = model_downloader.preprocess_args(param, self.model_storage)
         for s in param:
             self._chatllm_append_param(obj, c_char_p(s.encode()))
```
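A minimal sketch of what this plumbing does, assuming it is run from the repository root (the model ID and storage path are illustrative):

```python
# Sketch: how a ':model_id' argument is resolved before it reaches libchatllm.
import sys
sys.path.append('scripts')  # make scripts/model_downloader.py importable

import model_downloader

args = ['-i', '-m', ':qwen2:0.5b']
# ':qwen2:0.5b' -> download qwen2-0.5b.bin into ./quantized (if missing),
# then rewrite the argument to the local file path.
args = model_downloader.preprocess_args(args, 'quantized')
print(args)  # e.g. ['-i', '-m', 'quantized/qwen2-0.5b.bin']
```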

docs/quick_start.md: +30 (new file)

## Quick Start

For Windows users, the easiest way is to download a release, extract it, and start chatting:

```
python chatllm.py -i -m :qwen2:0.5b
downloading qwen2:0.5b
|████████████████████████████████████████████████████████████| 100.0%
    ________          __  __    __    __  ___ (通义千问)
   / ____/ /_  ____ _/ /_/ /   / /   /  |/  /_________  ____
  / /   / __ \/ __ `/ __/ /   / /   / /|_/ // ___/ __ \/ __ \
 / /___/ / / / /_/ / /_/ /___/ /___/ /  / // /__/ /_/ / /_/ /
 \____/_/ /_/\__,_/\__/_____/_____/_/  /_(_)___/ .___/ .___/
You are served by QWen2,                       /_/   /_/
with 494032768 (0.5B) parameters.

You  > hi
A.I. > Hello! How can I assist you today?
You  >
```

For Linux/macOS (and Windows) users, build the [bindings](binding.md) and start chatting.

### Download Quantized Models

A [script](../scripts/model_downloader.py) is provided that can download some quantized models on demand.
When a model name starting with `:` is given to the `-m` option (as shown in the example above), the script
treats it as a model ID and downloads the file if it does not exist locally.

Use `python model_downloader.py` to list all available quantized models.
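
The same convention can be driven from Python directly; a hedged sketch, assuming it is run from the `scripts` directory (the model ID and storage path are illustrative):

```python
import model_downloader

# 'llama3' alone resolves to the default variant ('8b') and its default
# quantization ('q4_1'); 'llama3:8b' names the variant explicitly.
path = model_downloader.get_model('llama3:8b', './quantized')
print(path)  # ./quantized/llama3-8b-q4_1.bin once the download completes
```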

scripts/model_downloader.py: +214 (new file)

```python
import requests
import os

def model_on_modelscope(proj: str, fn: str) -> dict:
    url = f"https://modelscope.cn/api/v1/models/judd2024/{proj}/repo?Revision=master&FilePath={fn}"
    return { 'fn': fn, 'url': url }

# registry of downloadable quantized models: model -> variants -> quantizations
all_models = {
    'qwen2': {
        'default': '1.5b',
        'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
        'variants': {
            '7b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-7b.bin')
                }
            },
            '1.5b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-1.5b.bin')
                }
            },
            '0.5b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-0.5b.bin')
                }
            },
        }
    },
    'gemma': {
        'default': '2b',
        'brief': 'Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1.',
        'variants': {
            '2b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_models', 'gemma-1.1-2b.bin')
                }
            },
        }
    },
    'llama3': {
        'default': '8b',
        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
        'variants': {
            '8b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'llama3-8b-q4_1.bin')
                }
            },
        }
    },
    'minicpm': {
        'default': '2b-sft',
        'brief': 'MiniCPM is an end-side large language model developed by ModelBest Inc. and TsinghuaNLP.',
        'variants': {
            '2b-sft': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_models', 'minicpm_sft_q8.bin')
                }
            },
            '2b-dpo': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'minicpm-dpo-q4_1.bin')
                }
            },
        }
    },
    'qwen1.5': {
        'default': 'moe',
        'brief': 'Qwen1.5 is the beta version of Qwen2 from Alibaba group.',
        'variants': {
            '1.8b': {
                'default': 'q8',
                'quantized': {
                    'q8': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-1.8b.bin')
                }
            },
            'moe': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-moe-q4_1.bin')
                }
            },
        }
    },
    'qanything': {
        'default': '7b',
        'brief': 'QAnything is a local knowledge base question-answering system based on QwenLM.',
        'variants': {
            '7b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen-qany-7b-q4_1.bin')
                }
            },
        }
    },
    'starling-lm': {
        'default': '7b',
        'brief': 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.',
        'variants': {
            '7b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'starling-7b-q4_1.bin')
                }
            },
        }
    },
    'yi-1': {
        'default': '34b',
        'brief': 'Yi (v1) is a high-performing, bilingual language model.',
        'variants': {
            '34b': {
                'default': 'q4_1',
                'quantized': {
                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'yi-34b-q4.bin')
                }
            },
        }
    },
}

def print_progress_bar(iteration, total, prefix = '', suffix = '', decimals = 1, length = 60, fill = '█', printEnd = "\r", auto_nl = True):
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    if (iteration == total) and auto_nl:
        print()

def download_file(url: str, fn: str, prefix: str) -> bool:
    flag = False
    print(f"downloading {prefix}")
    with open(fn, 'wb') as f:
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            total = int(r.headers.get('content-length', 0))

            progress = 0

            for chunk in r.iter_content(chunk_size=8192):
                progress += len(chunk)
                f.write(chunk)
                print_progress_bar(progress, total)

            flag = progress == total
    return flag

def show():
    def show_variants(info, default):
        sizes = [s for s in info.keys()]
        variants = [m + ":" + s for s in sizes]
        all_var = ', '.join(variants)
        print(f"Available: {all_var}")
        if len(variants) > 1:
            print(f"Default  : {m + ':' + default}")

    def show_model(m):
        info = all_models[m]
        print(f"**{m}**: {info['brief']}")
        show_variants(info['variants'], info['default'])
        print()

    for m in all_models.keys():
        show_model(m)

def parse_model_id(model_id: str):
    # model IDs look like `model` or `model:variant`
    parts = model_id.split(':')
    model = all_models[parts[0]]
    variants = model['variants']
    var = variants[parts[1]] if len(parts) >= 2 else variants[model['default']]
    return var['quantized'][var['default']]

def get_model(model_id, storage_dir):
    if not os.path.isdir(storage_dir):
        os.mkdir(storage_dir)
    assert os.path.isdir(storage_dir), f"{storage_dir} is invalid"

    info = parse_model_id(model_id)
    fn = os.path.join(storage_dir, info['fn'])
    if os.path.isfile(fn):
        return fn

    assert download_file(info['url'], fn, model_id), f"failed to download {model_id}"

    return fn

def find_index(l: list, x) -> int:
    if x in l:
        return l.index(x)
    else:
        return -1

def preprocess_args(args: list[str], storage_dir) -> list[str]:
    # rewrite `-m :model_id` (or `--model :model_id`) to a local file path
    i = find_index(args, '-m')
    if i < 0:
        i = find_index(args, '--model')
    if i < 0:
        return args
    if args[i + 1].startswith(':'):
        args[i + 1] = get_model(args[i + 1][1:], storage_dir)

    return args

if __name__ == '__main__':
    show()
```
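A quick worked example of the `model[:variant]` ID grammar, assuming the module above is importable:

```python
from model_downloader import parse_model_id

print(parse_model_id('qwen2')['fn'])       # qwen2-1.5b.bin (default variant '1.5b', quant 'q8')
print(parse_model_id('qwen2:0.5b')['fn'])  # qwen2-0.5b.bin
```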
