|
| 1 | +from transformers import AutoTokenizer, PreTrainedTokenizerFast |
| 2 | +from http.server import HTTPServer, BaseHTTPRequestHandler |
| 3 | +import json |
| 4 | +import argparse |
| 5 | + |
class Tokenizer_Http():
    """Wrap a Hugging Face tokenizer behind the small API the HTTP handler uses.

    The wrapped tokenizer is loaded once with ``AutoTokenizer.from_pretrained``
    and stored on ``self.tokenizer``; every method delegates to it.
    """

    def __init__(self, model_id):
        """Load the tokenizer identified by *model_id* (hub name or local path)."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def encode(self, prompt, content):
        """Return the token ids for a two-message chat.

        Args:
            prompt: Text placed in the ``user`` message.
            content: Text placed in the ``system`` message.

        Returns:
            Whatever ``self.tokenizer.encode`` returns for the rendered chat
            text (a generation prompt is appended by the template).
        """
        messages = [
            {"role": "system", "content": content},
            {"role": "user", "content": prompt},
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # Echo the rendered prompt so the operator can see what gets tokenized.
        print(text)
        # The chat template has already written the model's special tokens into
        # `text`; letting encode() add them again would duplicate BOS/EOS on
        # tokenizers that insert them automatically.
        return self.tokenizer.encode(text, add_special_tokens=False)

    def decode(self, token_ids):
        """Return the text that ``self.tokenizer.decode`` produces for *token_ids*."""
        return self.tokenizer.decode(token_ids)

    @property
    def bos_id(self):
        """Beginning-of-sequence token id as reported by the tokenizer."""
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        """End-of-sequence token id as reported by the tokenizer."""
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        """Beginning-of-sequence token string as reported by the tokenizer."""
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        """End-of-sequence token string as reported by the tokenizer."""
        return self.tokenizer.eos_token
| 43 | + |
class Request(BaseHTTPRequestHandler):
    """HTTP handler exposing the module-level ``tokenizer`` object.

    Routes (success bodies are JSON, status 200):
        GET  /bos_id  -> {"bos_id": <id or -1>}
        GET  /eos_id  -> {"eos_id": <id or -1>}
        POST /encode  -> request {"text": ...}      -> {"token_ids": <ids or -1>}
        POST /decode  -> request {"token_ids": ...} -> {"text": <str>}

    Failures answer with the plain body ``error``: 404 for an unknown path,
    400 for a malformed request, 500 when the tokenizer itself fails on POST.

    The handler reads the global names ``tokenizer`` and ``args`` (for
    ``args.content``), which the script entry point is expected to define.
    """

    timeout = 5
    server_version = 'Apache'

    def _send(self, status, msg, kind):
        """Log *msg*, then send it as the whole response.

        The status line is written only here, after the payload is known, so
        a failure can never be reported to the client as a 200.
        """
        print(msg)
        payload = str(msg).encode()  # to str, then to bytes
        self.send_response(status)
        self.send_header("type", kind)  # custom header kept for existing clients
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)  # return the bytes to the client

    def _encode_msg(self, req):
        """Build the JSON reply for /encode from the parsed request body."""
        prompt = req['text']
        token_ids = tokenizer.encode(prompt, args.content)
        if token_ids is None:
            return json.dumps({'token_ids': -1})
        return json.dumps({'token_ids': token_ids})

    def _decode_msg(self, req):
        """Build the JSON reply for /decode from the parsed request body."""
        text = tokenizer.decode(req['token_ids'])
        if text is None:
            return json.dumps({'text': ""})
        return json.dumps({'text': text})

    def do_GET(self):
        """Serve the special-token id queries."""
        print(self.path)
        if self.path == '/bos_id':
            bos_id = tokenizer.bos_id
            # -1 stands in for "tokenizer has no such token".
            msg = json.dumps({'bos_id': -1 if bos_id is None else bos_id})
        elif self.path == '/eos_id':
            eos_id = tokenizer.eos_id
            msg = json.dumps({'eos_id': -1 if eos_id is None else eos_id})
        else:
            self._send(404, 'error', "get")
            return
        self._send(200, msg, "get")

    def do_POST(self):
        """Serve /encode and /decode."""
        try:
            # A missing header is treated as an empty body instead of crashing
            # in int(None).
            length = int(self.headers.get('Content-Length') or 0)
            if length < 0:
                # read(-1) would block until the peer closes the socket.
                raise ValueError('negative Content-Length')
            data = self.rfile.read(length).decode()  # bytes -> str
        except ValueError:
            # Covers a non-numeric length and a body that is not valid UTF-8.
            self._send(400, 'error', "post")
            return

        if self.path == '/encode':
            build = self._encode_msg
        elif self.path == '/decode':
            build = self._decode_msg
        else:
            self._send(404, 'error', "post")
            return

        try:
            msg = build(json.loads(data))
        except (ValueError, KeyError, TypeError):
            # Invalid JSON, missing key, or a value of the wrong type.
            self._send(400, 'error', "post")
            return
        except Exception as exc:
            # Last-resort boundary: report and answer instead of dropping the
            # connection without any response.
            self.log_error("%s failed: %r", self.path, exc)
            self._send(500, 'error', "post")
            return
        self._send(200, msg, "post")
| 112 | + |
| 113 | + |
if __name__ == "__main__":
    # Command-line options: bind address, tokenizer location, and the system
    # message that is prepended to every /encode request.
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='localhost')
    parser.add_argument('--port', type=int, default=8080)
    parser.add_argument('--model_id', type=str, default='qwen3_0.6B_tokenizer')
    parser.add_argument('--content', type=str, default='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.')

    # NOTE: `args` and `tokenizer` must stay module-level names with exactly
    # these spellings; the Request handler looks them up as globals.
    args = parser.parse_args()
    tokenizer = Tokenizer_Http(args.model_id)

    host = (args.host, args.port)  # (address, port) pair to bind to
    print('http://%s:%s' % host)
    server = HTTPServer(host, Request)  # server instance using the handler class
    try:
        server.serve_forever()  # blocks until interrupted
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the service; exit without a traceback.
        pass
    finally:
        server.server_close()  # release the listening socket
0 commit comments