
Stylistic fixes based on Codacy suggestions #667


Merged
merged 25 commits into from
Jun 26, 2023
Changes from all commits
25 commits
62f988d
stylistic update based on Codacy
ymcui Jun 25, 2023
e66c9f7
stylistic update based on Codacy
ymcui Jun 25, 2023
0c95b61
Update prompt_template
airaria Jun 25, 2023
ce78c23
stylistic update based on Codacy
ymcui Jun 25, 2023
3dd35da
Merge branch 'doc_fix' of https://github.com/ymcui/Chinese-LLaMA-Alpa…
ymcui Jun 25, 2023
7e92605
stylistic update based on Codacy
ymcui Jun 25, 2023
f531c6f
stylistic update based on Codacy
ymcui Jun 25, 2023
d8bd567
stylistic update based on Codacy
ymcui Jun 25, 2023
b88c2ed
Update openai_api_server.py
iMountTai Jun 25, 2023
2935363
stylistic update based on Codacy
iMountTai Jun 25, 2023
f493d9a
Update run_clm_sft_with_peft.py by Codacy
iMountTai Jun 25, 2023
d89f53c
Stylistic fixes based on Codacy suggestions
iMountTai Jun 25, 2023
c74c72f
Stylistic fixes based on Codacy suggestions
iMountTai Jun 25, 2023
e926f87
Update build_dataset.py by Codacy
iMountTai Jun 25, 2023
66fe1b6
Update openai_api_server.py
airaria Jun 25, 2023
5817b40
Update openai_api_server.py
airaria Jun 25, 2023
a9f9ed2
Stylistic update run_clm_pt_with_peft.py based on Codacy
airaria Jun 25, 2023
4d02338
Update run_clm_sft_with_peft.py
airaria Jun 25, 2023
53b5607
Trailing whitespace
ymcui Jun 26, 2023
c04757d
Trailing whitespace
ymcui Jun 26, 2023
c985552
Trailing whitespace
ymcui Jun 26, 2023
8f5f621
Trailing whitespace
ymcui Jun 26, 2023
6406fe6
remove unused variable in run_clm_sft_with_peft.py
airaria Jun 26, 2023
e556718
remove unused function smart_tokenizer_and_embedding_resize
airaria Jun 26, 2023
a96009b
remove unused Dict imported from typing
airaria Jun 26, 2023
6 changes: 3 additions & 3 deletions data/README.md
@@ -1,13 +1,13 @@
# 数据 Data

### alpaca_data_zh_51k.json
### alpaca_data_zh_51k.json

中文Alpaca数据,包含51k个从ChatGPT (gpt-3.5-turbo)爬取的指令数据。

Chinese Alpaca dataset, containing 51k instruction data crawled from ChatGPT (gpt-3.5-turbo).

### pt_sample_data.txt
### pt_sample_data.txt

CLM任务预训练样例数据

Pre-training sample data
Pre-training sample data
8 changes: 4 additions & 4 deletions notebooks/README.md
@@ -1,14 +1,14 @@
# 笔记本示例 Notebooks

### ceval_example_for_chinese_alpaca.ipynb
### ceval_example_for_chinese_alpaca.ipynb

利用Chinese Alpaca模型解码C-Eval数据集的示例。

Example of decoding C-Eval dataset with Chinese Alpaca.

建议查看Colab上的最新版 / Check latest notebook:<a href="https://colab.research.google.com/drive/12YewimRT7JuqJGOejxN7YG8jq2de4DnF?usp=sharing" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### convert_and_quantize_chinese_llama_and_alpaca.ipynb
### convert_and_quantize_chinese_llama_and_alpaca.ipynb

Colab上的转换和量化中文LLaMA/Alpaca(含Plus版本)的运行示例(仅供流程参考)。

@@ -40,8 +40,8 @@ Example of running the Gradio demo on Colab.

在Colab中打开 / Open the notebook in Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ymcui/Chinese-LLaMA-Alpaca/blob/main/notebooks/gradio_web_demo.ipynb)

### legacy/
### legacy/

旧版notebook,供参考,但不会再更新。

Old notebook. Reference only, will not be updated.
Old notebook. Reference only, will not be updated.
14 changes: 7 additions & 7 deletions scripts/README.md
@@ -1,6 +1,6 @@
# 代码与脚本 Code and Scripts

### training/
### training/

预训练与指令精调代码,Wiki:

@@ -12,13 +12,13 @@ Pre-training and instruction finetuning code, Wiki:
- Pre-training: https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Pretraining-Script
- Instruction finetuning: https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/SFT-Script

### inference/
### inference/

使用🤗transformers进行推理,Wiki:[https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/使用Transformers推理](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/使用Transformers推理)

Inference using 🤗transformers, Wiki: https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Inference-with-Transformers

### langchain/
### langchain/

使用LangChain进行检索式问答和文本摘要的示例,Wiki:[https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/与LangChain进行集成](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/与LangChain进行集成)

@@ -30,25 +30,25 @@ Using LangChain for Retrieval QA and Summarization, Wiki: https://github.com/ymc

A server that implements OPENAI API using fastapi, Wiki: [https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/API-Calls](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/API-Calls)

### merge_tokenizer/
### merge_tokenizer/

中文词表扩充代码,Wiki: [https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/训练细节#准备工作词表扩充](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/训练细节#准备工作词表扩充)

Code for extending Chinese vocabulary, Wiki: https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Training-Details#preparation-vocabulary-expansion

### merge_llama_with_chinese_lora.py
### merge_llama_with_chinese_lora.py

合并LLaMA/Alpaca LoRA脚本,Wiki: [https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/手动模型合并与转换](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/手动模型合并与转换)

Script for merging LLaMA/Alpaca LoRA. Wiki: https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Manual-Conversion

### merge_llama_with_chinese_lora_low_mem.py
### merge_llama_with_chinese_lora_low_mem.py

(推荐)低资源版合并LLaMA/Alpaca LoRA脚本,Wiki: [https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/手动模型合并与转换](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/手动模型合并与转换)

(recommended)Script for merging LLaMA/Alpaca LoRA (low-resource version). Wiki: https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Manual-Conversion

### crawl_prompt.py
### crawl_prompt.py

指令数据爬取脚本,Wiki:[https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/训练细节#训练数据](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/训练细节#训练数据)

4 changes: 0 additions & 4 deletions scripts/ceval/eval.py
@@ -11,7 +11,6 @@
choices = ["A", "B", "C", "D"]

def main(args, evaluator,take):

assert os.path.exists("subject_mapping.json"), "subject_mapping.json not found!"
with open("subject_mapping.json") as f:
subject_mapping = json.load(f)
@@ -77,8 +76,6 @@ def main(args, evaluator,take):
json.dump(summary,open(save_result_dir+'/summary.json','w'),ensure_ascii=False,indent=2)




if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str)
@@ -92,7 +89,6 @@ def main(args, evaluator,take):
parser.add_argument("--do_save_csv", choices=["False","True"], default="False")
parser.add_argument("--output_dir", type=str)
parser.add_argument("--do_test", choices=["False","True"], default="False")


args = parser.parse_args()

3 changes: 1 addition & 2 deletions scripts/ceval/evaluator.py
@@ -1,6 +1,5 @@
# This code is modified from C-Eval Project: https://github.com/SJTU-LIT/ceval

import re
import string
class Evaluator:
def __init__(self, choices, model_name, k=-1):
@@ -26,7 +25,7 @@ def generate_few_shot_prompt(self, subject, dev_df):
for i in range(k):
prompt += self.format_example(dev_df.iloc[i, :])
return prompt

def eval_subject(self, subject_name, test_df, dev_df=None, few_shot=False, save_result_dir=None):
pass

16 changes: 8 additions & 8 deletions scripts/ceval/llama_evaluator.py
@@ -42,13 +42,13 @@ def __init__(self, choices, k, model_path, device, temperature=0.2):
self.D_id = self.tokenizer.encode(":D")[-1]


def eval_subject(self, subject_name,
test_df,
dev_df=None,
few_shot=False,
cot=False,
save_result_dir=None,
with_prompt=False,
def eval_subject(self, subject_name,
test_df,
dev_df=None,
few_shot=False,
cot=False,
save_result_dir=None,
with_prompt=False,
constrained_decoding=False,
do_test=False):
all_answers = {}
@@ -81,7 +81,7 @@ def eval_subject(self, subject_name,

inputs = self.tokenizer(instruction, return_tensors="pt")
generation_output = self.model.generate(
input_ids = inputs["input_ids"].to(self.device),
input_ids = inputs["input_ids"].to(self.device),
attention_mask = inputs['attention_mask'].to(self.device),
eos_token_id=self.tokenizer.eos_token_id,
pad_token_id=self.tokenizer.pad_token_id,
6 changes: 3 additions & 3 deletions scripts/crawl_prompt.py
@@ -1,5 +1,4 @@
import openai
import json
import sys
import random

@@ -23,16 +22,17 @@ def return_random_prompt():
system_prompt += "4. 除非特别要求,请使用中文,指令可以是命令句、疑问句、或其他合适的类型。\n"
system_prompt += "5. 为指令生成一个适当且涉及真实情况的<input>,不应该只包含简单的占位符。<input>应提供实质性的内容,具有挑战性。字数不超过" + str(random.randint(80, 120)) + "字。\n"
system_prompt += "6. <output>应该是对指令的适当且真实的回应,不能只回复答应或拒绝请求。如果需要额外信息才能回复时,请努力预测用户意图并尝试回复。<output>的内容应少于" + str(random.randint(128, 512)) + "字。\n\n"

system_prompt += "请给出满足条件的20条JSON格式数据:\n"

return system_prompt


if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python crawl_prompt.py <output_file>")
exit(1)

output_file = open(sys.argv[1], 'w')

MAX_EPOCHS = 1 # number of data to generate (each prompt contains 20 JSON-formatted data)
2 changes: 1 addition & 1 deletion scripts/inference/gradio_demo.py
@@ -43,7 +43,7 @@
tokenizer = LlamaTokenizer.from_pretrained(args.tokenizer_path)

base_model = LlamaForCausalLM.from_pretrained(
args.base_model,
args.base_model,
load_in_8bit=load_in_8bit,
torch_dtype=load_type,
low_cpu_mem_usage=True,
6 changes: 3 additions & 3 deletions scripts/inference/inference_hf.py
@@ -60,7 +60,7 @@ def generate_prompt(instruction, input=None):
tokenizer = LlamaTokenizer.from_pretrained(args.tokenizer_path)

base_model = LlamaForCausalLM.from_pretrained(
args.base_model,
args.base_model,
load_in_8bit=False,
torch_dtype=load_type,
low_cpu_mem_usage=True,
@@ -116,7 +116,7 @@ def generate_prompt(instruction, input=None):
input_text = raw_input_text
inputs = tokenizer(input_text,return_tensors="pt") #add_special_tokens=False ?
generation_output = model.generate(
input_ids = inputs["input_ids"].to(device),
input_ids = inputs["input_ids"].to(device),
attention_mask = inputs['attention_mask'].to(device),
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.pad_token_id,
@@ -140,7 +140,7 @@ def generate_prompt(instruction, input=None):
input_text = example
inputs = tokenizer(input_text,return_tensors="pt") #add_special_tokens=False ?
generation_output = model.generate(
input_ids = inputs["input_ids"].to(device),
input_ids = inputs["input_ids"].to(device),
attention_mask = inputs['attention_mask'].to(device),
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.pad_token_id,
8 changes: 4 additions & 4 deletions scripts/langchain/langchain_qa.py
@@ -59,7 +59,7 @@
device = torch.device(0)
else:
device = torch.device('cpu')

loader = TextLoader(file_path)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
@@ -89,8 +89,8 @@
chain_type_kwargs = {"prompt": PROMPT}
qa = RetrievalQA.from_chain_type(
llm=model,
chain_type="stuff",
retriever=docsearch.as_retriever(search_kwargs={"k": 1}),
chain_type="stuff",
retriever=docsearch.as_retriever(search_kwargs={"k": 1}),
chain_type_kwargs=chain_type_kwargs)

elif args.chain_type == "refine":
@@ -104,7 +104,7 @@
)
chain_type_kwargs = {"question_prompt": initial_qa_prompt, "refine_prompt": refine_prompt}
qa = RetrievalQA.from_chain_type(
llm=model, chain_type="refine",
llm=model, chain_type="refine",
retriever=docsearch.as_retriever(search_kwargs={"k": 1}),
chain_type_kwargs=chain_type_kwargs)

9 changes: 4 additions & 5 deletions scripts/langchain/langchain_sum.py
@@ -15,12 +15,11 @@
from langchain import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain

prompt_template = """Below is an instruction that describes a task.
Write a response that appropriately completes the request.\n\n
### Instruction:\n请为以下文字写一段摘要:\n{text}\n\n### Response: """
prompt_template = ("Below is an instruction that describes a task. "
"Write a response that appropriately completes the request.\n\n"
"### Instruction:\n请为以下文字写一段摘要:\n{text}\n\n### Response: ")
refine_template = (
"Below is an instruction that describes a task."
"Write a response that appropriately completes the request.\n\n"
@@ -41,7 +40,7 @@
device = torch.device(0)
else:
device = torch.device('cpu')

text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100, length_function=len)
with open(file_path) as f:
text = f.read()
2 changes: 1 addition & 1 deletion scripts/merge_llama_with_chinese_lora.py
@@ -322,7 +322,7 @@ def save_shards(model_sd, num_shards: int):
transpose(lora_model_sd[lora_b_key].float() @ lora_model_sd[lora_a_key].float(),fan_in_fan_out) * lora_scaling
)
assert base_model_sd[original_key].dtype == torch.float16

# did we do anything?
assert not torch.allclose(first_weight_old, first_weight)

6 changes: 3 additions & 3 deletions scripts/merge_llama_with_chinese_lora_low_mem.py
@@ -22,7 +22,7 @@
type=str, help="Please specify a base model")
parser.add_argument('--lora_model', default=None, required=True,
type=str, help="Please specify LoRA models to be merged (ordered); use commas to separate multiple LoRA models")
parser.add_argument('--output_type', default='pth',choices=['pth','huggingface'],
parser.add_argument('--output_type', default='pth',choices=['pth','huggingface'],
type=str, help="Save the merged model in pth or huggingface format")
parser.add_argument('--output_dir', default='./merged_model',
type=str, help="The output folder to save the merged model")
@@ -210,7 +210,7 @@ def merge_shards(output_dir, num_shards: int):
shards_merged = {}
for d in shards_dicts:
shards_merged |= d

print(f"Saving the merged shard to " + os.path.join(output_dir, f"consolidated.0{i}.pth"))
torch.save(shards_merged, os.path.join(output_dir, f"consolidated.0{i}.pth"))

@@ -305,7 +305,7 @@ def merge_shards(output_dir, num_shards: int):
print(f"merging {lora_key_A} and lora_B.weight form {tl_idx}-th LoRA weight to {k}")
state_dict[k] += (
transpose(
t_and_l['state_dict'][lora_key_B].float()
t_and_l['state_dict'][lora_key_B].float()
@ t_and_l['state_dict'][lora_key_A].float(), t_and_l['fan_in_fan_out']) * t_and_l['scaling']
)
weight_size = state_dict[k].numel() * dtype_byte_size(state_dict[k].dtype)
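One of the hunks above keeps `shards_merged |= d`, Python's in-place dict union. A minimal sketch of that idiom, with made-up shard contents that are not from the repository (the `|=` operator on dicts requires Python 3.9+):

```python
# Minimal sketch of the `shards_merged |= d` idiom used when combining shard
# state dicts; the shard contents below are illustrative only.
shards_dicts = [
    {"layers.0.weight": "tensor_a", "layers.1.weight": "tensor_b"},
    {"layers.2.weight": "tensor_c"},
]

shards_merged = {}
for d in shards_dicts:
    shards_merged |= d  # in-place union: keys from d are added or overwritten

print(sorted(shards_merged))  # ['layers.0.weight', 'layers.1.weight', 'layers.2.weight']
```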
1 change: 0 additions & 1 deletion scripts/merge_tokenizer/merge_tokenizers.py
@@ -62,6 +62,5 @@
text='''白日依山尽,黄河入海流。欲穷千里目,更上一层楼。
The primary use of LLaMA is research on large language models, including'''
print("Test text:\n",text)
print
print(f"Tokenized by LLaMA tokenizer:{llama_tokenizer.tokenize(text)}")
print(f"Tokenized by Chinese-LLaMA tokenizer:{chinese_llama_tokenizer.tokenize(text)}")
2 changes: 1 addition & 1 deletion scripts/openai_server_demo/README.md
@@ -116,7 +116,7 @@ json返回体:

`top_k`: 在随机采样(random sampling)时,前top_k高概率的token将作为候选token被随机采样。

`top_p`: 在随机采样(random sampling)时,累积概率超过top_p的token将作为候选token被随机采样,越低随机性越大,举个例子,当top_p设定为0.6时,概率前5的token概率分别为[0.23, 0.20, 0.18, 0.11, 0.10]时,前三个token的累积概率为0.61,那么第4个token将被过滤掉,只有前三的token将作为候选token被随机采样。
`top_p`: 在随机采样(random sampling)时,累积概率超过top_p的token将作为候选token被随机采样,越低随机性越大,举个例子,当top_p设定为0.6时,概率前5的token概率分别为{0.23, 0.20, 0.18, 0.11, 0.10}时,前三个token的累积概率为0.61,那么第4个token将被过滤掉,只有前三的token将作为候选token被随机采样。

`repetition_penalty`: 重复惩罚,具体细节可以参考这篇文章:<https://arxiv.org/pdf/1909.05858.pdf> 。

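The `top_p` description in the README excerpt above walks through a concrete case (top_p = 0.6 with probabilities {0.23, 0.20, 0.18, 0.11, 0.10}). A minimal sketch of that filtering step, written for illustration only and not taken from the repository:

```python
# Hypothetical helper illustrating the top_p (nucleus) filtering described in
# the README: keep the highest-probability tokens until their cumulative
# probability first reaches top_p, and drop the rest before sampling.
def top_p_filter(probs, top_p):
    kept, cumulative = [], 0.0
    for p in sorted(probs, reverse=True):
        kept.append(p)
        cumulative += p
        if cumulative >= top_p:
            break
    return kept

# With top_p = 0.6, the first three tokens (cumulative 0.61) stay as candidates
# and the remaining two are filtered out, matching the README's example.
print(top_p_filter([0.23, 0.20, 0.18, 0.11, 0.10], 0.6))  # [0.23, 0.2, 0.18]
```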
17 changes: 7 additions & 10 deletions scripts/openai_server_demo/openai_api_server.py
@@ -1,4 +1,3 @@
import pdb
import argparse
import os
from fastapi import FastAPI
@@ -54,7 +53,7 @@
tokenizer = LlamaTokenizer.from_pretrained(args.tokenizer_path)

base_model = LlamaForCausalLM.from_pretrained(
args.base_model,
args.base_model,
load_in_8bit=load_in_8bit,
torch_dtype=load_type,
low_cpu_mem_usage=True,
@@ -121,7 +120,7 @@ def predict(
type(input) == str -> /v1/completions
type(input) == list -> /v1/chat/completions
"""
if type(input) == str:
if isinstance(input, str):
prompt = generate_completion_prompt(input)
else:
prompt = generate_chat_prompt(input)
@@ -177,30 +176,28 @@ def get_embedding(input):
async def create_chat_completion(request: ChatCompletionRequest):
"""Creates a completion for the chat message"""
msgs = request.messages
if type(msgs) == str:
if isinstance(msgs, str):
msgs = [ChatMessage(role='user',content=msgs)]
else:
msgs = [ChatMessage(role=x['role'],content=x['message']) for x in msgs]
output = predict(
input=msgs,
input=msgs,
max_new_tokens=request.max_tokens,
top_p=request.top_p,
top_k=request.top_k,
temperature=request.temperature,
num_beams=request.num_beams,
repetition_penalty=request.repetition_penalty,
)
choices = [ChatCompletionResponseChoice(index = i, message = msg)
for i, msg in enumerate(msgs)]
choices += [ChatCompletionResponseChoice(index = len(choices),
message = ChatMessage(role='assistant',content=output))]
choices = [ChatCompletionResponseChoice(index = i, message = msg) for i, msg in enumerate(msgs)]
choices += [ChatCompletionResponseChoice(index = len(choices), message = ChatMessage(role='assistant',content=output))]
return ChatCompletionResponse(choices = choices)

@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
"""Creates a completion"""
output = predict(
input=request.prompt,
input=request.prompt,
max_new_tokens=request.max_tokens,
top_p=request.top_p,
top_k=request.top_k,
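Several hunks in openai_api_server.py above swap `type(x) == str` for `isinstance(x, str)`. A small illustration (not from the repository) of why the `isinstance` form is preferred for the string/list checks in `predict` and `create_chat_completion`: it also accepts subclasses, which the exact-type comparison rejects.

```python
# Illustration only: isinstance() follows inheritance while type() == does not,
# which is the rationale behind Codacy's suggestion adopted in this PR.
class Prompt(str):          # hypothetical subclass for demonstration
    pass

msg = Prompt("hello")
print(type(msg) == str)      # False -- exact type comparison rejects the subclass
print(isinstance(msg, str))  # True  -- isinstance accepts str and its subclasses
```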