Skip to content

Commit 21d845e

Browse files
Llm large fixes (#1959)
* Fix llama3 multi-gpu issue * Fix indentation * Remove llama2 outdated references * Fix run_evaluation argument
1 parent e7c301c commit 21d845e

File tree

7 files changed

+19
-20
lines changed

7 files changed

+19
-20
lines changed

language/llama3-405b/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ TODO: Host model and grant access to submitters
7070
+ First go to [llama3-request-link](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and make a request, sign in to HuggingFace (if you don't have an account, you'll need to create one). **Please note your authentication credentials** as you may be required to provide them when cloning below.
7171
+ Requires Git Large Files Storage
7272
```
73-
export CHECKPOINT_PATH=${PWD}/Llama-2-70b-chat-hf
73+
export CHECKPOINT_PATH=Meta-Llama-3.1-405B-Instruct
7474
git lfs install
7575
git clone https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct ${CHECKPOINT_PATH}
7676

language/llama3-405b/SUT_VLLM.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -127,19 +127,18 @@ def process_queries(self):
127127
pred_output_tokens,
128128
query_id_list=query_ids,
129129
)
130-
131-
for i in range(len(qitem)):
132-
n_tokens = processed_output[i].shape[0]
133-
response_array = array.array(
134-
"B", processed_output[i].tobytes())
135-
bi = response_array.buffer_info()
136-
response = [
137-
lg.QuerySampleResponse(
138-
qitem[i].id,
139-
bi[0],
140-
bi[1],
141-
n_tokens)]
142-
lg.QuerySamplesComplete(response)
130+
for i in range(len(qitem)):
131+
n_tokens = processed_output[i].shape[0]
132+
response_array = array.array(
133+
"B", processed_output[i].tobytes())
134+
bi = response_array.buffer_info()
135+
response = [
136+
lg.QuerySampleResponse(
137+
qitem[i].id,
138+
bi[0],
139+
bi[1],
140+
n_tokens)]
141+
lg.QuerySamplesComplete(response)
143142

144143
tok = time.time()
145144

language/llama3-405b/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def postProcess(
8080
output_seq = out_tokens
8181
assert len(query_id_list) == len(output_seq)
8282

83-
return np.asarray(output_seq, dtype=np.int32)
83+
return [np.asarray(out, dtype=np.int32) for out in output_seq]
8484

8585
def LoadSamplesToRam(self, sample_list):
8686
pass

language/llama3-405b/evaluate-accuracy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def main():
182182

183183
preds, targets = postprocess_text(preds_decoded_text, target_required)
184184

185-
result = run_evaluation(preds, targets, metrics)
185+
result = run_evaluation(preds, targets, metrics_required)
186186
result = dict(result)
187187
prediction_lens = [len(pred) for pred in preds]
188188
gen_num = len(preds)

language/llama3-405b/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def get_args():
4646
parser.add_argument(
4747
"--model-path",
4848
type=str,
49-
default="meta-llama/Llama-2-70b-chat-hf",
49+
default="Meta-Llama-3.1-405B-Instruct",
5050
help="Model name",
5151
)
5252
parser.add_argument("--dataset-path", type=str, default=None, help="")
@@ -110,7 +110,7 @@ def get_args():
110110
parser.add_argument(
111111
"--api-model-name",
112112
type=str,
113-
default="meta-llama/Llama-2-70b-chat-hf",
113+
default="Meta-Llama-3.1-405B-Instruct",
114114
help="Model name(specified in llm server)",
115115
)
116116
parser.add_argument(

language/llama3-405b/run_offline.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}"
1+
CHECKPOINT_PATH="${CHECKPOINT_PATH:-Meta-Llama-3.1-405B-Instruct}"
22
DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}"
33

44
python -u main.py --scenario Offline \

language/llama3-405b/run_server.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22

3-
CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}"
3+
CHECKPOINT_PATH="${CHECKPOINT_PATH:-Meta-Llama-3.1-405B-Instruct}"
44
DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}"
55

66
python -u main.py --scenario Server \

0 commit comments

Comments
 (0)