From e90ee12d748c12c6f17186f31a9b49ed47a95c72 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Mon, 23 Dec 2024 00:12:27 -0800 Subject: [PATCH 01/18] Update run-docs to avoid duplicate code Update run-docs to avoid duplicate code --- .ci/scripts/run-docs | 178 ++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 122 deletions(-) diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs index 6f5ee46c7..c6ff7f899 100755 --- a/.ci/scripts/run-docs +++ b/.ci/scripts/run-docs @@ -1,127 +1,61 @@ -# /bin/bash -x +#!/bin/bash -x -if [ "X$1" == "X" ]; then +# Check if an argument was provided +if [ -z "$1" ]; then echo "Must specify document to run" exit 1 fi -if [ "$1" == "readme" ]; then - echo "::group::Create script to run README" - python3 torchchat/utils/scripts/updown.py --create-sections --file README.md --replace 'llama3.1:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-readme.sh - echo "::endgroup::" - - echo "::group::Run README" - echo "*******************************************" - cat ./run-readme.sh - echo "*******************************************" - bash -x ./run-readme.sh - echo "::endgroup::" - - exit 0 -fi - -if [ "$1" == "quantization" ]; then - echo "::group::Create script to run quantization" - python3 torchchat/utils/scripts/updown.py --create-sections --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-quantization.sh - echo "::endgroup::" - - echo "::group::Run quantization" - echo "*******************************************" - cat ./run-quantization.sh - echo "*******************************************" - bash -x ./run-quantization.sh - echo "::endgroup::" - - exit 0 -fi - -if [ "$1" == "gguf" ]; then - echo "::group::Create script to run gguf" - python3 torchchat/utils/scripts/updown.py --file docs/GGUF.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-gguf.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-gguf.sh - echo "::endgroup::" - - echo "::group::Run gguf" - echo "*******************************************" - cat ./run-gguf.sh - echo "*******************************************" - bash -x ./run-gguf.sh - echo "::endgroup::" -fi - - -if [ "$1" == "advanced" ]; then - echo "::group::Create script to run advanced" - python3 torchchat/utils/scripts/updown.py --file docs/ADVANCED-USERS.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-advanced.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-advanced.sh - echo "::endgroup::" - - echo "::group::Run advanced" - echo "*******************************************" - cat ./run-advanced.sh - echo "*******************************************" - bash -x ./run-advanced.sh - echo "::endgroup::" -fi - -if [ "$1" == "evaluation" ]; then - echo "::group::Create script to run evaluation" - python3 torchchat/utils/scripts/updown.py --file torchchat/utils/docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress 
huggingface-cli,HF_TOKEN > ./run-evaluation.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-evaluation.sh - echo "::endgroup::" - - echo "::group::Run evaluation" - echo "*******************************************" - cat ./run-evaluation.sh - echo "*******************************************" - bash -x ./run-evaluation.sh -fi - -if [ "$1" == "multimodal" ]; then - - # Expecting that this might fail this test as-is, because - # it's the first on-pr test depending on github secrets for access with HF token access - - echo "::group::Create script to run multimodal" - python3 torchchat/utils/scripts/updown.py --file docs/multimodal.md > ./run-multimodal.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-multimodal.sh - echo "::endgroup::" - - echo "::group::Run multimodal" - echo "*******************************************" - cat ./run-multimodal.sh - echo "*******************************************" - bash -x ./run-multimodal.sh - echo "::endgroup::" -fi - -if [ "$1" == "native" ]; then - - echo "::group::Create script to run native-execution" - python3 torchchat/utils/scripts/updown.py --file docs/native-execution.md > ./run-native.sh - # for good measure, if something happened to updown processor, - # and it did not error out, fail with an exit 1 - echo "exit 1" >> ./run-native.sh - echo "::endgroup::" - - echo "::group::Run native-execution" - echo "*******************************************" - cat ./run-native.sh - echo "*******************************************" - bash -x ./run-native.sh - echo "::endgroup::" -fi +# Pre-initialize variables +filepath="" +parameters="--replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN" +script_name="./run-${1}.sh" # Dynamically initialize script name + +# Use a case statement to handle the $1 argument +case "$1" in + "readme") + filepath="README.md" + ;; + "quantization") + filepath="docs/quantization.md" + ;; + "gguf") + filepath="docs/GGUF.md" + ;; + "advanced") + filepath="docs/ADVANCED-USERS.md" + ;; + "evaluation") + filepath="torchchat/utils/docs/evaluation.md" + ;; + "multimodal") + filepath="docs/multimodal.md" + parameters="" # Clear parameters + ;; + "native") + filepath="docs/native-execution.md" + parameters="" # Clear parameters + ;; + "distributed") + filepath="docs/distributed.md" + parameters="" # Clear parameters + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; +esac + +# Generate the script +echo "::group::Create script to run $1" +python3 torchchat/utils/scripts/updown.py --file "$filepath" $parameters > "$script_name" +echo "exit 1" >> "$script_name" +echo "::endgroup::" + +# Run the script +echo "::group::Run $1" +echo "*******************************************" +cat "$script_name" +echo "*******************************************" +bash -x "$script_name" +echo "::endgroup::" From ddb3773b76151289f1031b0ffc39d6f40fafb237 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 24 Dec 2024 03:23:02 -0800 Subject: [PATCH 02/18] Update run-docs Add back command explaining seemingly extraneous `echo exit 1` --- .ci/scripts/run-docs | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs index c6ff7f899..689ab5539 100755 --- a/.ci/scripts/run-docs +++ b/.ci/scripts/run-docs @@ -49,6 +49,7 @@ esac # Generate the script echo 
"::group::Create script to run $1" python3 torchchat/utils/scripts/updown.py --file "$filepath" $parameters > "$script_name" +# if something happened to updown processor, and it did not error out, fail with an exit 1 echo "exit 1" >> "$script_name" echo "::endgroup::" From 9e82bbc9acfac94fae16b026cc5805ba447e3150 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Sat, 28 Dec 2024 23:13:38 -0800 Subject: [PATCH 03/18] Update build_native.sh Update to C++11 ABI for AOTI, similar to ET --- torchchat/utils/scripts/build_native.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index 3c2c1c846..a935fa74c 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -93,7 +93,7 @@ popd if [[ "$TARGET" == "et" ]]; then cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_OPS="${LINK_TORCHAO_OPS}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja else - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_OPS="${LINK_TORCHAO_OPS}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_OPS="${LINK_TORCHAO_OPS}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja fi cmake --build ./cmake-out --target "${TARGET}"_run From 6087a583a0b07f0351b3d1d91637cd35481a29b8 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 31 Dec 2024 18:47:09 -0800 Subject: [PATCH 04/18] Update run-docs --- .ci/scripts/run-docs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs index 689ab5539..bb80b0ad9 100755 --- a/.ci/scripts/run-docs +++ b/.ci/scripts/run-docs @@ -40,6 +40,11 @@ case "$1" in filepath="docs/distributed.md" parameters="" # Clear parameters ;; + "local") + filepath="docs/local-model.md" + parameters="" # Clear parameters + ;; + *) echo "Unknown option: $1" exit 1 From dcb2a608d52e95aa12113180ba12ffef0054e262 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:51:35 -0800 Subject: [PATCH 05/18] Update run-docs Update to run distributed inference test with open-llama instead of llama3.1 --- .ci/scripts/run-docs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs index fccddd6b3..cddbb0e46 100755 --- a/.ci/scripts/run-docs +++ b/.ci/scripts/run-docs @@ -38,7 +38,7 @@ case "$1" in ;; "distributed") filepath="docs/distributed.md" - parameters="" # Clear parameters + parameters="--replace 'llama3.1:open-llama,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN" # Use open-llama to avoid need for authentication ;; "local") filepath="docs/local-model.md" @@ -64,4 +64,4 @@ echo "*******************************************" cat "$script_name" echo "*******************************************" bash -x "$script_name" -echo "::endgroup::" \ No newline at end of file +echo "::endgroup::" From adcb28a0b76fca0b84d9fce574f98083362a3a9e Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Thu, 23 Jan 2025 18:12:56 -0800 Subject: [PATCH 06/18] Update run-docs Open-llama 
-> stories to avoid tokens. --- .ci/scripts/run-docs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs index cddbb0e46..d1d16ab04 100755 --- a/.ci/scripts/run-docs +++ b/.ci/scripts/run-docs @@ -38,7 +38,7 @@ case "$1" in ;; "distributed") filepath="docs/distributed.md" - parameters="--replace 'llama3.1:open-llama,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN" # Use open-llama to avoid need for authentication + parameters="--replace 'llama3.1:stories110M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN" # Use open-llama to avoid need for authentication ;; "local") filepath="docs/local-model.md" From bd594fbcddad8f038b10716897fc4d91bc0c6bc3 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 28 Jan 2025 10:32:31 -0800 Subject: [PATCH 07/18] Update README.md Remove -l 3 since no longer necessary after Angea's change --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2448b0b72..04fb4789e 100644 --- a/README.md +++ b/README.md @@ -413,7 +413,7 @@ torchchat/utils/scripts/build_native.sh et Execute using the runner ```bash -cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -l 3 -i "Once upon a time" +cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -i "Once upon a time" ``` From 1015de76e151b266a2c7b429f37f97f7c21d2aec Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 28 Jan 2025 10:39:10 -0800 Subject: [PATCH 08/18] Update quantization.md remove -l 3 from aoti run , and write -l3 for et_run --- docs/quantization.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/quantization.md b/docs/quantization.md index 704a7ed6a..56fd2182e 100644 --- a/docs/quantization.md +++ b/docs/quantization.md @@ -182,7 +182,7 @@ OMP_NUM_THREADS=6 python3 torchchat.py generate llama3.1 --dso-path llama3_1.so If you built the AOTI runner with link_torchao_ops as discussed in the setup section, you can also use the C++ runner: ``` -OMP_NUM_THREADS=6 ./cmake-out/aoti_run llama3_1.so -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l 3 -i "Once upon a time," +OMP_NUM_THREADS=6 ./cmake-out/aoti_run llama3_1.so -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -i "Once upon a time," # -l 3 ``` #### ExecuTorch @@ -193,7 +193,7 @@ python torchchat.py export llama3.1 --device cpu --dtype float32 --quantize '{"e Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file. It will not work with the `python torchchat.py generate` command. ``` -./cmake-out/et_run llama3_1.pte -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l 3 -i "Once upon a time," +./cmake-out/et_run llama3_1.pte -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l3 -i "Once upon a time," ``` ## Experimental TorchAO MPS lowbit kernels From 02dd5db4291fccf8ef87923b8ab1917ee2bedb5e Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 28 Jan 2025 10:40:49 -0800 Subject: [PATCH 09/18] Update run-docs -l 3:-l 2 -> -l3:-l2 after modifying the command lines. 
Hopefully this is legal for et_run
---
 .ci/scripts/run-docs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs
index 33a0afd44..71f074cef 100755
--- a/.ci/scripts/run-docs
+++ b/.ci/scripts/run-docs
@@ -8,7 +8,7 @@ fi
 
 # Pre-initialize variables
 filepath=""
-parameters="--replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN"
+parameters="--replace 'llama3:stories15M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN"
 script_name="./run-${1}.sh" # Dynamically initialize script name
 
 # Use a case statement to handle the $1 argument
@@ -38,7 +38,7 @@ case "$1" in
     ;;
   "distributed")
     filepath="docs/distributed.md"
-    parameters="--replace 'llama3.1:stories110M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN" # Use stories110M to avoid need for authentication
+    parameters="--replace 'llama3.1:stories110M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN" # Use stories110M to avoid need for authentication
     ;;
   "local")
     filepath="docs/local-model.md"

From da1b98d493bf21e0c092228b03c6c5add87cbfb0 Mon Sep 17 00:00:00 2001
From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com>
Date: Tue, 28 Jan 2025 10:32:31 -0800
Subject: [PATCH 10/18] Update run.cpp

Update to support non-space-separated args
---
 runner/run.cpp | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/runner/run.cpp b/runner/run.cpp
index e5c818cfa..095b46f2b 100644
--- a/runner/run.cpp
+++ b/runner/run.cpp
@@ -803,41 +803,49 @@ int main(int argc, char *argv[]) {
   } else {
     error_usage();
   }
-  for (int i = 2; i < argc; i += 2) {
+  for (int i = 2; i < argc; i += 1) {
     // do some basic validation
-    if (i + 1 >= argc) {
-      error_usage();
-    } // must have arg after flag
+    char *parm = argv[i+1];
+
     if (argv[i][0] != '-') {
       error_usage();
     } // must start with dash
-    if (strlen(argv[i]) != 2) {
+
+    // uniarg means the arg comes right after the letter in accordance with posix
+    int uniarg = strlen(argv[i]) != 2;
+    if (uniarg) {
+      parm=&argv[i][2];
+    } else if (i + 1 >= argc) {
       error_usage();
-    } // must be -x (one dash, one letter)
+    } // must have arg after option if flag is not contiguous to option
+
+
     // read in the args
     if (argv[i][1] == 't') {
-      temperature = atof(argv[i + 1]);
+      temperature = atof(parm);
     } else if (argv[i][1] == 'p') {
-      topp = atof(argv[i + 1]);
+      topp = atof(parm);
     } else if (argv[i][1] == 's') {
-      rng_seed = atoi(argv[i + 1]);
+      rng_seed = atoi(parm);
     } else if (argv[i][1] == 'n') {
-      steps = atoi(argv[i + 1]);
+      steps = atoi(parm);
     } else if (argv[i][1] == 'v') {
       vocab_size = atoi(argv[i + 1]);
     } else if (argv[i][1] == 'i') {
-      prompt = argv[i + 1];
+      prompt = parm;
     } else if (argv[i][1] == 'z') {
-      tokenizer_path = argv[i + 1];
+      tokenizer_path = parm;
     } else if (argv[i][1] == 'm') {
-      mode = argv[i + 1];
+      mode = parm;
     } else if (argv[i][1] == 'y') {
-      system_prompt = argv[i + 1];
+      system_prompt = parm;
     } else if (argv[i][1] == 'l') {
-      llama_ver = atoi(argv[i + 1]);
+      llama_ver = atoi(parm);
     } else {
       error_usage();
     }
+
+    // account for parameter
+    i += (uniarg)?0:1;
   }
 
   if (model_path == NULL) {

From f3ee3e41c16aa10bbe38d102a8062b69f88788b0 Mon Sep 17 00:00:00 2001
From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com>
Date: Tue, 28 Jan 2025 11:30:14 -0800
Subject: [PATCH 11/18] Update run.cpp

typo
---
 runner/run.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner/run.cpp b/runner/run.cpp
index 095b46f2b..2a591bf82 100644
--- a/runner/run.cpp
+++ 
b/runner/run.cpp @@ -829,7 +829,7 @@ int main(int argc, char *argv[]) { } else if (argv[i][1] == 'n') { steps = atoi(parm); } else if (argv[i][1] == 'v') { - vocab_size = atoi(argv[i + 1]); + vocab_size = atoi(parm); } else if (argv[i][1] == 'i') { prompt = parm; } else if (argv[i][1] == 'z') { From 5629e29ea40a8cb47bab851b4e2630cc843c916e Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 28 Jan 2025 15:51:37 -0800 Subject: [PATCH 12/18] Create cuda-32.json Add a gs=32 cuda.json for test runs with stories15M --- torchchat/quant_config/cuda-32.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 torchchat/quant_config/cuda-32.json diff --git a/torchchat/quant_config/cuda-32.json b/torchchat/quant_config/cuda-32.json new file mode 100644 index 000000000..90c37250a --- /dev/null +++ b/torchchat/quant_config/cuda-32.json @@ -0,0 +1,5 @@ +{ + "executor": {"accelerator": "cuda"}, + "precision": {"dtype": "bf16"}, + "linear:int4": {"groupsize" : 32} +} From 902a5dab1aa79c16dfa49cb81135198e75cfd4ae Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 28 Jan 2025 15:52:21 -0800 Subject: [PATCH 13/18] Create mobile-32.json add gs=32 variant of mobile for tests --- torchchat/quant_config/mobile-32.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 torchchat/quant_config/mobile-32.json diff --git a/torchchat/quant_config/mobile-32.json b/torchchat/quant_config/mobile-32.json new file mode 100644 index 000000000..3afaa7542 --- /dev/null +++ b/torchchat/quant_config/mobile-32.json @@ -0,0 +1,4 @@ +{ + "embedding": {"bitwidth": 4, "groupsize" : 32}, + "linear:a8w4dq": {"groupsize" : 32} +} From 0ac70964b75671ead499f3bafb2c78618ae00f02 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 28 Jan 2025 15:53:46 -0800 Subject: [PATCH 14/18] Update run-docs Use gs=32 variants with stories models --- .ci/scripts/run-docs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs index 71f074cef..cb199df9d 100755 --- a/.ci/scripts/run-docs +++ b/.ci/scripts/run-docs @@ -8,7 +8,7 @@ fi # Pre-initialize variables filepath="" -parameters="--replace 'llama3:stories15M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN" +parameters="--replace 'llama3:stories15M,-l3:-l2,cuda.json:cuda-32.json,mobile.json:mobile-32.json' --suppress huggingface-cli,HF_TOKEN" script_name="./run-${1}.sh" # Dynamically initialize script name # Use a case statement to handle the $1 argument From 4d97e78cb1a6a3433bd3fdf19f3fe3d0186e8281 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Tue, 28 Jan 2025 15:54:52 -0800 Subject: [PATCH 15/18] Update run-docs undo gs32 --- .ci/scripts/run-docs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/scripts/run-docs b/.ci/scripts/run-docs index cb199df9d..71f074cef 100755 --- a/.ci/scripts/run-docs +++ b/.ci/scripts/run-docs @@ -8,7 +8,7 @@ fi # Pre-initialize variables filepath="" -parameters="--replace 'llama3:stories15M,-l3:-l2,cuda.json:cuda-32.json,mobile.json:mobile-32.json' --suppress huggingface-cli,HF_TOKEN" +parameters="--replace 'llama3:stories15M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN" script_name="./run-${1}.sh" # Dynamically initialize script name # Use a case statement to handle the $1 argument From c787e1a21d934fd77dff772cf8a8897317407146 Mon Sep 17 00:00:00 2001 From: Michael Gschwind 
<61328285+mikekgfb@users.noreply.github.com>
Date: Wed, 29 Jan 2025 00:38:09 -0800
Subject: [PATCH 16/18] Update run-readme-pr-mps.yml

Extend the timeout to avoid a timeout of the MPS quantization test
---
 .github/workflows/run-readme-pr-mps.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/run-readme-pr-mps.yml b/.github/workflows/run-readme-pr-mps.yml
index 4d5cd7e14..db16bc80e 100644
--- a/.github/workflows/run-readme-pr-mps.yml
+++ b/.github/workflows/run-readme-pr-mps.yml
@@ -15,8 +15,8 @@ jobs:
       conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp
       conda activate test-readme-mps-macos
       set -x
-      # NS: Remove previous installation of torch first
-      # as this script does not isntall anything into conda env but rather as system dep
+      # NS: Remove previous installation of torch first
+      # as this script does not install anything into conda env but rather as system dep
       pip3 uninstall -y torch || true
       set -eou pipefail
@@ -37,6 +37,7 @@
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-14
+      timeout: 60
       script: |
         set -x
         conda create -y -n test-quantization-mps-macos python=3.10.11

From 156cedaec30d01d4b97b699ad17c9440afaefd9b Mon Sep 17 00:00:00 2001
From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com>
Date: Wed, 29 Jan 2025 12:27:58 -0800
Subject: [PATCH 17/18] Update run.cpp

Enforce that an argument must have at least length 2, and refine the check for
uniarg (i.e., arg plus flag value in one option) to be args with more than 2
characters
---
 runner/run.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/runner/run.cpp b/runner/run.cpp
index 2a591bf82..ea452550f 100644
--- a/runner/run.cpp
+++ b/runner/run.cpp
@@ -811,8 +811,12 @@ int main(int argc, char *argv[]) {
       error_usage();
     } // must start with dash
+    if strlen(argv[i]) < 2 {
+      error_usage();
+    } // must have at least dash '-' and option letter
+
     // uniarg means the arg comes right after the letter in accordance with posix
-    int uniarg = strlen(argv[i]) != 2;
+    int uniarg = strlen(argv[i]) > 2;
     if (uniarg) {
       parm=&argv[i][2];
     } else if (i + 1 >= argc) {
       error_usage();

From b77ddf3e48fcc66f284208a45a2198141408afd5 Mon Sep 17 00:00:00 2001
From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com>
Date: Thu, 30 Jan 2025 13:08:50 -0800
Subject: [PATCH 18/18] Update run.cpp

typos
---
 runner/run.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/runner/run.cpp b/runner/run.cpp
index ea452550f..d64c636bb 100644
--- a/runner/run.cpp
+++ b/runner/run.cpp
@@ -806,17 +806,17 @@ int main(int argc, char *argv[]) {
   for (int i = 2; i < argc; i += 1) {
     // do some basic validation
     char *parm = argv[i+1];
+    // uniarg means the arg comes right after the letter in accordance with posix
+    int uniarg = strlen(argv[i]) > 2;
     if (argv[i][0] != '-') {
       error_usage();
     } // must start with dash
-    if strlen(argv[i]) < 2 {
+    if (strlen(argv[i]) < 2) {
       error_usage();
     } // must have at least dash '-' and option letter
-    // uniarg means the arg comes right after the letter in accordance with posix
-    int uniarg = strlen(argv[i]) > 2;
     if (uniarg) {
       parm=&argv[i][2];
     } else if (i + 1 >= argc) {
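For reference, the consolidated `.ci/scripts/run-docs` helper introduced in PATCH 01/18 is driven entirely by its single positional argument. A minimal usage sketch, assuming it is invoked from the repository root (the document names in the case statement are the only valid arguments):

```bash
# Extract the commands embedded in a document and run them as a smoke test.
# Valid arguments: readme, quantization, gguf, advanced, evaluation,
# multimodal, native, distributed, local.
bash .ci/scripts/run-docs readme

# The generated throwaway script is left behind and can be inspected afterwards.
cat ./run-readme.sh
```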
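The argument-parsing changes in PATCH 10/18, 17/18, and 18/18 let the runner accept a flag's value either as a separate token or appended directly to the flag letter, POSIX-style. A hedged sketch of the two equivalent invocations, reusing the runner and tokenizer paths from the README example and assuming `llama3.1.pte` has already been exported:

```bash
# Space-separated form (the only form accepted before these patches):
cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -l 3 -i "Once upon a time"

# Contiguous form enabled by the new parsing: -l3 is read as flag 'l' with value '3'.
cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -l3 -i "Once upon a time"
```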
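The `cuda-32.json` and `mobile-32.json` configs added in PATCH 12/18 and 13/18 are group-size-32 variants of the existing `cuda.json`/`mobile.json`, intended for test runs with the small stories models. A hypothetical sketch of how they might be passed to torchchat; the `--quantize`, `--output-pte-path`, and `--prompt` flags are assumptions based on torchchat's documented CLI rather than anything in these patches:

```bash
# Assumed usage: quantize a stories test model with the gs=32 mobile config for ExecuTorch,
# and generate on CUDA with the gs=32 CUDA config.
python3 torchchat.py export stories15M --quantize torchchat/quant_config/mobile-32.json --output-pte-path stories15M.pte
python3 torchchat.py generate stories15M --quantize torchchat/quant_config/cuda-32.json --prompt "Once upon a time"
```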