Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docs/lemonade/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ To prompt your LLM, try one of the following:

OGA iGPU:
```bash
lemonade -i microsoft/Phi-3-mini-4k-instruct oga-load --device igpu --dtype int4 llm-prompt -p "Hello, my thoughts are"
lemonade -i microsoft/Phi-3-mini-4k-instruct oga-load --device igpu --dtype int4 llm-prompt -p "Hello, my thoughts are" -t
```

Hugging Face:
```bash
lemonade -i facebook/opt-125m huggingface-load llm-prompt -p "Hello, my thoughts are"
lemonade -i facebook/opt-125m huggingface-load llm-prompt -p "Hello, my thoughts are" -t
```

The LLM will run with your provided prompt, and the LLM's response to your prompt will be printed to the screen. You can replace the `"Hello, my thoughts are"` with any prompt you like.
Expand All @@ -97,6 +97,9 @@ You can also replace the `facebook/opt-125m` with any Hugging Face checkpoint yo

You can also set the `--device` argument in `oga-load` and `huggingface-load` to load your LLM on a different device.

The `-t` (or `--template`) flag instructs lemonade to insert the prompt string into the model's chat template.
This typically results in the model returning a higher-quality response.

Run `lemonade huggingface-load -h` and `lemonade llm-prompt -h` to learn more about these tools.

## Accuracy
Expand Down
29 changes: 15 additions & 14 deletions examples/lemonade/server/continue.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,23 @@ This will add a Continue tab to your VS Code Activity Bar.
> Note: The following steps are based on Continue's own setup guide, found [here](https://docs.continue.dev/customize/model-providers/openai#openai-compatible-servers--apis)

1. Open the Continue tab in your VS Code Activity Bar.
1. Click the gear icon at the top to open Settings.
1. Under "Configuration", click "Open Config File".
1. Replace the "models" key in the `config.json` with the following and save:

```json
"models": [
{
"title": "Lemonade",
"provider": "openai",
"model": "Qwen-1.5-7B-Chat-Hybrid",
"apiKey": "-",
"apiBase": "http://localhost:8000/api/v0"
}
],
1. Click the chat box. Some buttons will appear at the bottom of the box, including `Select model`.
1. Click `Select model`, then `+ Add Chat model` to open the new model dialog box.
1. Click the `config file` link at the very bottom of the dialog to open `config.yaml`.
1. Replace the "models" key in the `config.yaml` with the following and save:

```yaml
models:
- name: Lemonade
provider: openai
model: Qwen-1.5-7B-Chat-Hybrid
apiBase: http://localhost:8000/api/v0
apiKey: none
```

6. Close the dialog box.
7. Click the chat box again. You should see `Lemonade` where you used to see `Select model`. Ready!

## Usage

> Note: see the Continue [user guide](https://docs.continue.dev/) to learn about all of their features.
Expand Down
127 changes: 29 additions & 98 deletions installer/Installer.nsi
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ OutFile "Lemonade_Server_Installer.exe"
Var LogHandle

Var LEMONADE_SERVER_STRING
Var LEMONADE_CONDA_ENV
Var HYBRID_SELECTED
Var HYBRID_CLI_OPTION

Expand Down Expand Up @@ -54,9 +53,6 @@ SectionIn RO ; Read only, always installed

remove_dir:
; Try to remove directory and verify it was successful

; Attempt conda remove of the env, to help speed things up
ExecWait 'conda env remove -yp "$INSTDIR\$LEMONADE_CONDA_ENV"'

; Delete all remaining files
RMDir /r "$INSTDIR"
Expand Down Expand Up @@ -103,97 +99,34 @@ SectionIn RO ; Read only, always installed

DetailPrint "- Packaged repo"

; Check if conda is available
ExecWait 'where conda' $2
DetailPrint "- Checked if conda is available"

; If conda is not found, show a message
; Otherwise, continue with the installation
StrCmp $2 "0" create_env conda_not_available

conda_not_available:
DetailPrint "- Conda not installed."
${IfNot} ${Silent}
MessageBox MB_YESNO "Conda is not installed. Would you like to install Miniconda?" IDYES install_miniconda IDNO exit_installer
${Else}
Goto install_miniconda
${EndIf}

exit_installer:
DetailPrint "- Something went wrong. Exiting installer"
Quit

install_miniconda:
DetailPrint "-------------"
DetailPrint "- Miniconda -"
DetailPrint "-------------"
DetailPrint "- Downloading Miniconda installer..."
ExecWait 'curl -s -o "$TEMP\Miniconda3-latest-Windows-x86_64.exe" "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"'

; Install Miniconda silently
ExecWait '"$TEMP\Miniconda3-latest-Windows-x86_64.exe" /InstallationType=JustMe /AddToPath=1 /RegisterPython=0 /S /D=$PROFILE\miniconda3' $2
; Check if Miniconda installation was successful
${If} $2 == 0
DetailPrint "- Miniconda installation successful"
${IfNot} ${Silent}
MessageBox MB_OK "Miniconda has been successfully installed."
${EndIf}

StrCpy $R1 "$PROFILE\miniconda3\Scripts\conda.exe"
Goto create_env

${Else}
DetailPrint "- Miniconda installation failed"
${IfNot} ${Silent}
MessageBox MB_OK "Error: Miniconda installation failed. Installation will be aborted."
${EndIf}
Goto exit_installer
${EndIf}

create_env:
DetailPrint "---------------------"
DetailPrint "- Conda Environment -"
DetailPrint "---------------------"

DetailPrint "- Initializing conda..."
; Use the appropriate conda executable
${If} $R1 == ""
StrCpy $R1 "conda"
${EndIf}
; Initialize conda (needed for systems where conda was previously installed but not initialized)
nsExec::ExecToStack '"$R1" init'

DetailPrint "- Creating a Python 3.10 environment named '$LEMONADE_CONDA_ENV' in the installation directory: $INSTDIR..."
ExecWait '"$R1" create -p "$INSTDIR\$LEMONADE_CONDA_ENV" python=3.10 -y' $R0

; Check if the environment creation was successful (exit code should be 0)
StrCmp $R0 0 install_lemonade env_creation_failed

env_creation_failed:
DetailPrint "- ERROR: Environment creation failed"
; Display an error message and exit
${IfNot} ${Silent}
MessageBox MB_OK "ERROR: Failed to create the Python environment. Installation will be aborted."
${EndIf}
Quit
DetailPrint "Set up Python"
CreateDirectory "$INSTDIR\python"
ExecWait 'curl -s -o "$INSTDIR\python\python.zip" "https://www.python.org/ftp/python/3.10.9/python-3.10.9-embed-amd64.zip"'
ExecWait 'tar -xf "$INSTDIR\python\python.zip" -C "$INSTDIR\python"'
ExecWait 'curl -sSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py'
ExecWait '$INSTDIR\python\python.exe get-pip.py --no-warn-script-location'

FileOpen $2 "$INSTDIR\python\python310._pth" a
FileSeek $2 0 END
FileWrite $2 "$\r$\nLib$\r$\n"
FileWrite $2 "$\r$\nLib\site-packages$\r$\n"
FileClose $2

install_lemonade:
DetailPrint "-------------------------"
DetailPrint "- Lemonade Installation -"
DetailPrint "-------------------------"
DetailPrint "-------------------------"
DetailPrint "- Lemonade Installation -"
DetailPrint "-------------------------"


DetailPrint "- Installing $LEMONADE_SERVER_STRING..."
${If} $HYBRID_SELECTED == "true"
nsExec::ExecToLog '"$INSTDIR\$LEMONADE_CONDA_ENV\python.exe" -m pip install -e "$INSTDIR"[llm-oga-hybrid] --no-warn-script-location'
${Else}
nsExec::ExecToLog '"$INSTDIR\$LEMONADE_CONDA_ENV\python.exe" -m pip install -e "$INSTDIR"[llm] --no-warn-script-location'
${EndIf}
Pop $R0 ; Return value
DetailPrint "- $LEMONADE_SERVER_STRING install return code: $R0"
DetailPrint "- Installing $LEMONADE_SERVER_STRING..."
${If} $HYBRID_SELECTED == "true"
ExecWait '"$INSTDIR\python\python.exe" -m pip install "$INSTDIR"[llm-oga-hybrid] --no-warn-script-location' $8
${Else}
ExecWait '"$INSTDIR\python\python.exe" -m pip install "$INSTDIR"[llm] --no-warn-script-location' $8
${EndIf}
DetailPrint "- $LEMONADE_SERVER_STRING install return code: $8"

; Check if installation was successful (exit code should be 0)
StrCmp $R0 0 install_success install_failed
; Check if installation was successful (exit code should be 0)
StrCmp $8 0 install_success install_failed

install_success:
DetailPrint "- $LEMONADE_SERVER_STRING installation successful"
Expand Down Expand Up @@ -233,7 +166,7 @@ Section "Install Ryzen AI Hybrid Execution" HybridSec
; Once we're done downloading and installing the archive the size comes out to about 370MB
AddSize 388882

nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --ryzenai hybrid -y'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --ryzenai hybrid -y'

Pop $R0 ; Return value
DetailPrint "Hybrid execution mode install return code: $R0"
Expand Down Expand Up @@ -299,20 +232,19 @@ SubSection /e "Selected Models" ModelsSec
${GetParameters} $CMDLINE
${GetOptions} $CMDLINE "/Models=" $R0
${If} $R0 != ""
nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --models $R0'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --models $R0'
${Else}
; Otherwise, only the default CPU model will be installed
nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --models Qwen2.5-0.5B-Instruct-CPU'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --models Qwen2.5-0.5B-Instruct-CPU'
${EndIf}
${Else}
nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --models $9'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --models $9'
${EndIf}
SectionEnd

SubSectionEnd

Section "-Add Desktop Shortcut" ShortcutSec
; Create a desktop shortcut that passes the conda environment name as a parameter
CreateShortcut "$DESKTOP\lemonade-server.lnk" "$INSTDIR\bin\lemonade-server.bat" "serve --keep-alive" "$INSTDIR\img\favicon.ico"

SectionEnd
Expand Down Expand Up @@ -458,7 +390,7 @@ LangString MUI_TEXT_ABORT_SUBTITLE "${LANG_ENGLISH}" "Installation has been abor
LangString MUI_BUTTONTEXT_FINISH "${LANG_ENGLISH}" "Finish"
LangString MUI_TEXT_LICENSE_TITLE ${LANG_ENGLISH} "AMD License Agreement"
LangString MUI_TEXT_LICENSE_SUBTITLE ${LANG_ENGLISH} "Please review the license terms before installing AMD Ryzen AI Hybrid Execution Mode."
LangString DESC_SEC01 ${LANG_ENGLISH} "The minimum set of dependencies for a lemonade server that runs LLMs on CPU."
LangString DESC_SEC01 ${LANG_ENGLISH} "The minimum set of dependencies for a lemonade server that runs LLMs on CPU (includes Python)."
LangString DESC_HybridSec ${LANG_ENGLISH} "Add support for running LLMs on Ryzen AI hybrid execution mode. Only available on Ryzen AI 300-series processors."
LangString DESC_ModelsSec ${LANG_ENGLISH} "Select which models to install"
LangString DESC_Qwen05Sec ${LANG_ENGLISH} "Small CPU-only Qwen model"
Expand All @@ -485,7 +417,6 @@ LangString DESC_DeepSeekQwen7BSec ${LANG_ENGLISH} "7B parameter DeepSeek Qwen mo

Function .onInit
StrCpy $LEMONADE_SERVER_STRING "Lemonade Server"
StrCpy $LEMONADE_CONDA_ENV "lemon_env"
StrCpy $HYBRID_SELECTED "true"

; Create a variable to store selected models
Expand Down
3 changes: 1 addition & 2 deletions installer/lemonade-server.bat
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
@echo off
setlocal enabledelayedexpansion
set CONDA_ENV=lemon_env

REM --keep-alive is only used by the bash script to make sure that, if the server fails to open, we don't close the terminal right away.
REM Check for --keep-alive argument and remove it from arguments passed to CLI
Expand All @@ -18,7 +17,7 @@ REM Change to parent directory where conda env and bin folders are located
pushd "%~dp0.."

REM Run the Python CLI script through conda, passing filtered arguments
call conda run --no-capture-output -p "%CD%\%CONDA_ENV%" lemonade-server-dev !ARGS!
call "%CD%\python\Scripts\lemonade-server-dev" !ARGS!
popd

REM Error handling: Show message and pause if --keep-alive was specified
Expand Down
12 changes: 0 additions & 12 deletions installer/run_server.bat

This file was deleted.

1 change: 1 addition & 0 deletions src/lemonade/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Keys:
DTYPE = "dtype"
PROMPT = "prompt"
PROMPT_TOKENS = "prompt_tokens"
PROMPT_TEMPLATE = "prompt_template"
RESPONSE = "response"
RESPONSE_TOKENS = "response_tokens"
RESPONSE_LENGTHS_HISTOGRAM = "response_lengths_histogram"
Expand Down
34 changes: 29 additions & 5 deletions src/lemonade/tools/ort_genai/oga.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,14 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
help="Download the model if needed, but don't load it",
)

parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Set this flag to use models whose code is on the Hugging Face hub rather "
"than natively in the OnnxRuntime Gen AI libraries. Please review the model code "
"in advance as this is a security risk.",
)

parser.add_argument(
"--subfolder",
default=None,
Expand Down Expand Up @@ -547,15 +555,28 @@ def _setup_npu_environment():
return saved_state

@staticmethod
def _load_model_and_setup_state(state, full_model_path, checkpoint):
def _load_model_and_setup_state(
state, full_model_path, checkpoint, trust_remote_code
):
"""
Loads the OGA model from local folder and then loads the tokenizer.
"""
state.model = OrtGenaiModel(full_model_path)

hf_tokenizer = AutoTokenizer.from_pretrained(
full_model_path, local_files_only=True
)
try:
hf_tokenizer = AutoTokenizer.from_pretrained(
full_model_path,
local_files_only=True,
trust_remote_code=trust_remote_code,
)
except ValueError as e:
if "trust_remote_code" in str(e):
raise ValueError(
"This model requires you to execute code from the repo. Please review it "
"and if you trust it, then use the `--trust-remote-code` flag with oga-load."
)
raise

state.tokenizer = OrtGenaiTokenizer(
state.model.model,
hf_tokenizer,
Expand All @@ -582,6 +603,7 @@ def run(
int4_block_size: int = None,
force: bool = False,
download_only: bool = False,
trust_remote_code=False,
subfolder: str = None,
) -> State:
state.device = device
Expand Down Expand Up @@ -671,7 +693,9 @@ def run(
"0" if "phi-" in checkpoint.lower() else "1"
)

self._load_model_and_setup_state(state, full_model_path, checkpoint)
self._load_model_and_setup_state(
state, full_model_path, checkpoint, trust_remote_code
)
finally:
self._cleanup_environment(saved_env_state)

Expand Down
Loading
Loading