Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docs/lemonade/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ To prompt your LLM, try one of the following:

OGA iGPU:
```bash
lemonade -i microsoft/Phi-3-mini-4k-instruct oga-load --device igpu --dtype int4 llm-prompt -p "Hello, my thoughts are"
lemonade -i microsoft/Phi-3-mini-4k-instruct oga-load --device igpu --dtype int4 llm-prompt -p "Hello, my thoughts are" -t
```

Hugging Face:
```bash
lemonade -i facebook/opt-125m huggingface-load llm-prompt -p "Hello, my thoughts are"
lemonade -i facebook/opt-125m huggingface-load llm-prompt -p "Hello, my thoughts are" -t
```

The LLM will run with your provided prompt, and the LLM's response to your prompt will be printed to the screen. You can replace the `"Hello, my thoughts are"` with any prompt you like.
Expand All @@ -97,6 +97,9 @@ You can also replace the `facebook/opt-125m` with any Hugging Face checkpoint yo

You can also set the `--device` argument in `oga-load` and `huggingface-load` to load your LLM on a different device.

The `-t` (or `--template`) flag instructs lemonade to insert the prompt string into the model's chat template.
This typically results in the model returning a higher-quality response.

Run `lemonade huggingface-load -h` and `lemonade llm-prompt -h` to learn more about these tools.

## Accuracy
Expand Down
29 changes: 15 additions & 14 deletions examples/lemonade/server/continue.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,23 @@ This will add a Continue tab to your VS Code Activity Bar.
> Note: The following steps are based on Continue's own setup guide, found [here](https://docs.continue.dev/customize/model-providers/openai#openai-compatible-servers--apis)

1. Open the Continue tab in your VS Code Activity Bar.
1. Click the gear icon at the top to open Settings.
1. Under "Configuration", click "Open Config File".
1. Replace the "models" key in the `config.json` with the following and save:

```json
"models": [
{
"title": "Lemonade",
"provider": "openai",
"model": "Qwen-1.5-7B-Chat-Hybrid",
"apiKey": "-",
"apiBase": "http://localhost:8000/api/v0"
}
],
1. Click the chat box. Some buttons will appear at the bottom of the box, including `Select model`.
1. Click `Select model`, then `+ Add Chat model` to open the new model dialog box.
1. Click the `config file` link at the very bottom of the dialog to open `config.yaml`.
1. Replace the "models" key in the `config.yaml` with the following and save:

```yaml
models:
- name: Lemonade
provider: openai
model: Qwen-1.5-7B-Chat-Hybrid
apiBase: http://localhost:8000/api/v0
apiKey: none
```

6. Close the dialog box.
7. Click the chat box again. You should see `Lemonade` where you used to see `Select model`. Ready!

## Usage

> Note: see the Continue [user guide](https://docs.continue.dev/) to learn about all of their features.
Expand Down
127 changes: 29 additions & 98 deletions installer/Installer.nsi
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ OutFile "Lemonade_Server_Installer.exe"
Var LogHandle

Var LEMONADE_SERVER_STRING
Var LEMONADE_CONDA_ENV
Var HYBRID_SELECTED
Var HYBRID_CLI_OPTION

Expand Down Expand Up @@ -54,9 +53,6 @@ SectionIn RO ; Read only, always installed

remove_dir:
; Try to remove directory and verify it was successful

; Attempt conda remove of the env, to help speed things up
ExecWait 'conda env remove -yp "$INSTDIR\$LEMONADE_CONDA_ENV"'

; Delete all remaining files
RMDir /r "$INSTDIR"
Expand Down Expand Up @@ -103,97 +99,34 @@ SectionIn RO ; Read only, always installed

DetailPrint "- Packaged repo"

; Check if conda is available
ExecWait 'where conda' $2
DetailPrint "- Checked if conda is available"

; If conda is not found, show a message
; Otherwise, continue with the installation
StrCmp $2 "0" create_env conda_not_available

conda_not_available:
DetailPrint "- Conda not installed."
${IfNot} ${Silent}
MessageBox MB_YESNO "Conda is not installed. Would you like to install Miniconda?" IDYES install_miniconda IDNO exit_installer
${Else}
Goto install_miniconda
${EndIf}

exit_installer:
DetailPrint "- Something went wrong. Exiting installer"
Quit

install_miniconda:
DetailPrint "-------------"
DetailPrint "- Miniconda -"
DetailPrint "-------------"
DetailPrint "- Downloading Miniconda installer..."
ExecWait 'curl -s -o "$TEMP\Miniconda3-latest-Windows-x86_64.exe" "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"'

; Install Miniconda silently
ExecWait '"$TEMP\Miniconda3-latest-Windows-x86_64.exe" /InstallationType=JustMe /AddToPath=1 /RegisterPython=0 /S /D=$PROFILE\miniconda3' $2
; Check if Miniconda installation was successful
${If} $2 == 0
DetailPrint "- Miniconda installation successful"
${IfNot} ${Silent}
MessageBox MB_OK "Miniconda has been successfully installed."
${EndIf}

StrCpy $R1 "$PROFILE\miniconda3\Scripts\conda.exe"
Goto create_env

${Else}
DetailPrint "- Miniconda installation failed"
${IfNot} ${Silent}
MessageBox MB_OK "Error: Miniconda installation failed. Installation will be aborted."
${EndIf}
Goto exit_installer
${EndIf}

create_env:
DetailPrint "---------------------"
DetailPrint "- Conda Environment -"
DetailPrint "---------------------"

DetailPrint "- Initializing conda..."
; Use the appropriate conda executable
${If} $R1 == ""
StrCpy $R1 "conda"
${EndIf}
; Initialize conda (needed for systems where conda was previously installed but not initialized)
nsExec::ExecToStack '"$R1" init'

DetailPrint "- Creating a Python 3.10 environment named '$LEMONADE_CONDA_ENV' in the installation directory: $INSTDIR..."
ExecWait '"$R1" create -p "$INSTDIR\$LEMONADE_CONDA_ENV" python=3.10 -y' $R0

; Check if the environment creation was successful (exit code should be 0)
StrCmp $R0 0 install_lemonade env_creation_failed

env_creation_failed:
DetailPrint "- ERROR: Environment creation failed"
; Display an error message and exit
${IfNot} ${Silent}
MessageBox MB_OK "ERROR: Failed to create the Python environment. Installation will be aborted."
${EndIf}
Quit
DetailPrint "Set up Python"
CreateDirectory "$INSTDIR\python"
ExecWait 'curl -s -o "$INSTDIR\python\python.zip" "https://www.python.org/ftp/python/3.10.9/python-3.10.9-embed-amd64.zip"'
ExecWait 'tar -xf "$INSTDIR\python\python.zip" -C "$INSTDIR\python"'
ExecWait 'curl -sSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py'
ExecWait '$INSTDIR\python\python.exe get-pip.py --no-warn-script-location'

FileOpen $2 "$INSTDIR\python\python310._pth" a
FileSeek $2 0 END
FileWrite $2 "$\r$\nLib$\r$\n"
FileWrite $2 "$\r$\nLib\site-packages$\r$\n"
FileClose $2

install_lemonade:
DetailPrint "-------------------------"
DetailPrint "- Lemonade Installation -"
DetailPrint "-------------------------"
DetailPrint "-------------------------"
DetailPrint "- Lemonade Installation -"
DetailPrint "-------------------------"


DetailPrint "- Installing $LEMONADE_SERVER_STRING..."
${If} $HYBRID_SELECTED == "true"
nsExec::ExecToLog '"$INSTDIR\$LEMONADE_CONDA_ENV\python.exe" -m pip install -e "$INSTDIR"[llm-oga-hybrid] --no-warn-script-location'
${Else}
nsExec::ExecToLog '"$INSTDIR\$LEMONADE_CONDA_ENV\python.exe" -m pip install -e "$INSTDIR"[llm] --no-warn-script-location'
${EndIf}
Pop $R0 ; Return value
DetailPrint "- $LEMONADE_SERVER_STRING install return code: $R0"
DetailPrint "- Installing $LEMONADE_SERVER_STRING..."
${If} $HYBRID_SELECTED == "true"
ExecWait '"$INSTDIR\python\python.exe" -m pip install "$INSTDIR"[llm-oga-hybrid] --no-warn-script-location' $8
${Else}
ExecWait '"$INSTDIR\python\python.exe" -m pip install "$INSTDIR"[llm] --no-warn-script-location' $8
${EndIf}
DetailPrint "- $LEMONADE_SERVER_STRING install return code: $8"

; Check if installation was successful (exit code should be 0)
StrCmp $R0 0 install_success install_failed
; Check if installation was successful (exit code should be 0)
StrCmp $8 0 install_success install_failed

install_success:
DetailPrint "- $LEMONADE_SERVER_STRING installation successful"
Expand Down Expand Up @@ -233,7 +166,7 @@ Section "Install Ryzen AI Hybrid Execution" HybridSec
; Once we're done downloading and installing the archive the size comes out to about 370MB
AddSize 388882

nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --ryzenai hybrid -y'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --ryzenai hybrid -y'

Pop $R0 ; Return value
DetailPrint "Hybrid execution mode install return code: $R0"
Expand Down Expand Up @@ -299,20 +232,19 @@ SubSection /e "Selected Models" ModelsSec
${GetParameters} $CMDLINE
${GetOptions} $CMDLINE "/Models=" $R0
${If} $R0 != ""
nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --models $R0'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --models $R0'
${Else}
; Otherwise, only the default CPU model will be installed
nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --models Qwen2.5-0.5B-Instruct-CPU'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --models Qwen2.5-0.5B-Instruct-CPU'
${EndIf}
${Else}
nsExec::ExecToLog 'conda run --no-capture-output -p $INSTDIR\$LEMONADE_CONDA_ENV lemonade-install --models $9'
nsExec::ExecToLog '$INSTDIR\python\Scripts\lemonade-install --models $9'
${EndIf}
SectionEnd

SubSectionEnd

Section "-Add Desktop Shortcut" ShortcutSec
; Create a desktop shortcut that passes the conda environment name as a parameter
CreateShortcut "$DESKTOP\lemonade-server.lnk" "$INSTDIR\bin\lemonade-server.bat" "serve --keep-alive" "$INSTDIR\img\favicon.ico"

SectionEnd
Expand Down Expand Up @@ -458,7 +390,7 @@ LangString MUI_TEXT_ABORT_SUBTITLE "${LANG_ENGLISH}" "Installation has been abor
LangString MUI_BUTTONTEXT_FINISH "${LANG_ENGLISH}" "Finish"
LangString MUI_TEXT_LICENSE_TITLE ${LANG_ENGLISH} "AMD License Agreement"
LangString MUI_TEXT_LICENSE_SUBTITLE ${LANG_ENGLISH} "Please review the license terms before installing AMD Ryzen AI Hybrid Execution Mode."
LangString DESC_SEC01 ${LANG_ENGLISH} "The minimum set of dependencies for a lemonade server that runs LLMs on CPU."
LangString DESC_SEC01 ${LANG_ENGLISH} "The minimum set of dependencies for a lemonade server that runs LLMs on CPU (includes Python)."
LangString DESC_HybridSec ${LANG_ENGLISH} "Add support for running LLMs on Ryzen AI hybrid execution mode. Only available on Ryzen AI 300-series processors."
LangString DESC_ModelsSec ${LANG_ENGLISH} "Select which models to install"
LangString DESC_Qwen05Sec ${LANG_ENGLISH} "Small CPU-only Qwen model"
Expand All @@ -485,7 +417,6 @@ LangString DESC_DeepSeekQwen7BSec ${LANG_ENGLISH} "7B parameter DeepSeek Qwen mo

Function .onInit
StrCpy $LEMONADE_SERVER_STRING "Lemonade Server"
StrCpy $LEMONADE_CONDA_ENV "lemon_env"
StrCpy $HYBRID_SELECTED "true"

; Create a variable to store selected models
Expand Down
3 changes: 1 addition & 2 deletions installer/lemonade-server.bat
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
@echo off
setlocal enabledelayedexpansion
set CONDA_ENV=lemon_env

REM --keep-alive is only used by the bash script to make sure that, if the server fails to open, we don't close the terminal right away.
REM Check for --keep-alive argument and remove it from arguments passed to CLI
Expand All @@ -18,7 +17,7 @@ REM Change to parent directory where conda env and bin folders are located
pushd "%~dp0.."

REM Run the Python CLI script through conda, passing filtered arguments
call conda run --no-capture-output -p "%CD%\%CONDA_ENV%" lemonade-server-dev !ARGS!
call "%CD%\python\Scripts\lemonade-server-dev" !ARGS!
popd

REM Error handling: Show message and pause if --keep-alive was specified
Expand Down
12 changes: 0 additions & 12 deletions installer/run_server.bat

This file was deleted.

1 change: 1 addition & 0 deletions src/lemonade/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Keys:
DTYPE = "dtype"
PROMPT = "prompt"
PROMPT_TOKENS = "prompt_tokens"
PROMPT_TEMPLATE = "prompt_template"
RESPONSE = "response"
RESPONSE_TOKENS = "response_tokens"
RESPONSE_LENGTHS_HISTOGRAM = "response_lengths_histogram"
Expand Down
34 changes: 29 additions & 5 deletions src/lemonade/tools/ort_genai/oga.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,14 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
help="Download the model if needed, but don't load it",
)

parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Set this flag to use models whose code is on the Hugging Face hub rather "
"than natively in the OnnxRuntime Gen AI libraries. Please review the model code "
"in advance as this is a security risk.",
)

parser.add_argument(
"--subfolder",
default=None,
Expand Down Expand Up @@ -547,15 +555,28 @@ def _setup_npu_environment():
return saved_state

@staticmethod
def _load_model_and_setup_state(state, full_model_path, checkpoint):
def _load_model_and_setup_state(
state, full_model_path, checkpoint, trust_remote_code
):
"""
Loads the OGA model from local folder and then loads the tokenizer.
"""
state.model = OrtGenaiModel(full_model_path)

hf_tokenizer = AutoTokenizer.from_pretrained(
full_model_path, local_files_only=True
)
try:
hf_tokenizer = AutoTokenizer.from_pretrained(
full_model_path,
local_files_only=True,
trust_remote_code=trust_remote_code,
)
except ValueError as e:
if "trust_remote_code" in str(e):
raise ValueError(
"This model requires you to execute code from the repo. Please review it "
"and if you trust it, then use the `--trust-remote-code` flag with oga-load."
)
raise

state.tokenizer = OrtGenaiTokenizer(
state.model.model,
hf_tokenizer,
Expand All @@ -582,6 +603,7 @@ def run(
int4_block_size: int = None,
force: bool = False,
download_only: bool = False,
trust_remote_code=False,
subfolder: str = None,
) -> State:
state.device = device
Expand Down Expand Up @@ -671,7 +693,9 @@ def run(
"0" if "phi-" in checkpoint.lower() else "1"
)

self._load_model_and_setup_state(state, full_model_path, checkpoint)
self._load_model_and_setup_state(
state, full_model_path, checkpoint, trust_remote_code
)
finally:
self._cleanup_environment(saved_env_state)

Expand Down
Loading
Loading