# Source: llm-d-planner/ui/components/settings.py at a27b1d277799845dd76f72340fa01b96c6fd6e18
# (llm-d-incubation/llm-d-planner, GitHub)

"""Settings / Configuration tab component.

Contains deployment mode selection and benchmark database management controls;
structured to support additional configuration sections.
"""

import streamlit as st
from api_client import (
    fetch_db_status,
    fetch_deployment_mode,
    reset_database,
    update_deployment_mode,
    upload_benchmarks,
)

# Written to st.session_state["_pending_tab"] after actions taken on this tab.
# NOTE(review): presumably the app uses it to re-select this tab after a rerun —
# confirm against the code that reads "_pending_tab".
_TAB_INDEX = 5  # Configuration is the 6th tab (0-indexed)


def _show_message(message):
    """Display a ``(level, text)`` message as a Streamlit success or error box.

    Args:
        message: Two-item tuple. A ``level`` of ``"success"`` renders a success
            box; any other level renders an error box.
    """
    level, text = message
    if level == "success":
        st.success(text)
    else:
        st.error(text)


def _failure_reason(result):
    """Return a human-readable reason for a failed API call.

    Args:
        result: Response dict from the API client, or a falsy value when no
            response was obtained.

    Returns:
        The response's ``"message"`` entry, ``"Unknown error"`` when the
        response has none, or ``"No response from server"`` for a falsy result.
    """
    if not result:
        return "No response from server"
    return result.get("message", "Unknown error")


def _format_traffic_profiles(traffic):
    """Build the caption text listing ``(prompt_tokens, output_tokens)`` pairs.

    Missing keys are rendered as ``?`` instead of raising ``KeyError``, so one
    incomplete entry cannot abort rendering of the whole tab.

    Args:
        traffic: Iterable of dict-like entries from the status response's
            ``"traffic_distribution"`` list.
    """
    pairs = ", ".join(
        f"({entry.get('prompt_tokens', '?')}, {entry.get('output_tokens', '?')})"
        for entry in traffic
    )
    return "Traffic profiles: " + pairs


def render_configuration_tab():
    """Render the Configuration tab with deployment mode and database management.

    Sections, in display order:

    * Deployment Mode - a radio synced from ``fetch_deployment_mode()`` whose
      change callback calls ``update_deployment_mode()``.
    * Benchmark Database - status metrics, a JSON upload ("Load DB") and a
      "Reset Database" button.

    Session state keys written here: ``deployment_mode_selection``,
    ``_mode_msg``, ``_load_msg``, ``_upload_counter`` and ``_pending_tab``.
    """
    # --- Deployment Mode ---
    st.subheader("Deployment Mode")

    # Sync deployment mode from backend on each render; anything other than
    # "simulator" is shown as Production.
    current_mode = fetch_deployment_mode()
    st.session_state.deployment_mode_selection = (
        "Simulator" if current_mode == "simulator" else "Production"
    )

    def _on_mode_change():
        """Send the newly selected mode to the backend and queue a feedback message."""
        selected = st.session_state.deployment_mode_radio
        if update_deployment_mode(selected.lower()):
            st.session_state.deployment_mode_selection = selected
            st.session_state["_mode_msg"] = (
                "success",
                f"Deployment mode set to **{selected}**.",
            )
        else:
            # NOTE(review): the radio's own state is not reverted here, so it may
            # keep showing the rejected choice — confirm whether that is intended.
            st.session_state["_mode_msg"] = ("error", "Failed to update deployment mode.")
        st.session_state["_pending_tab"] = _TAB_INDEX

    modes = ["Production", "Simulator"]
    st.radio(
        "YAML generation target",
        modes,
        index=modes.index(st.session_state.deployment_mode_selection),
        horizontal=True,
        key="deployment_mode_radio",
        on_change=_on_mode_change,
        help="Production uses real vLLM with GPU resources. "
        "Simulator uses the vLLM simulator (no GPU required).",
    )

    # One-shot feedback from the callback: popped so it is shown only once.
    mode_msg = st.session_state.pop("_mode_msg", None)
    if mode_msg:
        _show_message(mode_msg)

    st.divider()

    # --- Benchmark Database ---
    st.subheader("Benchmark Database")

    # Reserve space for stats — populated after actions so data is always fresh
    status_area = st.container()

    st.divider()

    # Set by the reset action when its response should be used as the status
    action_status = None

    # --- Upload ---
    st.markdown("**Upload Benchmarks**")
    st.caption("Upload a JSON file with a top-level `benchmarks` array. Duplicates are skipped.")

    # Counter-based key resets the file uploader after a successful load
    upload_counter = st.session_state.get("_upload_counter", 0)
    uploaded = st.file_uploader(
        "Choose benchmark JSON file",
        type=["json"],
        key=f"settings_file_upload_{upload_counter}",
        label_visibility="collapsed",
    )

    # Clear any stored message when the user selects a new file
    if uploaded is not None:
        st.session_state.pop("_load_msg", None)

    if uploaded is not None and st.button("Load DB", key="settings_upload_btn", type="primary"):
        with st.spinner("Loading..."):
            result = upload_benchmarks(uploaded.getvalue(), uploaded.name)
        if result and result.get("success"):
            msg = (
                f"Processed {result.get('records_in_file', '?')} records from "
                f"{result.get('filename', 'file')} (duplicates skipped). "
                f"Database now has {result.get('total_benchmarks', '?')} unique benchmarks."
            )
            st.session_state["_load_msg"] = ("success", msg)
            # Increment counter so the file uploader resets on next rerun
            st.session_state["_upload_counter"] = upload_counter + 1
            st.session_state["_pending_tab"] = _TAB_INDEX
            st.rerun()
        else:
            st.session_state["_load_msg"] = (
                "error",
                f"Load failed: {_failure_reason(result)}",
            )

    # Show persisted load message (survives the rerun that clears the file uploader)
    load_msg = st.session_state.get("_load_msg")
    if load_msg:
        _show_message(load_msg)

    st.divider()

    # --- Reset ---
    st.markdown("**Reset Database**")
    if st.button("Reset Database", key="settings_reset_btn", type="secondary"):
        st.session_state["_pending_tab"] = _TAB_INDEX
        with st.spinner("Resetting..."):
            result = reset_database()
        if result and result.get("success"):
            st.success("Database has been reset. All benchmark data removed.")
            action_status = result
            # Clear any stale load message
            st.session_state.pop("_load_msg", None)
        else:
            st.error(f"Reset failed: {_failure_reason(result)}")

    # --- Populate status area (after actions, so stats reflect mutations) ---
    status = action_status if action_status else fetch_db_status()
    with status_area:
        if status and status.get("success"):
            c1, c2, c3 = st.columns(3)
            c1.metric("Total Benchmarks", status.get("total_benchmarks", 0))
            c2.metric("Models", status.get("num_models", 0))
            c3.metric("Hardware Types", status.get("num_hardware_types", 0))

            traffic = status.get("traffic_distribution", [])
            if traffic:
                st.caption(_format_traffic_profiles(traffic))
        else:
            st.warning("Could not connect to database.")