diff --git a/.claude/skills/osmo-skill/agents/workflow-expert.md b/.claude/skills/osmo-skill/agents/workflow-expert.md deleted file mode 100644 index c5601dc0d..000000000 --- a/.claude/skills/osmo-skill/agents/workflow-expert.md +++ /dev/null @@ -1,92 +0,0 @@ ---- -name: workflow-expert -description: > - OSMO workflow specialist for workflow creation, resource checking, - submission, and failure diagnosis. Generates or validates YAML, - checks resources, submits — then RETURNS the workflow ID. It does NOT - monitor workflows. The calling agent handles monitoring inline (see - the osmo skill's "Orchestrate a Workflow End-to-End" use case). On - failure, resume this agent for diagnosis. -skills: - - osmo -model: opus -memory: user ---- - -You are a workflow specialist for the OSMO platform. You handle the heavy -lifting — workflow generation, resource selection, submission, and failure -diagnosis — then return control so the calling agent can monitor inline -with live status updates visible to the user. - -Load the [osmo skill](/osmo-skill/SKILL.md) in your context with all CLI procedures and -reference files. Use its procedures directly — do not reinvent them. - -Your agent memory persists across sessions. Consult it before starting -work — it may contain pool performance data, error patterns, and resource -sizing that avoids trial-and-error. - -## Mode 1: Setup and Submit (default) - -Execute these steps using your preloaded osmo skill: - -1. **Resource Check** — Follow the "Check Available Resources" use case. - Pick the pool with the best GPU match for the user's needs. - -2. **Workflow Generation** — If `workflow.yaml` already exists and the user - referenced it, submit it as-is. Do NOT modify the YAML — no adding/removing - tasks, renaming tasks, changing resource values, or altering the script - contents. If you spot an obvious issue (e.g. wrong template variable), - flag it in your return message but still submit the original unchanged. 
- Otherwise, follow the "Generate and Submit a Workflow" use case to create one. - -3. **Submit** — Follow the submission steps from the skill. Skip user - confirmation if pre-authorized. On validation errors, auto-adjust - resources per the skill's sizing rules and resubmit. - -4. **Return** — After successful submission, return a structured response: - - **Workflow ID** and **pool name** - - **OSMO Web link**: `https://us-west-2-aws.osmo.nvidia.com/v2/workflows/` - - **Output datasets** the workflow will produce (names from the YAML) - - Do NOT poll or monitor the workflow. Return immediately after submission. - -## Mode 2: Diagnose and Fix (via resume) - -When resumed with a failure context (workflow ID + status): - -1. **Analyze logs**: Analyze the logs summary that is provided to you frist. If the summary is not informational enough for root-casue analysis, fetch more detailed logs with `osmo workflow logs -n 10000`. -2. **Root-cause analysis**: Identify the failure (OOM/exit 137, script error, - image pull failure, NCCL timeout, template variable errors, etc.) -3. **Proactive review**: When fixing a script error, review the ENTIRE script - for other potential issues that would cause a runtime failure — not just the - line that failed. Fix all such issues in a single pass to minimize retry - cycles. Limit fixes to things that would break execution (missing commands, - wrong template variables, syntax errors, bad paths). Do NOT change resource - values (CPU, GPU, memory), task structure, or make optimizations the user - did not ask for. -4. **Explain the fix**: State what failed, what you changed, and any other - issues you caught proactively. Use plain language. -5. **Resubmit** to the same pool. -6. **Return** the new workflow ID (same format as Mode 1 step 4), plus a - summary of what was fixed. - -Track retries across resume invocations. After 3 failures, ask the user. - -## Guidelines - -- Use plain language — no Kubernetes jargon. 
-- Run commands yourself — do not tell the user to run them. -- When in doubt about user intent, ask before submitting. - -## Memory - -After each successful workflow cycle (submit or diagnose+fix), save key -learnings to your agent memory. Organize by topic: - -- **Pool performance**: Which pools worked, typical queue times, reliability -- **Error patterns**: Failures seen and the fixes that resolved them -- **Resource sizing**: GPU/CPU/memory/storage values that worked for specific - workload types (GR00T, SDG, RL, etc.) - -Keep `MEMORY.md` concise (under 200 lines). Use topic files for details. -Update existing entries rather than appending duplicates. diff --git a/docs/deployment_guide/appendix/keycloak_setup.rst b/docs/deployment_guide/appendix/keycloak_setup.rst index 37a177aaa..4564622d3 100644 --- a/docs/deployment_guide/appendix/keycloak_setup.rst +++ b/docs/deployment_guide/appendix/keycloak_setup.rst @@ -18,7 +18,7 @@ .. _keycloak_setup: ================================================ -Keycloak as an Identity Provider for OSMO +Keycloak as a sample IdP ================================================ This guide describes how to deploy `Keycloak `_ and configure it as the identity provider (IdP) for OSMO. Keycloak acts as an authentication broker, allowing OSMO to authenticate users through various identity providers (LDAP, SAML, social logins) while providing centralized group and role management. @@ -396,7 +396,7 @@ The typical workflow for setting up access control is: 2. Create groups in Keycloak 3. Assign roles to groups 4. Add users to groups (manually or via identity provider mappings) -5. Create matching pools in OSMO +5. Create matching pools 6. Verify access .. _keycloak_create_roles: @@ -551,7 +551,7 @@ User Cannot Access Pool **Solutions**: -1. **Verify Role Policy in OSMO**: Ensure the corresponding role has been created in OSMO. Follow the steps in :ref:`troubleshooting_roles_policies`. +1. 
**Verify Role Policy**: Ensure the corresponding role has been created. Follow the steps in :ref:`troubleshooting_roles_policies`. 2. **Verify Role Names**: Pool access roles must start with ``osmo-`` prefix (see :ref:`role_naming_for_pools`). Pool names must match the role suffix. Example: Role ``osmo-team1`` will make pools named ``team1*`` visible. diff --git a/.claude/skills/osmo-skill/LICENSE b/skills/osmo-agent/LICENSE similarity index 100% rename from .claude/skills/osmo-skill/LICENSE rename to skills/osmo-agent/LICENSE diff --git a/.claude/skills/osmo-skill/SKILL.md b/skills/osmo-agent/SKILL.md similarity index 71% rename from .claude/skills/osmo-skill/SKILL.md rename to skills/osmo-agent/SKILL.md index 39ba7682a..35f193d55 100644 --- a/.claude/skills/osmo-skill/SKILL.md +++ b/skills/osmo-agent/SKILL.md @@ -20,8 +20,8 @@ common OSMO CLI use cases. The `agents/` directory contains instructions for specialized subagents. Read them when you need to spawn the relevant subagent. -- `agents/workflow-expert.md` — expert for workflow generation, resource check, submission, failure diagnosis -- `agents/logs-reader.md` - expert for fetching and reading logs, extracting important information for monitoring and failure diagnosis. 
+- `agents/workflow-expert.md` — workflow generation, resource check, submission, failure diagnosis +- `agents/logs-reader.md` — log fetching and summarization for monitoring and failure diagnosis The `references/` directory has additional documentation: @@ -31,6 +31,18 @@ The `references/` directory has additional documentation: --- +## Intent Routing + +- Asks about resources, pools, GPUs, or quota → Check Available Resources +- Wants to submit a job (simple, no monitoring) → Generate and Submit a Workflow +- Wants to submit + monitor + handle failures → Orchestrate a Workflow End-to-End +- Asks about a workflow's status or logs → Check Workflow Status +- Lists recent workflows → List Workflows +- Asks what a workflow does → Explain What a Workflow Does +- Wants to publish a workflow as an app → Create an App + +--- + ## Use Case: Check Available Resources **When to use:** The user asks what resources, nodes, GPUs, or pools are available @@ -115,7 +127,8 @@ Derive GPU type from pool names when possible: **When to use:** The user wants to submit a job to run on OSMO (e.g. "submit a workflow to run SDG", "run RL training for me", "submit this yaml to OSMO"). -Evaluate the complexity of the user's request: if user also wants monitoring, debugging workflows, reporting results, or the workflow complexity is too high, refer to `Orchestrate a Workflow End-to-End` use case to delegate this to a sub-agent instead. +If the user also wants monitoring, debugging, or reporting results, use the +"Orchestrate a Workflow End-to-End" use case instead. ### Steps @@ -234,7 +247,8 @@ Evaluate the complexity of the user's request: if user also wants monitoring, de ## Use Case: Check Workflow Status **When to use:** The user asks about the status or logs of a workflow (e.g. "what's the -status of workflow abc-123?", "is my workflow done?", "show me the logs for xyz"). 
+status of workflow abc-123?", "is my workflow done?", "show me the logs for xyz", +"show me the resource usage for my workflow", "give me the Kubernetes dashboard link"). Also used as the polling step when monitoring a workflow during end-to-end orchestration. ### Steps @@ -243,6 +257,9 @@ Also used as the polling step when monitoring a workflow during end-to-end orche ``` osmo workflow query --format-type json ``` + **Cache the JSON result for the rest of the conversation.** If you have already queried + this workflow with `osmo workflow query` earlier in the conversation, reuse that JSON + — do not query again just to extract a field. 2. **Get recent logs** — Choose the log-fetching method based on task count (this rule applies everywhere logs are needed — monitoring, failure diagnosis, etc.): @@ -256,82 +273,98 @@ Also used as the polling step when monitoring a workflow during end-to-end orche - Concisely summarize what the logs show — what stage the job is at, any errors, or what it completed successfully - If the workflow failed, highlight the error and suggest next steps if possible - - **If the workflow is COMPLETED and has output datasets, you MUST ask this - explicit question before ending your response:** - `Would you like me to download the output dataset now?` - Also ask whether they want a specific output folder (default to `~/` if not). - Then run the download yourself: + - **Resource usage / Grafana link:** If the user asks about resource usage, GPU + utilization, or metrics for this workflow, extract `grafana_url` from the query + JSON. If present, render it as a clickable link: + `[View resource usage in Grafana]()` + If the field is empty or null, tell the user: "The Grafana resource usage link is + not available for this workflow." + - **Kubernetes dashboard link:** If the user asks for the Kubernetes dashboard, + pod details, or a k8s link, extract `kubernetes_dashboard` from the query JSON. 
+ If present, render it as a clickable link: + `[Open Kubernetes dashboard]()` + If the field is empty or null, tell the user: "The Kubernetes dashboard link is + not available for this workflow." + - Proactively include both links in any detailed status report (e.g. when the + workflow is RUNNING or has just COMPLETED) — users often want them without + explicitly asking. If a field is empty or null, note it as not available rather + than silently omitting it. + - **If PENDING** (or the user asks why it isn't scheduling), run: ``` - osmo dataset download + osmo workflow events ``` - Use `~/` as the output path if the user doesn't specify one. - - - **After the dataset download question above**, if the workflow is COMPLETED, - also ask if the user would like to create an - OSMO app for it. Suggest a name derived from the workflow name (e.g. workflow - `sdg-run-42` → app name `sdg-run-42`) and generate a one-sentence description - based on what the workflow does. If the user agrees (or provides their own name), - follow the "Create an App" use case below. - - **When monitoring multiple workflows** that all complete from the same spec, offer - app creation once (not per workflow) after all workflows reach a terminal state. - Since they share the same YAML, a single app covers all runs. Do not skip this - offer just because you were in a batch monitoring loop. - - **If the workflow is PENDING** (or the user asks why it isn't scheduling), run: - ``` - osmo workflow events - ``` - These are Kubernetes pod conditions and cluster events — translate them into plain - language without Kubernetes jargon (e.g. "there aren't enough free GPUs in the pool - to schedule your job" rather than "Insufficient nvidia.com/gpu"). Also direct the - user to check resource availability in the pool their workflow is waiting in: + Translate Kubernetes events into plain language (e.g. "there aren't enough free + GPUs in the pool" rather than "Insufficient nvidia.com/gpu"). 
Also check: + ``` + osmo resource list -p + ``` + - If COMPLETED, proceed to Step 4. + +4. **Handle completed workflows:** + + Offer the output dataset for download: + `Would you like me to download the output dataset now?` + Ask whether they want a specific output folder (default to `~/`). Then run: ``` - osmo resource list -p + osmo dataset download ``` + + Also offer to create an OSMO app. Suggest a name derived from the workflow name + (e.g. `sdg-run-42` → app name `sdg-run-42`) and generate a one-sentence description. + If the user agrees, follow the "Create an App" use case. + + When monitoring multiple workflows from the same spec, offer app creation once + (not per workflow) after all reach a terminal state. Do not skip this offer + just because you were in a batch monitoring loop. + --- ## Use Case: Orchestrate a Workflow End-to-End -**When to use:** The user wants to create workflow, submit and monitor it to completion, -or requests an autonomous workflow cycle (e.g. "train GR00T on my data", "create a SDG workflow and run it", -"submit and monitor my workflow", "run end-to-end training", "submit this and -tell me when it's done"). - -### Phase-Split Pattern +**When to use:** The user wants to create a workflow, submit it, and monitor it to +completion (e.g. "train GR00T on my data", "submit and monitor my workflow", +"run end-to-end training", "submit this and tell me when it's done"). -The lifecycle is split between the `/agents/workflow-expert.md` subagent (workflow generation creation, resource check, submission, failure diagnosis) and **you** (live monitoring so the user sees real-time updates). Follow these steps exactly: +### Steps -#### Step 1: Spawn a `/agents/workflow-expert.md` subagent for setup and submission +The lifecycle is split between the `workflow-expert` subagent (workflow generation, +resource check, submission, failure diagnosis) and **you** (live monitoring so the +user sees real-time updates). 
-Spawn the `/agents/workflow-expert.md` subagent. Ask it to **write workflow YAML if needed, check resources and submit the workflow only**. Do NOT ask it to monitor, poll status, or report results — that is your job. +1. **Spawn the workflow-expert subagent for setup and submission.** -Example prompt: -> Create a workflow based on user's request, if any. Check resources first, then submit the workflow to an available resource pool. Return the workflow ID when done. + Ask it to **write workflow YAML if needed, check resources, and submit only**. + Do NOT ask it to monitor, poll status, or report results — that is your job. -The subagent returns: workflow ID, pool name, and OSMO Web link. + Example prompt: + > Create a workflow based on user's request, if any. Check resources first, + > then submit the workflow to an available resource pool. Return the workflow + > ID when done. -#### Step 2: Monitor the workflow inline (you do this — user sees live updates) + The subagent returns: workflow ID, pool name, and OSMO Web link. -After getting the workflow ID, use the "Check Workflow Status" use case to -poll and report. Repeat until a terminal state is reached. +2. **Monitor the workflow inline (you do this — user sees live updates).** -Report each state transition to the user: -- `Status: SCHEDULING (queued 15s)` -- `Workflow transitioned: SCHEDULING → RUNNING` -- `Status: RUNNING (task "train" active, 2m elapsed)` + Use the "Check Workflow Status" use case to poll and report. Repeat until a + terminal state is reached. Adjust the polling interval based on how long you + expect the workflow to take — poll more frequently for short jobs (every 10-15s) + and less frequently for long training runs (every 30-60s). Report each state + transition to the user: + - `Status: SCHEDULING (queued 15s)` + - `Workflow transitioned: SCHEDULING → RUNNING` + - `Status: RUNNING (task "train" active, 2m elapsed)` -#### Step 3: Handle the outcome +3. 
**Handle the outcome.** -**If COMPLETED:** Report results — workflow ID, OSMO Web link, output datasets. -In the same completion message, ask: `Would you like me to download the output dataset now?` -Then follow the COMPLETED handling in "Check Workflow Status". + **If COMPLETED:** Report results — workflow ID, OSMO Web link, output datasets. + Then follow Step 4 of "Check Workflow Status" (download offer + app creation). -**If FAILED:** First, fetch logs using the log-fetching rule from "Check Workflow Status" -Step 2 (1 task = inline, 2+ tasks = delegate to logs-reader subagents). Then resume the -`workflow-expert` subagent (use the `resume` parameter with the agent ID from Step 1) -and pass the logs summary: "Workflow FAILED. Here is the logs summary: . -Diagnose and fix." It returns a new workflow ID. Resume monitoring from Step 2. Max 3 -retries before asking the user for guidance. + **If FAILED:** First, fetch logs using the log-fetching rule from "Check Workflow + Status" Step 2 (1 task = inline, 2+ tasks = delegate to logs-reader subagents). + Then resume the `workflow-expert` subagent (use the `resume` parameter with the + agent ID from Step 1) and pass the logs summary: "Workflow FAILED. Here is + the logs summary: . Diagnose and fix." It returns a new workflow ID. + Resume monitoring from Step 2. Max 3 retries before asking the user for guidance. --- diff --git a/.claude/skills/osmo-skill/agents/logs-reader.md b/skills/osmo-agent/agents/logs-reader.md similarity index 97% rename from .claude/skills/osmo-skill/agents/logs-reader.md rename to skills/osmo-agent/agents/logs-reader.md index 170fc828e..febd4752f 100644 --- a/.claude/skills/osmo-skill/agents/logs-reader.md +++ b/skills/osmo-agent/agents/logs-reader.md @@ -1,5 +1,7 @@ # OSMO Logs Reader Agent +> Spawn a general-purpose subagent and pass these instructions as the prompt. + You are a subagent invoked by the main OSMO agent. 
Your sole job is to fetch and summarize logs for a specific workflow, then return a concise digest that the main agent can use without holding large raw logs in context. diff --git a/skills/osmo-agent/agents/workflow-expert.md b/skills/osmo-agent/agents/workflow-expert.md new file mode 100644 index 000000000..2efd728cd --- /dev/null +++ b/skills/osmo-agent/agents/workflow-expert.md @@ -0,0 +1,87 @@ +# OSMO Workflow Expert Agent + +> Spawn a subagent with access to the osmo skill (SKILL.md) and pass these +> instructions as the prompt. This agent handles workflow creation, resource +> checking, submission, and failure diagnosis — then RETURNS the workflow ID. +> It does NOT monitor workflows. The calling agent handles monitoring inline. + +You are a workflow specialist for the OSMO platform. You handle the heavy +lifting — workflow generation, resource selection, submission, and failure +diagnosis — then return control so the calling agent can monitor inline +with live status updates visible to the user. + +Read `SKILL.md` and its reference files for all CLI procedures. Use those +procedures directly — do not reinvent them. + +## Mode 1: Setup and Submit (default) + +Execute these steps using the osmo skill procedures: + +1. **Resource Check** — Follow the "Check Available Resources" use case. + Pick the pool with the best GPU match for the user's needs. + +2. **Workflow Generation** — If `workflow.yaml` already exists and the user + referenced it, submit it as-is. Do NOT modify the YAML — no adding/removing + tasks, renaming tasks, changing resource values, or altering the script + contents. If you spot an obvious issue (e.g. wrong template variable), + flag it in your return message but still submit the original unchanged. + Otherwise, follow the "Generate and Submit a Workflow" use case to create one. + +3. **Submit** — Follow the submission steps from the skill. Skip user + confirmation if pre-authorized. 
On validation errors, auto-adjust + resources per the skill's sizing rules and resubmit. + +4. **Return** — After successful submission, return a structured response: + - **Workflow ID** and **pool name** + - **OSMO Web link**: + - **Output datasets** the workflow will produce (names from the YAML) + + Do NOT poll or monitor the workflow. Return immediately after submission. + +## Mode 2: Diagnose and Fix (via resume) + +When resumed with a failure context (workflow ID + status): + +1. **Analyze logs** — Analyze the logs summary that is provided to you + first. If the summary is not informative enough for root-cause + analysis, fetch more detailed logs with + `osmo workflow logs -n 10000`. Note: for multi-task + workflows, the calling agent should delegate log fetching to + logs-reader subagents before resuming you — request this if the logs + summary is insufficient. + +2. **Root-cause analysis** — Identify the failure (OOM/exit 137, script + error, image pull failure, NCCL timeout, template variable errors, etc.) + +3. **Proactive review** — When fixing a script error, review the ENTIRE + script for other potential issues that would cause a runtime failure — + not just the line that failed. Fix all such issues in a single pass to + minimize retry cycles. Limit fixes to things that would break execution + (missing commands, wrong template variables, syntax errors, bad paths). + Do NOT change resource values (CPU, GPU, memory), task structure, or + make optimizations the user did not ask for. + +4. **Explain the fix** — State what failed, what you changed, and any + other issues you caught proactively. Use plain language. + +5. **Resubmit** — Submit to the same pool. + +6. **Return** — Provide the new workflow ID (same format as Mode 1 step 4), + plus a summary of what was fixed. + +Track retries across resume invocations. After 3 failures, ask the user. + +## Guidelines + +- Use plain language — no Kubernetes jargon. 
+- Run commands yourself — do not tell the user to run them. +- When in doubt about user intent, ask before submitting. + +## Learnings to Report + +After each successful workflow cycle (submit or diagnose+fix), include +these observations in your return message so the calling agent can track them: + +- **Pool performance**: Which pool was used, queue time, any reliability issues +- **Error patterns**: Failures seen and the fixes that resolved them +- **Resource sizing**: GPU/CPU/memory/storage values that worked for the workload diff --git a/.claude/skills/osmo-skill/references/advanced-patterns.md b/skills/osmo-agent/references/advanced-patterns.md similarity index 100% rename from .claude/skills/osmo-skill/references/advanced-patterns.md rename to skills/osmo-agent/references/advanced-patterns.md diff --git a/.claude/skills/osmo-skill/references/cookbook.md b/skills/osmo-agent/references/cookbook.md similarity index 100% rename from .claude/skills/osmo-skill/references/cookbook.md rename to skills/osmo-agent/references/cookbook.md diff --git a/.claude/skills/osmo-skill/references/workflow-patterns.md b/skills/osmo-agent/references/workflow-patterns.md similarity index 100% rename from .claude/skills/osmo-skill/references/workflow-patterns.md rename to skills/osmo-agent/references/workflow-patterns.md diff --git a/.claude/skills/osmo-skill/tests/orchestrator-runtime-failure.md b/skills/osmo-agent/tests/orchestrator-runtime-failure.md similarity index 99% rename from .claude/skills/osmo-skill/tests/orchestrator-runtime-failure.md rename to skills/osmo-agent/tests/orchestrator-runtime-failure.md index 2c8b8f627..c8d7cfa05 100644 --- a/.claude/skills/osmo-skill/tests/orchestrator-runtime-failure.md +++ b/skills/osmo-agent/tests/orchestrator-runtime-failure.md @@ -99,7 +99,6 @@ Phase 3 confirmation pause: ## Expected Behavior (Phase-Split Architecture) -This test validates the phase-split pattern defined in DESIGN_DOC_V2.md §9. 
The workflow expert handles setup/submit and failure diagnosis in its isolated context, while the main conversation monitors inline so the user sees live status updates. diff --git a/src/service/core/auth/auth_service.py b/src/service/core/auth/auth_service.py index bb7ae8050..bfb827acc 100644 --- a/src/service/core/auth/auth_service.py +++ b/src/service/core/auth/auth_service.py @@ -57,6 +57,10 @@ def get_new_jwt_token(refresh_token: str, workflow_id: str, """ API to fetch for a new access token using a refresh token. """ + if len(refresh_token) not in task_lib.VALID_TOKEN_LENGTHS: + raise osmo_errors.OSMOUserError( + f'Refresh token has invalid length {len(refresh_token)}') + postgres = connectors.PostgresConnector.get_instance() service_config = postgres.get_service_configs() @@ -114,6 +118,10 @@ def get_jwt_token_from_access_token(access_token: str): """ API to create a new jwt token from an access token. """ + if len(access_token) not in task_lib.VALID_TOKEN_LENGTHS: + raise osmo_errors.OSMOUserError( + f'Access token has invalid length {len(access_token)}') + postgres = connectors.PostgresConnector.get_instance() token = objects.AccessToken.validate_access_token(postgres, access_token) if not token: diff --git a/src/service/core/data/data_service.py b/src/service/core/data/data_service.py index 8f68b3577..71e1614fe 100755 --- a/src/service/core/data/data_service.py +++ b/src/service/core/data/data_service.py @@ -1004,7 +1004,8 @@ def list_dataset_from_bucket(name: objects.DatasetPattern | None = None, """ This api returns the list of datasets/colections.""" postgres = connectors.PostgresConnector.get_instance() fetch_cmd = ''' - SELECT dataset.*, dv.created_date as dv_created_date, dv.version_id as dv_version_id, + SELECT DISTINCT dataset.*, dv.created_date as dv_created_date, + dv.version_id as dv_version_id, COALESCE(dv.created_date, dataset.created_date) as combined_date FROM dataset LEFT JOIN (SELECT dataset_version.* FROM dataset_version @@ -1049,8 
+1050,7 @@ def list_dataset_from_bucket(name: objects.DatasetPattern | None = None, fetch_cmd += ' AND name LIKE %s' fetch_input.append('%' + name + '%') - fetch_cmd += \ - ' GROUP BY dataset.id, dv.created_date, dv.version_id ORDER BY combined_date DESC LIMIT %s' + fetch_cmd += ' ORDER BY combined_date DESC LIMIT %s' fetch_input.append(min(count, 1000)) fetch_cmd = f'SELECT * FROM ({fetch_cmd}) as ds' diff --git a/src/ui/package.json b/src/ui/package.json index be8e3b072..89085889b 100644 --- a/src/ui/package.json +++ b/src/ui/package.json @@ -32,6 +32,10 @@ "validate": "pnpm licenses:check && pnpm type-check && pnpm lint && pnpm format:check && pnpm test && pnpm build" }, "dependencies": { + "@codemirror/lang-json": "^6.0.2", + "@codemirror/lang-markdown": "^6.5.0", + "@codemirror/lang-python": "^6.2.1", + "@codemirror/lang-xml": "^6.1.0", "@codemirror/lang-yaml": "^6.1.2", "@codemirror/language": "^6.12.1", "@codemirror/search": "^6.6.0", diff --git a/src/ui/pnpm-lock.yaml b/src/ui/pnpm-lock.yaml index 242300b50..47ff7bbf0 100644 --- a/src/ui/pnpm-lock.yaml +++ b/src/ui/pnpm-lock.yaml @@ -8,6 +8,18 @@ importers: .: dependencies: + '@codemirror/lang-json': + specifier: ^6.0.2 + version: 6.0.2 + '@codemirror/lang-markdown': + specifier: ^6.5.0 + version: 6.5.0 + '@codemirror/lang-python': + specifier: ^6.2.1 + version: 6.2.1 + '@codemirror/lang-xml': + specifier: ^6.1.0 + version: 6.1.0 '@codemirror/lang-yaml': specifier: ^6.1.2 version: 6.1.2 @@ -376,6 +388,27 @@ packages: '@codemirror/commands@6.10.2': resolution: {integrity: sha512-vvX1fsih9HledO1c9zdotZYUZnE4xV0m6i3m25s5DIfXofuprk6cRcLUZvSk3CASUbwjQX21tOGbkY2BH8TpnQ==} + '@codemirror/lang-css@6.3.1': + resolution: {integrity: sha512-kr5fwBGiGtmz6l0LSJIbno9QrifNMUusivHbnA1H6Dmqy4HZFte3UAICix1VuKo0lMPKQr2rqB+0BkKi/S3Ejg==} + + '@codemirror/lang-html@6.4.11': + resolution: {integrity: sha512-9NsXp7Nwp891pQchI7gPdTwBuSuT3K65NGTHWHNJ55HjYcHLllr0rbIZNdOzas9ztc1EUVBlHou85FFZS4BNnw==} + + 
'@codemirror/lang-javascript@6.2.5': + resolution: {integrity: sha512-zD4e5mS+50htS7F+TYjBPsiIFGanfVqg4HyUz6WNFikgOPf2BgKlx+TQedI1w6n/IqRBVBbBWmGFdLB/7uxO4A==} + + '@codemirror/lang-json@6.0.2': + resolution: {integrity: sha512-x2OtO+AvwEHrEwR0FyyPtfDUiloG3rnVTSZV1W8UteaLL8/MajQd8DpvUb2YVzC+/T18aSDv0H9mu+xw0EStoQ==} + + '@codemirror/lang-markdown@6.5.0': + resolution: {integrity: sha512-0K40bZ35jpHya6FriukbgaleaqzBLZfOh7HuzqbMxBXkbYMJDxfF39c23xOgxFezR+3G+tR2/Mup+Xk865OMvw==} + + '@codemirror/lang-python@6.2.1': + resolution: {integrity: sha512-IRjC8RUBhn9mGR9ywecNhB51yePWCGgvHfY1lWN/Mrp3cKuHr0isDKia+9HnvhiWNnMpbGhWrkhuWOc09exRyw==} + + '@codemirror/lang-xml@6.1.0': + resolution: {integrity: sha512-3z0blhicHLfwi2UgkZYRPioSgVTo9PV5GP5ducFH6FaHy0IAJRg+ixj5gTR1gnT/glAIC8xv4w2VL1LoZfs+Jg==} + '@codemirror/lang-yaml@6.1.2': resolution: {integrity: sha512-dxrfG8w5Ce/QbT7YID7mWZFKhdhsaTNOYjOkSIMt1qmC4VQnXSDSYVHHHn8k6kJUfIhtLo8t1JJgltlxWdsITw==} @@ -797,12 +830,33 @@ packages: '@lezer/common@1.5.1': resolution: {integrity: sha512-6YRVG9vBkaY7p1IVxL4s44n5nUnaNnGM2/AckNgYOnxTG2kWh1vR8BMxPseWPjRNpb5VtXnMpeYAEAADoRV1Iw==} + '@lezer/css@1.3.1': + resolution: {integrity: sha512-PYAKeUVBo3HFThruRyp/iK91SwiZJnzXh8QzkQlwijB5y+N5iB28+iLk78o2zmKqqV0uolNhCwFqB8LA7b0Svg==} + '@lezer/highlight@1.2.3': resolution: {integrity: sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==} + '@lezer/html@1.3.13': + resolution: {integrity: sha512-oI7n6NJml729m7pjm9lvLvmXbdoMoi2f+1pwSDJkl9d68zGr7a9Btz8NdHTGQZtW2DA25ybeuv/SyDb9D5tseg==} + + '@lezer/javascript@1.5.4': + resolution: {integrity: sha512-vvYx3MhWqeZtGPwDStM2dwgljd5smolYD2lR2UyFcHfxbBQebqx8yjmFmxtJ/E6nN6u1D9srOiVWm3Rb4tmcUA==} + + '@lezer/json@1.0.3': + resolution: {integrity: sha512-BP9KzdF9Y35PDpv04r0VeSTKDeox5vVr3efE7eBbx3r4s3oNLfunchejZhjArmeieBH+nVOpgIiBJpEAv8ilqQ==} + '@lezer/lr@1.4.8': resolution: {integrity: 
sha512-bPWa0Pgx69ylNlMlPvBPryqeLYQjyJjqPx+Aupm5zydLIF3NE+6MMLT8Yi23Bd9cif9VS00aUebn+6fDIGBcDA==} + '@lezer/markdown@1.6.3': + resolution: {integrity: sha512-jpGm5Ps+XErS+xA4urw7ogEGkeZOahVQF21Z6oECF0sj+2liwZopd2+I8uH5I/vZsRuuze3OxBREIANLf6KKUw==} + + '@lezer/python@1.1.18': + resolution: {integrity: sha512-31FiUrU7z9+d/ElGQLJFXl+dKOdx0jALlP3KEOsGTex8mvj+SoE1FgItcHWK/axkxCHGUSpqIHt6JAWfWu9Rhg==} + + '@lezer/xml@1.0.6': + resolution: {integrity: sha512-CdDwirL0OEaStFue/66ZmFSeppuL6Dwjlk8qk153mSQwiSH/Dlri4GNymrNWnUmPl2Um7QfV1FO9KFUyX3Twww==} + '@lezer/yaml@1.0.4': resolution: {integrity: sha512-2lrrHqxalACEbxIbsjhqGpSW8kWpUKuY6RHgnSAFZa6qK62wvnPxA8hGOwOoDbwHcOFs5M4o27mjGu+P7TvBmw==} @@ -5230,6 +5284,68 @@ snapshots: '@codemirror/view': 6.39.15 '@lezer/common': 1.5.1 + '@codemirror/lang-css@6.3.1': + dependencies: + '@codemirror/autocomplete': 6.20.0 + '@codemirror/language': 6.12.1 + '@codemirror/state': 6.5.4 + '@lezer/common': 1.5.1 + '@lezer/css': 1.3.1 + + '@codemirror/lang-html@6.4.11': + dependencies: + '@codemirror/autocomplete': 6.20.0 + '@codemirror/lang-css': 6.3.1 + '@codemirror/lang-javascript': 6.2.5 + '@codemirror/language': 6.12.1 + '@codemirror/state': 6.5.4 + '@codemirror/view': 6.39.15 + '@lezer/common': 1.5.1 + '@lezer/css': 1.3.1 + '@lezer/html': 1.3.13 + + '@codemirror/lang-javascript@6.2.5': + dependencies: + '@codemirror/autocomplete': 6.20.0 + '@codemirror/language': 6.12.1 + '@codemirror/lint': 6.9.4 + '@codemirror/state': 6.5.4 + '@codemirror/view': 6.39.15 + '@lezer/common': 1.5.1 + '@lezer/javascript': 1.5.4 + + '@codemirror/lang-json@6.0.2': + dependencies: + '@codemirror/language': 6.12.1 + '@lezer/json': 1.0.3 + + '@codemirror/lang-markdown@6.5.0': + dependencies: + '@codemirror/autocomplete': 6.20.0 + '@codemirror/lang-html': 6.4.11 + '@codemirror/language': 6.12.1 + '@codemirror/state': 6.5.4 + '@codemirror/view': 6.39.15 + '@lezer/common': 1.5.1 + '@lezer/markdown': 1.6.3 + + '@codemirror/lang-python@6.2.1': + dependencies: + 
'@codemirror/autocomplete': 6.20.0 + '@codemirror/language': 6.12.1 + '@codemirror/state': 6.5.4 + '@lezer/common': 1.5.1 + '@lezer/python': 1.1.18 + + '@codemirror/lang-xml@6.1.0': + dependencies: + '@codemirror/autocomplete': 6.20.0 + '@codemirror/language': 6.12.1 + '@codemirror/state': 6.5.4 + '@codemirror/view': 6.39.15 + '@lezer/common': 1.5.1 + '@lezer/xml': 1.0.6 + '@codemirror/lang-yaml@6.1.2': dependencies: '@codemirror/autocomplete': 6.20.0 @@ -5588,14 +5704,55 @@ snapshots: '@lezer/common@1.5.1': {} + '@lezer/css@1.3.1': + dependencies: + '@lezer/common': 1.5.1 + '@lezer/highlight': 1.2.3 + '@lezer/lr': 1.4.8 + '@lezer/highlight@1.2.3': dependencies: '@lezer/common': 1.5.1 + '@lezer/html@1.3.13': + dependencies: + '@lezer/common': 1.5.1 + '@lezer/highlight': 1.2.3 + '@lezer/lr': 1.4.8 + + '@lezer/javascript@1.5.4': + dependencies: + '@lezer/common': 1.5.1 + '@lezer/highlight': 1.2.3 + '@lezer/lr': 1.4.8 + + '@lezer/json@1.0.3': + dependencies: + '@lezer/common': 1.5.1 + '@lezer/highlight': 1.2.3 + '@lezer/lr': 1.4.8 + '@lezer/lr@1.4.8': dependencies: '@lezer/common': 1.5.1 + '@lezer/markdown@1.6.3': + dependencies: + '@lezer/common': 1.5.1 + '@lezer/highlight': 1.2.3 + + '@lezer/python@1.1.18': + dependencies: + '@lezer/common': 1.5.1 + '@lezer/highlight': 1.2.3 + '@lezer/lr': 1.4.8 + + '@lezer/xml@1.0.6': + dependencies: + '@lezer/common': 1.5.1 + '@lezer/highlight': 1.2.3 + '@lezer/lr': 1.4.8 + '@lezer/yaml@1.0.4': dependencies: '@lezer/common': 1.5.1 diff --git a/src/ui/src/components/code-viewer/lib/languages.ts b/src/ui/src/components/code-viewer/lib/languages.ts index 82e19e4d8..c41293c05 100644 --- a/src/ui/src/components/code-viewer/lib/languages.ts +++ b/src/ui/src/components/code-viewer/lib/languages.ts @@ -23,6 +23,10 @@ */ import { yaml } from "@codemirror/lang-yaml"; +import { json } from "@codemirror/lang-json"; +import { markdown } from "@codemirror/lang-markdown"; +import { python } from "@codemirror/lang-python"; +import { xml } from 
"@codemirror/lang-xml"; import type { LanguageExtension } from "@/components/code-viewer/lib/types"; /** YAML language extension preset for specs, configs, and templates */ @@ -30,3 +34,45 @@ export const YAML_LANGUAGE: LanguageExtension = { name: "YAML", extension: yaml(), }; + +export const JSON_LANGUAGE: LanguageExtension = { + name: "JSON", + extension: json(), +}; + +export const MARKDOWN_LANGUAGE: LanguageExtension = { + name: "Markdown", + extension: markdown(), +}; + +export const PYTHON_LANGUAGE: LanguageExtension = { + name: "Python", + extension: python(), +}; + +export const XML_LANGUAGE: LanguageExtension = { + name: "XML", + extension: xml(), +}; + +export const PLAIN_TEXT_LANGUAGE: LanguageExtension = { + name: "Text", + extension: [], +}; + +/** + * Resolves a CodeMirror language preset from a MIME content type and file name. + * Falls back to plain text when no specific language matches. + */ +export function getLanguageForContentType(contentType: string, fileName: string): LanguageExtension { + const ext = fileName.split(".").pop()?.toLowerCase() ?? 
""; + + if (contentType.includes("json") || ext === "json") return JSON_LANGUAGE; + if (contentType.includes("yaml") || ext === "yaml" || ext === "yml") return YAML_LANGUAGE; + if (contentType.includes("xml") || ext === "xml") return XML_LANGUAGE; + if (contentType.startsWith("text/markdown") || ext === "md" || ext === "mdx") return MARKDOWN_LANGUAGE; + if (contentType.startsWith("application/x-python") || contentType.startsWith("text/x-python") || ext === "py") + return PYTHON_LANGUAGE; + + return PLAIN_TEXT_LANGUAGE; +} diff --git a/src/ui/src/components/data-table/table-states.tsx b/src/ui/src/components/data-table/table-states.tsx index ffa3d3273..60b6e986a 100644 --- a/src/ui/src/components/data-table/table-states.tsx +++ b/src/ui/src/components/data-table/table-states.tsx @@ -47,6 +47,8 @@ interface TableLoadingSkeletonProps { rows?: number; /** Height of each skeleton row */ rowHeight?: number; + /** Column header labels — shows actual text instead of skeleton bars */ + headers?: string[]; /** Additional CSS class for the container */ className?: string; } @@ -58,6 +60,8 @@ interface TableErrorStateProps { title?: string; /** Callback when retry button is clicked */ onRetry?: () => void; + /** Column header labels — renders a header row above the error content */ + headers?: string[]; /** Additional CSS class for the container */ className?: string; } @@ -76,6 +80,7 @@ export function TableLoadingSkeleton({ rows = 10, rowHeight = 48, columnCount = 5, + headers, className, }: TableLoadingSkeletonProps) { return ( @@ -83,6 +88,7 @@ export function TableLoadingSkeleton({ rowCount={rows} rowHeight={rowHeight} columnCount={columnCount} + headers={headers} className={className} /> ); @@ -97,9 +103,22 @@ export function TableLoadingSkeleton({ * * Displays an error message with optional retry button. 
*/ -export function TableErrorState({ error, title = "Unable to load data", onRetry, className }: TableErrorStateProps) { +export function TableErrorState({ + error, + title = "Unable to load data", + onRetry, + headers, + className, +}: TableErrorStateProps) { return ( -
+
+ {headers && ( + + )}
{title}
{error.message}
diff --git a/src/ui/src/components/filter-bar/filter-bar.css b/src/ui/src/components/filter-bar/filter-bar.css index abc27999b..aa8122a25 100644 --- a/src/ui/src/components/filter-bar/filter-bar.css +++ b/src/ui/src/components/filter-bar/filter-bar.css @@ -75,7 +75,7 @@ /* FilterBar input needs wider min-width to accommodate filter expressions */ .fb-input { - min-width: 150px; + min-width: 70px; } .fb-clear-button { diff --git a/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx b/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx index d297e0967..2e3c5e741 100644 --- a/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx +++ b/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx @@ -1,52 +1,57 @@ -// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 +//SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. + +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. 
+//You may obtain a copy of the License at + +//http://www.apache.org/licenses/LICENSE-2.0 + +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. + +//SPDX-License-Identifier: Apache-2.0 /** * Dataset Detail Content (Client Component) * - * Google Drive-style file browser for a dataset version or collection. + * Side-by-side layout: file browser (left, flex-1) + toggleable file preview panel (right). + * Dataset details open in the layout-level overlay panel (DatasetsPanelLayout). * - * For datasets: ?version= selects which version's files to browse. - * For collections: the file browser root lists member datasets; navigating into - * one sets ?path= to the member ID (e.g., "imagenet-1k:2"), and deeper paths - * are within that member's file manifest. 
+ * File preview panel state: + * closed ──[click file]──► open (file preview) + * open ────[click file]──► open (update preview) + * open ────[X / Esc]─────► closed * - * URL state: ?path= (current directory), ?version= (dataset version), ?file= (selected file) + * URL state: ?path= (current dir), ?version= (dataset version), ?file= (selected file) */ "use client"; -import { useState, useMemo, useCallback, useRef } from "react"; +import { useState, useMemo, useCallback, useRef, useEffect, useLayoutEffect } from "react"; +import { usePrevious } from "@react-hookz/web"; +import type { SearchChip } from "@/components/filter-bar/lib/types"; import { usePage } from "@/components/chrome/page-context"; import { InlineErrorBoundary } from "@/components/error/inline-error-boundary"; import { Button } from "@/components/shadcn/button"; -import { cn } from "@/lib/utils"; +import { GripVertical } from "lucide-react"; +import { cn, naturalCompare } from "@/lib/utils"; import { useResizeDrag } from "@/components/panel/hooks/use-resize-drag"; +import { usePanelAnimation } from "@/components/panel/hooks/use-panel-animation"; import { FileBrowserBreadcrumb } from "@/features/datasets/detail/components/file-browser-breadcrumb"; -import { FileBrowserControls } from "@/features/datasets/detail/components/file-browser-controls"; +import { FileBrowserControlStrip } from "@/features/datasets/detail/components/file-browser-control-strip"; import { FileBrowserTable } from "@/features/datasets/detail/components/file-browser-table"; import { FilePreviewPanel } from "@/features/datasets/detail/components/file-preview-panel"; import { useDatasetsPanelContext } from "@/features/datasets/layout/datasets-panel-context"; -import { useDatasetDetail } from "@/features/datasets/detail/hooks/use-dataset-detail"; import { useFileBrowserState } from "@/features/datasets/detail/hooks/use-file-browser-state"; -import { useDatasetFiles } from "@/lib/api/adapter/datasets-hooks"; -import { 
buildDirectoryListing } from "@/lib/api/adapter/datasets"; +import { useDataset, useDatasetFiles } from "@/lib/api/adapter/datasets-hooks"; +import { buildDirectoryListing, binarySearchByPath } from "@/lib/api/adapter/datasets"; +import { searchManifest, searchByExtension } from "@/lib/api/adapter/dataset-search"; import { DatasetType } from "@/lib/api/generated"; -import type { SwitcherItem } from "@/features/datasets/detail/components/version-switcher"; import type { DatasetFile } from "@/lib/api/adapter/datasets"; +import "@/components/panel/resizable-panel.css"; interface Props { bucket: string; @@ -58,7 +63,7 @@ export function DatasetDetailContent({ bucket, name }: Props) { // Dataset/collection metadata // ========================================================================== - const { detail, error: datasetError, refetch: refetchDataset } = useDatasetDetail(bucket, name); + const { data: detail, error: datasetError, refetch: refetchDataset } = useDataset(bucket, name); // ========================================================================== // URL state: path, version (datasets only), selected file @@ -66,12 +71,110 @@ export function DatasetDetailContent({ bucket, name }: Props) { const { path, version, selectedFile, navigateTo, setVersion, selectFile, clearSelection } = useFileBrowserState(); + // ========================================================================== + // File filter state — chip-based (no debounce needed; chips commit on Enter) + // ========================================================================== + + const [filterChips, setFilterChips] = useState([]); + + // Reset filter when the user navigates to a different directory or version. + // Uses the same usePrevious pattern as the previewPanelOpen sync below (derived-state + // during render) to avoid calling setState inside a useEffect body.
+ const prevFilterPath = usePrevious(path); + const prevFilterVersion = usePrevious(version); + if (prevFilterPath !== undefined && (prevFilterPath !== path || prevFilterVersion !== version)) { + if (filterChips.length > 0) setFilterChips([]); + } + + // ========================================================================== + // File preview panel state + // ========================================================================== + + // Lazy init: if the URL already has file= on mount (e.g. shared link), open immediately. + const [previewPanelOpen, setPreviewPanelOpen] = useState(() => selectedFile !== null); + + // Derived-state sync: keep previewPanelOpen in sync when file= URL param changes externally + // (browser back/forward, shared link, navigation). + const prevSelectedFile = usePrevious(selectedFile); + // file= cleared → close preview + if (prevSelectedFile != null && selectedFile === null && previewPanelOpen) { + setPreviewPanelOpen(false); + } + // file= added while preview closed → open + if (prevSelectedFile === null && selectedFile !== null && !previewPanelOpen) { + setPreviewPanelOpen(true); + } + + // Click a file row → open file preview (or replace current preview) + const handleSelectFile = useCallback( + (filePath: string) => { + selectFile(filePath); + setPreviewPanelOpen(true); + }, + [selectFile], + ); + + // Close preview panel (X button, Esc) + // clearSelection() is deferred to the animation onClosed callback so the file + // preview stays visible inside the panel while it slides out. 
+ const handleClosePanel = useCallback(() => { + setPreviewPanelOpen(false); + }, []); + + // ========================================================================== + // Details overlay panel — controlled by the layout-level DatasetsPanelLayout + // ========================================================================== + + const { isPanelOpen, openPanel, closePanel } = useDatasetsPanelContext(); + + // Priority-ordered Esc: details panel closes first, file preview closes second. + // Used by both the global keydown listener and FileBrowserTable's Esc shortcut. + const handleEscapeKey = useCallback(() => { + if (isPanelOpen) { + closePanel(); + } else if (previewPanelOpen) { + handleClosePanel(); + } + }, [isPanelOpen, closePanel, previewPanelOpen, handleClosePanel]); + + // Global Esc — fires from any focus position + useEffect(() => { + const onKeyDown = (e: KeyboardEvent) => { + if (e.key !== "Escape" || e.defaultPrevented) return; + if (!isPanelOpen && !previewPanelOpen) return; + handleEscapeKey(); + }; + document.addEventListener("keydown", onKeyDown); + return () => document.removeEventListener("keydown", onKeyDown); + }, [isPanelOpen, previewPanelOpen, handleEscapeKey]); + + const handleDetailsToggle = useCallback(() => { + if (isPanelOpen) { + closePanel(); + } else { + openPanel(bucket, name, version ?? null); + } + }, [isPanelOpen, openPanel, closePanel, bucket, name, version]); + + const handleViewAllVersions = useCallback(() => { + // Defer to a microtask so the Popover-close render (setOpen(false)) commits first. + // Without this, React batches both updates into one render; usePrevious(phase) + // then returns "closing" instead of "closed", which bypasses ResizablePanel's + // useLayoutEffect reflow trick and causes the panel to appear without its slide-in. + queueMicrotask(() => openPanel(bucket, name, version ?? 
null)); + }, [openPanel, bucket, name, version]); + + const handleNavigateUp = useCallback(() => { + if (!path) return; + navigateTo(path.split("/").slice(0, -1).join("/")); + }, [path, navigateTo]); + // ========================================================================== // Resolve location + files based on type // ========================================================================== const { - switcherItems, + versions, location, files: virtualFiles, memberSubPath, @@ -79,7 +182,7 @@ export function DatasetDetailContent({ bucket, name }: Props) { } = useMemo(() => { if (!detail) { return { - switcherItems: [] as SwitcherItem[], + versions: [], location: null as string | null, files: null as DatasetFile[] | null, memberSubPath: "", @@ -88,16 +191,11 @@ export function DatasetDetailContent({ bucket, name }: Props) { } if (detail.type === DatasetType.DATASET) { - const sorted = [...detail.versions].sort((a, b) => parseInt(a.version, 10) - parseInt(b.version, 10)); + const sorted = [...detail.versions].sort((a, b) => naturalCompare(a.version, b.version)); const latestVersion = sorted.at(-1) ?? null; - const items: SwitcherItem[] = sorted.map((v) => ({ - id: v.version, - label: `v${v.version}`, - isLatest: v.version === latestVersion?.version, - })); const currentVersionData = (version ? sorted.find((v) => v.version === version) : null) ?? latestVersion; return { - switcherItems: items, + versions: detail.versions, location: currentVersionData?.location ?? null, files: null, memberSubPath: path, @@ -121,7 +219,7 @@ export function DatasetDetailContent({ bucket, name }: Props) { size: m.size, })); return { - switcherItems: [] as SwitcherItem[], + versions: [], location: null, files: memberEntries, memberSubPath: "", @@ -134,7 +232,7 @@ export function DatasetDetailContent({ bucket, name }: Props) { const member = detail.members.find((m) => m.id === memberId) ?? 
null; const subPath = path.split("/").slice(1).join("/"); return { - switcherItems: [] as SwitcherItem[], + versions: [], location: member?.location ?? null, files: null, memberSubPath: subPath, @@ -143,87 +241,182 @@ export function DatasetDetailContent({ bucket, name }: Props) { }, [detail, version, path]); // ========================================================================== - // File listing — fetch manifest for selected version/member, filter client-side + // File listing — fetch manifest for selected version/member // ========================================================================== const { - data: rawFiles, + data: manifest, isLoading: isFilesLoading, error: filesError, refetch: refetchFiles, } = useDatasetFiles(location); - // Build directory listing for the current path - const files = useMemo( - () => virtualFiles ?? buildDirectoryListing(rawFiles ?? [], memberSubPath), - [virtualFiles, rawFiles, memberSubPath], + // Normal (unfiltered) directory listing — used for FilterBar suggestions and as base view + const normalFiles = useMemo( + () => virtualFiles ?? buildDirectoryListing(manifest?.byPath ?? [], memberSubPath), + [virtualFiles, manifest, memberSubPath], ); + // Apply filter chips to produce the displayed file list. + // "file:" chip → recursive prefix search; "type:" chip → recursive extension filter. + // When both are present, apply extension filter as an AND on the prefix search results.
+ const { filteredFiles } = useMemo(() => { + const searchChip = filterChips.find((c) => c.field === "file"); + const typeChip = filterChips.find((c) => c.field === "type"); + + if (!searchChip && !typeChip) return { filteredFiles: normalFiles, capped: false }; + if (!manifest) return { filteredFiles: [] as DatasetFile[], capped: false }; + + if (searchChip && typeChip) { + // AND: prefix-search first, then filter results by extension + const { files, capped: searchCapped } = searchManifest(manifest, memberSubPath, searchChip.value); + const suffix = `.${typeChip.value.toLowerCase()}`; + return { filteredFiles: files.filter((f) => f.name.toLowerCase().endsWith(suffix)), capped: searchCapped }; + } + if (searchChip) { + const { files, capped: searchCapped } = searchManifest(manifest, memberSubPath, searchChip.value); + return { filteredFiles: files, capped: searchCapped }; + } + // typeChip only + const { files, capped: extCapped } = searchByExtension(manifest, memberSubPath, typeChip!.value); + return { filteredFiles: files, capped: extCapped }; + }, [filterChips, manifest, normalFiles, memberSubPath]); + + const handleRetryFiles = useCallback(() => void refetchFiles(), [refetchFiles]); + // ========================================================================== - // File preview panel — side-by-side split with drag-to-resize + // Resolve selected file data for the right panel + // + // First checks the current file list (fastest, has full metadata). + // Falls back to a direct manifest lookup so the panel stays visible + // when the user navigates to a different folder while a file is selected. // ========================================================================== - const containerRef = useRef(null); - const [previewPanelWidth, setPreviewPanelWidth] = useState(35); + const panelFileData = useMemo((): DatasetFile | null => { + if (!selectedFile) return null; + const fileName = selectedFile.split("/").pop() ?? 
""; - const [closedForFile, setClosedForFile] = useState(null); - const previewPanelOpen = !!selectedFile && closedForFile !== selectedFile; + // Prefer current file list entry (has all derived fields) + const fromDir = filteredFiles.find((f) => f.name === fileName && f.type === "file"); + if (fromDir) return fromDir; - const handleClearSelection = useCallback(() => { - if (previewPanelOpen && selectedFile) { - setClosedForFile(selectedFile); - } else { - clearSelection(); - if (document.activeElement instanceof HTMLElement) { - document.activeElement.blur(); - } - } - }, [previewPanelOpen, selectedFile, clearSelection]); + // Fall back to full manifest so preview survives directory navigation (binary search, O(log n)) + const idx = manifest ? binarySearchByPath(manifest.byPath, selectedFile) : -1; + const raw = manifest?.byPath[idx]?.relative_path === selectedFile ? manifest.byPath[idx] : undefined; + if (!raw) return null; + return { + name: fileName, + type: "file", + size: raw.size, + checksum: raw.etag, + url: raw.url, + relativePath: raw.relative_path, + storagePath: raw.storage_path, + }; + }, [selectedFile, filteredFiles, manifest]); - const { isDragging, bindResizeHandle, dragStyles } = useResizeDrag({ - width: previewPanelWidth, - onWidthChange: setPreviewPanelWidth, - minWidth: 20, - maxWidth: 70, - containerRef, - }); - - const selectedFileData = useMemo(() => { - if (!selectedFile) return null; - const fileName = selectedFile.split("/").pop() ?? ""; - return files.find((f) => f.name === fileName && f.type === "file") ?? null; - }, [selectedFile, files]); + // Derive the file's own directory from the URL param so the copy path + // is always correct regardless of which directory is currently browsed. + const fileDirPath = selectedFile ? 
selectedFile.split("/").slice(0, -1).join("/") : ""; // ========================================================================== - // Details panel — controlled by the layout-level DatasetsPanelLayout + // Panel slide animation — drives mount lifecycle + translateX transitions. + // clearSelection() is deferred to onClosed so the preview stays visible + // inside the panel while it slides out. // ========================================================================== - const { isPanelOpen, openPanel, closePanel } = useDatasetsPanelContext(); + const panelRef = useRef(null); - const handleToggleDetails = useCallback(() => { - if (isPanelOpen) { - closePanel(); - } else { - openPanel(bucket, name); + const { + phase, + shellMounted, + panelSlideIn, + contentMounted, + contentState, + contentRef, + handleContentAnimationEnd, + handlePanelTransitionEnd, + } = usePanelAnimation(previewPanelOpen, clearSelection); + + const prevPhase = usePrevious(phase); + + // When the panel finishes opening or closing, fire layout-stable callbacks so + // the table recalculates column widths for its new size. + useEffect(() => { + if ((phase === "open" && prevPhase === "opening") || (phase === "closed" && prevPhase === "closing")) { + for (const cb of layoutStableCallbacksRef.current) cb(); + } + }, [phase, prevPhase]); + + // Both open and close use the same reflow trick so the CSS transition always + // starts from the correct position (before browser paint, unlike useEffect). 
+ // + // Open: panel is flex child (table shrinks), set 100% → reflow → 0 + // Close: panel is absolute (table expands), reset 100% → 0 → reflow → 100% + useLayoutEffect(() => { + if (!panelRef.current) return; + const panel = panelRef.current; + + if (phase === "opening" && prevPhase === "closed") { + panel.style.transform = "translateX(100%)"; + void panel.offsetHeight; + panel.style.transform = "translateX(0)"; } - }, [isPanelOpen, openPanel, closePanel, bucket, name]); - const handleNavigateUp = useCallback(() => { - if (!path) return; - navigateTo(path.split("/").slice(0, -1).join("/")); - }, [path, navigateTo]); + if (phase === "closing" && prevPhase === "open") { + panel.style.transform = "translateX(0)"; + void panel.offsetHeight; + panel.style.transform = "translateX(100%)"; + } + }, [phase, prevPhase]); - const handleRefetchFiles = useCallback(() => { - void refetchFiles(); - }, [refetchFiles]); + // ========================================================================== + // Resizable split between file browser and right panel + // ========================================================================== + + const containerRef = useRef(null); + const [rightPanelWidth, setRightPanelWidth] = useState(35); + + // Callbacks registered by the file browser table's column sizer. + // Called when gutter drag ends so the table recalculates column widths at final size. 
+ const layoutStableCallbacksRef = useRef void>>(new Set()); + + const registerLayoutStableCallback = useCallback((callback: () => void) => { + layoutStableCallbacksRef.current.add(callback); + return () => layoutStableCallbacksRef.current.delete(callback); + }, []); + + const { isDragging, bindResizeHandle } = useResizeDrag({ + width: rightPanelWidth, + onWidthChange: setRightPanelWidth, + minWidth: 20, + maxWidth: 70, + containerRef, + onDragEnd: () => { + for (const cb of layoutStableCallbacksRef.current) cb(); + }, + }); // ========================================================================== - // Chrome: breadcrumbs + inline path + controls + // Chrome: static breadcrumbs (Datasets > bucket > name) + // Path segments live in the control strip breadcrumb below. // ========================================================================== + usePage({ + title: name, + breadcrumbs: [ + { label: "Datasets", href: "/datasets" }, + { label: bucket, href: `/datasets?f=bucket:${encodeURIComponent(bucket)}` }, + ], + }); + // For collections, don't pass rawFiles to breadcrumb (disables sibling popovers - // which don't make sense for member-level segments) - const breadcrumbRawFiles = detail?.type === DatasetType.COLLECTION ? undefined : (rawFiles ?? undefined); + // which don't make sense for member-level segments). + // Collections also pin the first path segment (member dataset name) so it stays + // visible even when deeper folders collapse into the ellipsis. + const isCollection = detail?.type === DatasetType.COLLECTION; + const breadcrumbRawFiles = isCollection ? undefined : (manifest?.byPath ?? undefined); + const breadcrumbPinnedPrefixCount = isCollection ? 1 : 0; const breadcrumbTrail = useMemo( () => ( @@ -233,34 +426,12 @@ export function DatasetDetailContent({ bucket, name }: Props) { onNavigate={navigateTo} rawFiles={breadcrumbRawFiles} segmentLabels={Object.keys(segmentLabels).length > 0 ? 
segmentLabels : undefined} + pinnedPrefixCount={breadcrumbPinnedPrefixCount} /> ), - [name, path, navigateTo, breadcrumbRawFiles, segmentLabels], - ); - - const headerControls = useMemo( - () => ( - - ), - [switcherItems, version, setVersion, isPanelOpen, handleToggleDetails], + [name, path, navigateTo, breadcrumbRawFiles, segmentLabels, breadcrumbPinnedPrefixCount], ); - usePage({ - title: "", - breadcrumbs: [ - { label: "Datasets", href: "/datasets" }, - { label: bucket, href: `/datasets?f=bucket:${encodeURIComponent(bucket)}` }, - ], - trailingBreadcrumbs: breadcrumbTrail, - headerActions: headerControls, - }); - // ========================================================================== // Error state — dataset/collection failed to load // ========================================================================== @@ -290,28 +461,25 @@ export function DatasetDetailContent({ bucket, name }: Props) { // File listing content — handles query error inline // ========================================================================== - const fileTableContent = filesError ? ( -
-

Failed to load files.

- -
- ) : ( + // Filter input is shown for datasets and for collections when browsing inside a member. + // Hidden on the collection root view (which shows virtual dataset-member entries, not real files). + const showFilter = !isCollection || path !== ""; + + const fileTableContent = ( 0} path={path} selectedFile={selectedFile} onNavigate={navigateTo} - onSelectFile={selectFile} + onSelectFile={handleSelectFile} onNavigateUp={handleNavigateUp} - onClearSelection={handleClearSelection} - previewOpen={previewPanelOpen} + onClearSelection={handleEscapeKey} isLoading={isFilesLoading && !virtualFiles} + error={filesError} + onRetry={handleRetryFiles} + suspendResize={isDragging} + registerLayoutStableCallback={registerLayoutStableCallback} /> ); @@ -320,42 +488,95 @@ export function DatasetDetailContent({ bucket, name }: Props) { // ========================================================================== return ( -
+
+ {/* Control strip */} + + + {/* File browser + optional file preview panel */}
+ {/* File browser — fills remaining width */}
{fileTableContent}
- {previewPanelOpen && ( + {shellMounted && ( <> + {/* Resize gutter — hidden instantly on close (frees flex space for the table) */}
+ aria-valuenow={rightPanelWidth} + > +
+ + {/* File preview panel — slides in/out via translateX */} diff --git a/src/ui/src/features/datasets/detail/components/file-browser-breadcrumb.tsx b/src/ui/src/features/datasets/detail/components/file-browser-breadcrumb.tsx index 533541f92..e3f98929b 100644 --- a/src/ui/src/features/datasets/detail/components/file-browser-breadcrumb.tsx +++ b/src/ui/src/features/datasets/detail/components/file-browser-breadcrumb.tsx @@ -17,28 +17,31 @@ /** * FileBrowserBreadcrumb — In-browser path navigation for the dataset file browser. * - * Renders: > datasetName > segment > segment > ... + * Renders: datasetName > segment > segment > ... * - * Designed to be placed in the chrome header's `trailingBreadcrumbs` slot (inline in the nav - * after the standard page breadcrumbs). The leading ChevronRight is included so it flows - * seamlessly after "bucket" in the breadcrumb trail. + * Intended to be placed in FileBrowserControlStrip's breadcrumb slot. + * The separator between the VersionPicker (datasets only) and this breadcrumb + * is owned by FileBrowserControlStrip — this component renders no leading chevron. * * - Dataset name links to file browser root (path="") * - Each path segment opens a popover listing sibling folders (when rawFiles provided) * - Deep paths (> 2 segments) collapse to: datasetName > … > parent > current + * The ellipsis is non-interactive; the immediate parent is always shown. */ "use client"; import { memo, useMemo } from "react"; -import { Button } from "@/components/shadcn/button"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/shadcn/popover"; import { ChevronRight, Folder, Check } from "lucide-react"; import { cn } from "@/lib/utils"; import { buildDirectoryListing } from "@/lib/api/adapter/datasets"; import type { RawFileItem } from "@/lib/api/adapter/datasets"; -/** Show all segments when depth ≤ this; collapse with ellipsis when deeper. */ +/** + * How many trailing segments are always visible (parent + current folder). 
+ * Collapse triggers when the non-pinned segment count exceeds this. + */ const COLLAPSE_THRESHOLD = 2; // ============================================================================= @@ -46,59 +49,46 @@ const COLLAPSE_THRESHOLD = 2; // ============================================================================= interface SiblingPopoverProps { - /** The name of the current segment (highlighted in the list) */ + /** The name of the current (last) segment */ segment: string; /** The parent directory path used to compute siblings */ parentPath: string; /** Full flat file manifest */ rawFiles: RawFileItem[]; - /** Whether this is the last (current) segment */ - isCurrent: boolean; /** Called to navigate to a sibling folder */ onNavigate: (path: string) => void; } -function SiblingPopover({ segment, parentPath, rawFiles, isCurrent, onNavigate }: SiblingPopoverProps) { +function SiblingPopover({ segment, parentPath, rawFiles, onNavigate }: SiblingPopoverProps) { const siblings = useMemo( () => buildDirectoryListing(rawFiles, parentPath).filter((f) => f.type === "folder"), [rawFiles, parentPath], ); - // Fall back to plain text for the current segment when no siblings exist + // Fall back to plain text when no siblings exist if (siblings.length === 0) { - return isCurrent ? ( + return ( {segment} - ) : null; + ); } return ( - {isCurrent ? ( - // Plain - ) : ( - - )} + {/* Plain ; + /** + * Number of leading path segments to pin (always show even when collapsed). + * Default 0 (datasets). Use 1 for collections so the member dataset name stays visible: + * Collection + 1 level: collectionName > datasetName > folder + * Collection + 2+ levels: collectionName > datasetName > … > folder + */ + pinnedPrefixCount?: number; } /** @@ -168,29 +165,43 @@ interface FileBrowserBreadcrumbProps { - * Includes a leading ChevronRight separator so it flows after the preceding chrome breadcrumbs. - * Intended to be placed in the `trailingBreadcrumbs` slot of `usePage()`. + * Renders no leading separator — the one before the dataset name is owned by FileBrowserControlStrip. + * Intended to be placed in FileBrowserControlStrip's breadcrumb slot.
*/ +type SegmentItem = { kind: "segment"; segment: string; absoluteIndex: number }; +type EllipsisItem = { kind: "ellipsis" }; +type BreadcrumbItem = SegmentItem | EllipsisItem; + export const FileBrowserBreadcrumb = memo(function FileBrowserBreadcrumb({ datasetName, path, onNavigate, rawFiles, segmentLabels, + pinnedPrefixCount = 0, }: FileBrowserBreadcrumbProps) { const segments = path ? path.split("/").filter(Boolean) : []; - // When deeply nested, show only the last COLLAPSE_THRESHOLD segments - const collapsed = segments.length > COLLAPSE_THRESHOLD; - const visibleSegments = collapsed ? segments.slice(-COLLAPSE_THRESHOLD) : segments; - const visibleOffset = collapsed ? segments.length - COLLAPSE_THRESHOLD : 0; + // Collapse when non-prefix segments exceed the threshold. + // pinnedPrefixCount=0 (dataset): collapse at depth > 2 → name > … > parent > folder + // pinnedPrefixCount=1 (collection): collapse at depth > 3 → name > member > … > parent > folder + const collapsed = segments.length > pinnedPrefixCount + COLLAPSE_THRESHOLD; + + const pinnedSegments = segments.slice(0, pinnedPrefixCount); + + // Build the ordered list of items to render. When collapsed, the ellipsis sentinel is + // inserted AFTER the pinned prefix and BEFORE the two trailing segments — preserving correct order: + // dataset: name > [ellipsis] > parent > folder + // collection: name > member > [ellipsis] > parent > folder + const items: BreadcrumbItem[] = collapsed + ? [ + ...pinnedSegments.map((seg, i): SegmentItem => ({ kind: "segment", segment: seg, absoluteIndex: i })), + { kind: "ellipsis" }, + { kind: "segment", segment: segments[segments.length - 2], absoluteIndex: segments.length - 2 }, + { kind: "segment", segment: segments[segments.length - 1], absoluteIndex: segments.length - 1 }, + ] + : segments.map((seg, i): SegmentItem => ({ kind: "segment", segment: seg, absoluteIndex: i })); return ( <> - {/* Separator between preceding chrome breadcrumbs and dataset name */} -