# mkdocs.yml — MkDocs configuration for the rLLM documentation site
# (source: fw-ai-external/rllm, branch main)
# Site-wide metadata.
# NOTE(review): site_name is set to an empty string; presumably the logo image
# carries the branding in the header instead of a text title — confirm before
# changing.
site_name: ''
site_description: 'A library for training LLM agents with reinforcement learning'
repo_url: 'https://github.com/rllm-org/rllm'
# Directory holding the Markdown sources.
docs_dir: 'docs'
# Theme configuration: the `material` theme with a custom logo/favicon,
# a light/dark palette pair and a set of navigation/content feature flags.
theme:
  name: material
  logo: assets/rllm_logo_white.png  # use a white or transparent version if possible
  favicon: assets/rllm_logo.png
  # Two palette entries; each one carries a toggle that switches to the other.
  # Both set primary/accent to `custom`.
  # NOTE(review): `custom` presumably relies on colors defined in the
  # stylesheet listed under extra_css — confirm.
  palette:
    # Light scheme.
    - scheme: default
      primary: custom
      accent: custom
      toggle:
        icon: material/brightness-7
        name: Switch to dark mode
    # Dark scheme.
    - scheme: slate
      primary: custom
      accent: custom
      toggle:
        icon: material/brightness-4
        name: Switch to light mode
  features:
    # NOTE(review): Material's documentation states that instant navigation
    # needs `site_url` to be set; no site_url is visible in this file —
    # confirm whether it should be added.
    - navigation.instant
    - navigation.tracking
    - navigation.tabs
    - navigation.sections
    - navigation.top
    - navigation.expand
    - navigation.indexes
    - content.code.copy
    - content.code.annotate

# Additional stylesheet loaded on top of the theme.
# NOTE(review): presumably where the colors for the `custom` palette values
# are defined — confirm.
extra_css:
  - stylesheets/extra.css

# Build plugins: site search, plus mkdocstrings with its Python handler.
plugins:
  - search
  - mkdocstrings:
      handlers:
        python:
          # Search path handed to the Python handler.
          # NOTE(review): `../` points one directory above the location this
          # path is resolved from, while `watch` below uses `./rllm/` —
          # confirm both agree with the actual repository layout.
          paths:
            - '../'
          options:
            # Docstring parsing.
            docstring_style: google
            # Member ordering and source display.
            members_order: source
            show_source: true
            # Headings and table-of-contents entries.
            show_root_heading: true
            show_root_toc_entry: true
            show_symbol_type_heading: true
            show_symbol_type_toc: true
            # Signature rendering.
            show_signature_annotations: true
            separate_signature: true

# Additional directory to watch for changes (on top of the defaults) when the
# site is served locally with live reload.
watch:
  - ./rllm/

# Markdown extensions enabled for every page. Order of the list is preserved.
markdown_extensions:
  # Code highlighting for fenced blocks and inline code.
  - pymdownx.highlight:
      anchor_linenums: true
  - pymdownx.inlinehilite
  # Snippet inclusion, looked up from the listed base paths, with path
  # checking enabled.
  - pymdownx.snippets:
      base_path:
        - '.'
        - '..'
      check_paths: true
  # Custom fence so ```mermaid blocks are handled by the mermaid formatter.
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          # YAML tag referencing a Python object; it must stay exactly as
          # written and will not load with a plain safe YAML loader.
          format: !!python/name:pymdownx.superfences.fence_code_format
  # Call-outs, collapsible blocks and tabbed content.
  - admonition
  - pymdownx.details
  - pymdownx.tabbed:
      alternate_style: true
  # Plain Markdown additions.
  - tables
  - footnotes


# Site navigation. Each top-level list item is one navigation entry; with
# `navigation.tabs` enabled in the theme these are rendered as tabs.
nav:
  # Landing tab: a curated overview that links into the other sections.
  # Nested lists use the same 4-space offset as the rest of this nav
  # (previously the third level here was indented by only 2).
  # NOTE(review): several of these pages are also listed under other
  # top-level entries of this nav — verify that active-tab highlighting and
  # previous/next links behave as intended for pages that appear twice.
  - Home:
      - Overview: index.md
      - Getting Started:
          - Installation Guide: getting-started/installation.md
          - Quick Start: getting-started/quick-start.md
      - Core Concepts:
          - rLLM's Main Components: core-concepts/overview.md
          - Agent and Environment: core-concepts/agent_env.md
          - AgentExecutionEngine for Trajectory Rollout: core-concepts/execution-engine.md
          - AgentWorkflowEngine for Episode Rollout: core-concepts/workflow-engine.md
          - AgentTrainer for RL Training: core-concepts/training.md
          - RL Algorithms for Agents: core-concepts/rl-algos.md
          - SDK Overview: core-concepts/sdk.md
      - Projects:
          - FinQA Financial Agent: projects/finqa.md
      - Examples:
          - Training a LangGraph Search Agent using the rLLM SDK: examples/sdk_langgraph_rag.md
          - RL Training with Tinker: examples/tinker_rl.md
          - VLM Training on Geo3k dataset: examples/vlm.md
          - Joint Training of Solver-Judge Workflow: examples/solver_judge.md
          - LoRA Training in rLLM (with verl): examples/gsm8k_lora.md
          - Eval Protocol Integration: core-concepts/eval-protocol.md
      - References:
          - Examples: examples/index.md
          - Projects: projects/index.md
          - API Reference: api/index.md
          - Contributing: contributing.md
          # External link rather than a page under docs_dir.
          - Blogs: https://rllm-project.com/blog.html
  # Installation and first-run guides.
  - Getting Started:
      - Installation: getting-started/installation.md
      - Quick Start: getting-started/quick-start.md
  # Conceptual documentation, one page per core component.
  - Core Concepts:
      - rLLM's Main Components: core-concepts/overview.md
      - Agent and Environment: core-concepts/agent_env.md
      - AgentExecutionEngine for Trajectory Rollout: core-concepts/execution-engine.md
      - AgentWorkflowEngine for Episode Rollout: core-concepts/workflow-engine.md
      - AgentTrainer for RL Training: core-concepts/training.md
      - RL Algorithms for Agents: core-concepts/rl-algos.md
      - SDK Overview: core-concepts/sdk.md
      - Eval Protocol Integration: core-concepts/eval-protocol.md
  # Full list of example walkthroughs; the first entry is the section index.
  - Examples:
      - Overview: examples/index.md
      - Math Agent with rLLM SDK: examples/sdk_math.md
      - Solver-Judge Workflow with rLLM SDK: examples/sdk_solver_judge.md
      - LangGraph RAG Agent with rLLM SDK: examples/sdk_langgraph_rag.md
      - RL Training with Tinker: examples/tinker_rl.md
      - Verifiers (Prime Intellect Hub): examples/verifiers.md
      - LoRA Training in rLLM (with verl): examples/gsm8k_lora.md
      - Solver-Judge Workflow: examples/solver_judge.md
      - Vision-Language Models (VLM): examples/vlm.md
      - DeepScaleR: examples/deepscaler.md
      - DeepCoder: examples/deepcoder.md
      - DeepSWE: examples/swe.md
      - Search Agent: examples/search.md
      - FrozenLake Agent: examples/frozenlake.md
      - Math SFT: examples/sft.md
  # Experimental features. The nested list is indented with the same 4-space
  # offset used by the API Reference section of this nav (it previously used
  # a 2-space offset).
  - Experimental:
      - Experimental Features: experimental/index.md
      - Unified Workflow Trainer:
          - Unified Training Loop: experimental/unified-trainer.md
          - Backend Protocol: experimental/backend-protocol.md
          - Configuration: experimental/rllm-and-backend-config.md
  # Project write-ups; the first entry is the section index.
  - Projects:
      - Overview: projects/index.md
      - FinQA: projects/finqa.md
  # API reference pages, grouped by subsystem (paths mirror api/<group>/...).
  - API Reference:
      - Overview: api/index.md
      - Agents:
          - Agent Base: api/agents/agent.md
          - Agent Utils: api/agents/utils.md
      - Environments:
          - Environment Base: api/environments/base.md
          - Environment Utils: api/environments/env_utils.md
      - Workflow:
          - Workflow Base: api/workflows/workflow.md
      - Engine:
          - Agent Execution Engine: api/engine/agent_execution_engine.md
          - Agent Workflow Engine: api/engine/agent_workflow_engine.md
      - Trainer:
          - Agent Trainer: api/trainer/agent_trainer.md
          - Ray Runtime Environment: api/trainer/ray_runtime_env.md
      - Tools:
          - Tool Base Classes: api/tools/tool_base.md
          - Tool Registry: api/tools/registry.md
          - Web Tools: api/tools/web_tools.md
          - Code Tools: api/tools/code_tools.md
      - Parser:
          - Tool Parsers: api/parser/tool_parser.md
          - Chat Parsers: api/parser/chat_parser.md
  # Single-page top-level entry.
  - Contributing: contributing.md
  # External link rather than a page under docs_dir.
  - Blogs: https://rllm-project.com/blog.html