TensorRT-LLM/.github/tava_architecture_diagram.md at main · brb-nv/TensorRT-LLM

graph TB
    %% Entry points of the diagram: the CLI Tools node links into the LLM API node.
    %% The title contains spaces, so this subgraph is not referenced by id elsewhere.
    subgraph "User API & CLI Tools"
        CLI[CLI Tools]
        LLMAPI[LLM API]
        CLI --> LLMAPI
    end

    %% Model input: the checkpoint node links to both entry-point nodes
    %% (CLI and LLMAPI), which are declared in the User API and CLI Tools subgraph.
    subgraph "Model Checkpoint"
        Checkpoint[Huggingface Models]
        Checkpoint --> CLI
        Checkpoint --> LLMAPI
    end

    %% TensorRT flow: LLMAPI links to trtllm.Executor, which fans out to the
    %% engine, graph and plugin nodes; all three then converge on the Executor node.
    %% The title has no spaces, so TensorRT_Flow is also usable as a link target.
    subgraph "TensorRT_Flow"
        trtllmExecutor[trtllm.Executor]
        Engine[TensorRT Engine]
        TRTGraph[TensorRT Graph]
        Plugins[TensorRT Plugins]
        %% Id cudaKernel differs only by case from CUDAKernel in PyTorch_Flow.
        %% They are styled separately (trt vs pytorch class) but share the same label.
        cudaKernel[CUDA Kernel]
        Executor[Executor]
        LLMAPI --> trtllmExecutor
        %% Labelled edges: build for the engine, compile for the graph and plugins.
        trtllmExecutor --> |build|Engine
        trtllmExecutor --> |compile|TRTGraph
        trtllmExecutor --> |compile|Plugins
        Engine --> Executor
        Plugins --> Executor
        TRTGraph --> Executor
        %% Plugins is the only node in this flow linked to the CUDA kernel node.
        Plugins --> cudaKernel
    end

    %% PyTorch flow: LLMAPI links to PyExecutor, then PyEngine, which fans out to
    %% the ops, kernel-lib, scheduler and decoder nodes.
    %% The title has no spaces, so PyTorch_Flow is also usable as a link target.
    subgraph "PyTorch_Flow"
        %% Node declarations: every label is defined exactly once, here.
        PyExecutor[PyExecutor]
        PyEngine[PyTorch Engine]
        CustomOps[Custom Ops]
        PyTorchOps[Pytorch Ops]
        KernelLibs[Kernel Libs]
        PyScheduler[Scheduler]
        PyDecoder[Decoder]
        %% Id CUDAKernel differs only by case from cudaKernel in TensorRT_Flow.
        CUDAKernel[CUDA Kernel]
        LLMAPI --> PyExecutor
        %% Edge references the id only; the label is already set in the declaration above.
        PyExecutor --> PyEngine
        PyEngine --> CustomOps
        PyEngine --> PyTorchOps
        PyEngine --> KernelLibs
        PyEngine --> PyScheduler
        PyEngine --> PyDecoder
        %% Both kernel libs and custom ops link to the CUDA kernel node.
        KernelLibs --> CUDAKernel
        CustomOps --> CUDAKernel
    end

    %% AutoDeploy flow: one linear chain starting at LLMAPI and ending at torch.export.
    %% The title has no spaces, so AutoDeploy_Flow is also usable as a link target.
    subgraph "AutoDeploy_Flow"
        ADExecutor[ADExecutor]
        ADEngine[ADEngine]
        GraphTransforms[Graph Transforms]
        TorchExport[torch.export]
        %% Chained link: same four edges, in the same order, as separate statements.
        LLMAPI --> ADExecutor --> ADEngine --> GraphTransforms --> TorchExport
    end

    %% Visual generation flow: one linear chain starting at LLMAPI and ending at MediaOutput.
    %% The title has no spaces, so Visual_Gen_Flow is also usable as a link target.
    subgraph "Visual_Gen_Flow"
        VisualGenAPI[VisualGen API]
        DiffusionClient[DiffusionRemoteClient]
        DiffusionExecutor[DiffusionExecutor]
        PipelineLoader[PipelineLoader]
        DiffusionPipeline[BasePipeline WAN]
        MediaOutput[MediaOutput]
        %% Chained links: same six edges, in the same order, as separate statements.
        LLMAPI --> VisualGenAPI --> DiffusionClient --> DiffusionExecutor
        DiffusionExecutor --> PipelineLoader --> DiffusionPipeline --> MediaOutput
    end

    %% Shared components: scheduler, decoder, sampling, batch manager and KV cache
    %% nodes that the TensorRT, PyTorch and AutoDeploy flows link into.
    subgraph "Shared_Component"
        %% Node declarations: every label is defined exactly once, here.
        Shared_Decoder[Decoder]
        Shared_Scheduler[Scheduler]
        Sampling[Sampling]
        BatchManager[Batch Manager]
        KVCache[KV Cache Manager]
        %% Incoming edges from nodes declared in the flow subgraphs above.
        %% NOTE(review): confirm the renderer keeps those source nodes inside the
        %% subgraph that declared them first rather than moving them here.
        PyScheduler --> |Nanobind|Shared_Scheduler
        PyDecoder --> |Nanobind|Shared_Decoder
        ADExecutor --> Shared_Scheduler
        ADExecutor --> Shared_Decoder
        Shared_Decoder --> Sampling
        %% Edge references the id only; the label is already set in the declaration above.
        Executor --> Shared_Scheduler
        Shared_Scheduler --> |In-flight Batching| BatchManager
        BatchManager --> KVCache
    end

    %% Result nodes. None of them has an individual edge; the flow subgraphs
    %% link to the Output_Results subgraph as a whole further down in the diagram.
    subgraph "Output_Results"
        Tokens[Generated Tokens]
        Stats[Performance Stats]
        Metrics[Accuracy Metrics]
        GenImages[Generated Images]
        GenVideos[Generated Videos]
    end

    %% Invisible link between the two flows; used for positioning only.
    PyTorch_Flow ~~~ TensorRT_Flow

    %% Each flow subgraph links to the Output_Results subgraph as a whole.
    TensorRT_Flow --> Output_Results
    PyTorch_Flow --> Output_Results
    AutoDeploy_Flow --> Output_Results
    Visual_Gen_Flow --> Output_Results

    %% Force Output_Results to be between PyTorch_Flow and TensorRT_Flow
    PyTorch_Flow ~~~ Output_Results

    %% Styling section: one classDef per group, followed by the class statement
    %% that assigns it. All groups share the same stroke colour and width and
    %% differ only in fill colour.

    %% Model checkpoint format
    classDef checkpoint fill:#ff1,stroke:#333,stroke-width:2px;
    class Checkpoint checkpoint;

    %% CLI tools format
    classDef cli fill:#f9f,stroke:#333,stroke-width:2px;
    class CLI cli;

    %% TRT flow format
    classDef trt fill:#bbf,stroke:#333,stroke-width:2px;
    class trtllmExecutor,TRTGraph,Plugins,Engine,Executor,cudaKernel trt;

    %% PyTorch flow format
    classDef pytorch fill:#8bf,stroke:#333,stroke-width:2px;
    class PyExecutor,PyEngine,CustomOps,PyTorchOps,KernelLibs,PyScheduler,PyDecoder,CUDAKernel pytorch;

    %% AutoDeploy flow format
    classDef autodeploy fill:#9f8,stroke:#333,stroke-width:2px;
    class ADExecutor,ADEngine,GraphTransforms,TorchExport autodeploy;

    %% Visual Gen flow format
    classDef visualgen fill:#d9c,stroke:#333,stroke-width:2px;
    class VisualGenAPI,DiffusionClient,DiffusionExecutor,PipelineLoader,DiffusionPipeline,MediaOutput visualgen;

    %% Shared Component format
    classDef component fill:#fc8,stroke:#333,stroke-width:2px;
    class Shared_Decoder,Sampling,Shared_Scheduler,BatchManager,KVCache component;

    %% APIs format
    %% LLMAPI is the only API node declared in this diagram; the stale ids
    %% PythonAPI and CppAPI matched no node and were dropped from this list.
    classDef api fill:#bfb,stroke:#333,stroke-width:2px;
    class LLMAPI api;

    %% Results format
    classDef result fill:#fbb,stroke:#333,stroke-width:2px;
    class Tokens,Stats,Metrics,GenImages,GenVideos result;
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

tava_architecture_diagram.md

Latest commit

History

tava_architecture_diagram.md

File metadata and controls