Skip to content

Latest commit

 

History

History
52 lines (44 loc) · 1.21 KB

File metadata and controls

52 lines (44 loc) · 1.21 KB
%%{init: {
  "theme": "default",
  "themeVariables": {
    "primaryTextColor": "#000000",
    "lineColor": "#333333",
    "fontSize": "16px"
  }
}}%%

graph TD
    %% Top-down flowchart of a pipeline that produces valence/arousal (V/A)
    %% scores along two branches, audio and text, merges both into a single
    %% output dictionary, and feeds that dictionary to a circumplex plot.
    %% The init directive above selects the default theme and overrides its
    %% text color, line color and font size.

    %% Audio branch: the input audio goes through the CLAP model, which
    %% yields the audio V/A scores.
    A["Input Audio
(e.g., song.mp3)"] --> B["CLAP Model
(Audio to V/A Scores)"];
    B --> C["Audio V/A Scores"];

    %% Text branch nodes. They are declared before the edges that connect
    %% them; NOTE(review): declaration order may influence the rendered
    %% layout, so keep it stable unless a layout change is intended.
    D["Optional:
Provided Lyrics"];
    E["ASR Model
(e.g., Whisper)"];
    F["Transcribed Text"];
    G["NLI Sentiment Model
(Text to V/A Scores)"];
    H["Text V/A Scores"];

    %% Text branch edges. The sentiment model G has two alternative inputs:
    %% lyrics supplied by the caller (D), or text transcribed from the input
    %% audio by the ASR model (A to E to F) when no lyrics are given.
    D -- "If lyrics provided" --> G;
    A -- "If no lyrics, for ASR" --> E;
    E --> F;
    F -- "If ASR used" --> G;
    
    G --> H;

    %% Merge step: audio scores (C) and text scores (H) both flow into the
    %% output dictionary node.
    C --> I["Output Dictionary:
{'audio': {valence, arousal},
 'text':  {valence, arousal}}"];
    H --> I;
    
    %% Final step: the output dictionary is drawn as two points, one for
    %% audio and one for text, on a 2D valence x arousal plot.
    I --> J["Visualization:
Circumplex Plot
(2D space: valence x arousal)
- Audio point
- Text point"];

    %% Applying classes to the nodes
    %% NOTE(review): Mermaid documents that a classDef named default is
    %% applied to every node without another class, so the explicit default
    %% assignment below is presumably redundant - confirm before removing.
    class A,D,E,F,I,J default;
    class B,G model;
    class C,H scores;

    %% Class definitions for colors (light-theme version)
    %% default: data and I/O nodes; model: the two model nodes B and G;
    %% scores: the two V/A score nodes C and H.
    classDef default fill:#DDEFFD,stroke:#333333,stroke-width:1px,color:#000000;
    classDef model fill:#C9D4F9,stroke:#333333,stroke-width:1px,color:#000000;
    classDef scores fill:#D4F9D8,stroke:#333333,stroke-width:1px,color:#000000;
Loading