From e9e28575289671aed898d4e5c283d8c05c9d94b4 Mon Sep 17 00:00:00 2001
From: Kunal-Somani
Date: Fri, 3 Apr 2026 22:52:29 +0530
Subject: [PATCH] feat(schema): add FacialEmotionOutput for multimodal engine
 integration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds schemas/multimodal_output_schema.py — a Pydantic wrapper around
GetEmotionPercentagesResponse that standardizes the facial API output
for consumption by the MultimodalSentimentEngine in sentiment-analysis-api.

Key additions:
- FacialEmotionOutput.from_response(): converts GetEmotionPercentagesResponse
  into a validated Pydantic model with field-level range constraints (0-100%)
- to_multimodal_dict(): produces the {emotion_label: percentage} dict format
  expected by MultimodalSentimentEngine.analyze(facial_emotions=...)
- dominant_emotion(): returns the top emotion and its confidence as a
  (label, float) tuple — useful for logging and single-label summaries

This is the integration boundary between the two RUXAILAB sentiment repos:
the facial API produces GetEmotionPercentagesResponse, FacialEmotionOutput
converts it, and the multimodal engine fuses it with text and prosody.

Closes the gap identified in PRs #21 and #22 where the two pipelines had no
shared contract.
---
 schemas/multimodal_output_schema.py | 101 ++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 schemas/multimodal_output_schema.py

diff --git a/schemas/multimodal_output_schema.py b/schemas/multimodal_output_schema.py
new file mode 100644
index 0000000..1dbc584
--- /dev/null
+++ b/schemas/multimodal_output_schema.py
@@ -0,0 +1,101 @@
+"""
+Standardized output schema for multimodal sentiment fusion.
+
+Converts the facial-sentiment-analysis-api emotion percentage output
+into the dict format expected by MultimodalSentimentEngine.analyze()
+in the sentiment-analysis-api repo.
+
+Usage:
+    from schemas.multimodal_output_schema import FacialEmotionOutput
+
+    result = GetEmotionPercentagesResponse(...)
+    payload = FacialEmotionOutput.from_response(result)
+    # payload.to_multimodal_dict() → ready for /multimodal/analyze
+"""
+
+from pydantic import BaseModel, Field
+from schemas.emotion_schema import GetEmotionPercentagesResponse
+
+# Emotion labels in field-declaration order; to_multimodal_dict() emits its
+# keys in this same order.
+_EMOTION_LABELS = (
+    'Angry',
+    'Disgusted',
+    'Fearful',
+    'Happy',
+    'Neutral',
+    'Sad',
+    'Surprised',
+)
+
+
+class FacialEmotionOutput(BaseModel):
+    """
+    Validated view of a GetEmotionPercentagesResponse whose methods emit
+    the facial_emotions mapping consumed by the MultimodalSentimentEngine.
+
+    The MultimodalSentimentEngine expects:
+        {emotion_label: percentage_float, ...}
+    where each percentage lies in the range 0.0 to 100.0 and the labels
+    match the 7-class taxonomy used by the facial CNN model.
+    """
+
+    Angry: float = Field(ge=0.0, le=100.0)
+    Disgusted: float = Field(ge=0.0, le=100.0)
+    Fearful: float = Field(ge=0.0, le=100.0)
+    Happy: float = Field(ge=0.0, le=100.0)
+    Neutral: float = Field(ge=0.0, le=100.0)
+    Sad: float = Field(ge=0.0, le=100.0)
+    Surprised: float = Field(ge=0.0, le=100.0)
+
+    @classmethod
+    def from_response(
+        cls, response: GetEmotionPercentagesResponse
+    ) -> "FacialEmotionOutput":
+        """
+        Copy the seven emotion percentages of a response into a new model.
+
+        Args:
+            response: the Pydantic model returned by EmotionsAnalysisImp
+
+        Returns:
+            FacialEmotionOutput instance ready for to_multimodal_dict()
+        """
+        percentages = {
+            label: getattr(response, label) for label in _EMOTION_LABELS
+        }
+        return cls(**percentages)
+
+    def to_multimodal_dict(self) -> dict:
+        """
+        Build the facial_emotions dict passed to
+        MultimodalSentimentEngine.analyze().
+
+        Returns:
+            dict of {emotion_label: percentage_float}
+            Example:
+                {
+                    'Angry': 2.5,
+                    'Disgusted': 0.0,
+                    'Fearful': 1.2,
+                    'Happy': 61.3,
+                    'Neutral': 28.0,
+                    'Sad': 5.0,
+                    'Surprised': 2.0
+                }
+        """
+        return {label: getattr(self, label) for label in _EMOTION_LABELS}
+
+    def dominant_emotion(self) -> tuple:
+        """
+        Pick the emotion carrying the largest percentage.
+
+        Returns:
+            (label: str, percentage: float)
+            Example: ('Happy', 61.3)
+        """
+        # max() keeps the first maximal item, so ties resolve in label order.
+        top_label, top_value = max(
+            self.to_multimodal_dict().items(), key=lambda pair: pair[1]
+        )
+        return top_label, top_value