11import torch
2- import numpy as np
3- import pandas as pd
4- from pathlib import Path
5- import logging
6- import joblib
7- import sys
82
# Make the repository root importable so the `pipelines` package resolves
# no matter which working directory this module is launched from.
project_root = str(Path(__file__).resolve().parents[2])
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

from pipelines.train_transformer_pipeline import TransformerModel

# Module-level cache: holds model, scalers, data frame and device so they
# are loaded at most once per process.
cache = {}
18-
def load_resources():
    """Load the Transformer model, scalers and fused dataset into the cache.

    Idempotent: returns immediately when the model is already cached.
    Any failure is logged (with full traceback) rather than raised, so
    startup stays best-effort; `make_prediction` guards against a
    missing model.
    """
    if "model" in cache:
        logging.info("Resources already loaded.")
        return

    logging.info("Loading resources for Transformer model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    cache["device"] = device

    try:
        cache["x_scaler"] = joblib.load(Path("data/sequences_sentiment/x_scaler.joblib"))
        cache["y_scaler"] = joblib.load(Path("data/sequences_sentiment/y_scaler.joblib"))
        data_path = Path("data/processed/final_fused_data.csv")
        df = pd.read_csv(data_path, parse_dates=['Date'])
        cache["data"] = df

        model_path = Path("models/transformer_v1.pt")
        # Derive the model's input width from the fitted feature scaler so
        # the architecture always matches the training-time feature count.
        input_size = cache["x_scaler"].n_features_in_

        model = TransformerModel(input_size=input_size).to(device)
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()  # inference mode: freeze dropout / norm statistics
        cache["model"] = model
        logging.info("Transformer model and all resources loaded successfully.")
    except Exception:
        # Deliberate best-effort startup: record the full traceback instead
        # of crashing; make_prediction() raises clearly if the model is absent.
        logging.exception("Failed to load resources on startup")
46-
def make_prediction(ticker: str, date_str: str, sequence_length: int = 60) -> float:
    """Predict the next value for `ticker` using history up to `date_str`.

    Args:
        ticker: Stock symbol; matched case-insensitively against the data.
        date_str: Cut-off date; only rows on/before it feed the model.
        sequence_length: Number of trailing rows fed to the model
            (default 60, the training window).

    Returns:
        The unscaled model prediction as a plain Python float.

    Raises:
        ValueError: If resources are not loaded or history is too short.
        FileNotFoundError: If the ticker is absent from the dataset
            (kept as-is for backward compatibility with existing callers).
    """
    if "model" not in cache:
        raise ValueError("Model and resources are not loaded.")

    model = cache["model"]
    data_df = cache["data"]
    x_scaler = cache["x_scaler"]
    y_scaler = cache["y_scaler"]
    device = cache["device"]

    end_date = pd.to_datetime(date_str)

    ticker_data = data_df[data_df['Ticker'] == ticker.upper()]
    if ticker_data.empty:
        raise FileNotFoundError(f"Data for ticker '{ticker}' not found.")

    data_up_to_date = ticker_data[ticker_data['Date'] <= end_date]
    if len(data_up_to_date) < sequence_length:
        raise ValueError(f"Not enough historical data for {ticker} before {date_str}.")

    # Sort explicitly: tail() only yields the most recent window when rows
    # are in chronological order, which the source CSV is not guaranteed to be.
    sequence_to_predict = data_up_to_date.sort_values('Date').tail(sequence_length)
    # Use the scaler's recorded feature names so column order matches training.
    feature_cols = x_scaler.feature_names_in_
    sequence_scaled = x_scaler.transform(sequence_to_predict[feature_cols])

    # (1, sequence_length, n_features) batch for the model.
    input_tensor = torch.from_numpy(sequence_scaled).float().unsqueeze(0).to(device)
    with torch.no_grad():
        prediction_scaled = model(input_tensor)

    prediction_unscaled = y_scaler.inverse_transform(prediction_scaled.cpu().numpy())[0][0]
    # inverse_transform yields a NumPy scalar; honor the declared -> float.
    return float(prediction_unscaled)
3+ from ml .models .multimodal_model import MultimodalTradingModel
4+
5+
class InferenceService:
    """
    Stateless inference service.

    The multimodal model is loaded exactly once in ``__init__`` and then
    reused for every call; ``predict`` runs without gradient tracking.
    """

    def __init__(self, model_path: str, input_dim: int):
        # Prefer the GPU when one is visible; otherwise fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # NOTE(review): torch.load deserializes via pickle — only load
        # checkpoints from trusted sources.
        state = torch.load(model_path, map_location=self.device)
        self.model = MultimodalTradingModel(input_dim=input_dim)
        self.model.load_state_dict(state)
        self.model.to(self.device)
        self.model.eval()

    @torch.no_grad()
    def predict(
        self,
        market_x: torch.Tensor,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> dict:
        """Run one forward pass and return batch-averaged scalar outputs."""
        inputs = (
            market_x.to(self.device),
            input_ids.to(self.device),
            attention_mask.to(self.device),
        )

        output = self.model(*inputs)

        # Collapse each head's batch dimension into a single Python float.
        return {
            key: float(output[key].mean().cpu())
            for key in ("p_up", "expected_return", "uncertainty")
        }