@@ -141,150 +141,150 @@ async def run_learning_cycle(self) -> Dict[str, Any]:
141141 logger .info (f"🔄 Starting learning cycle #{ cycle_number } ..." )
142142
143143 try :
144- # Step 1: Fetch from all sources
145- all_entries = self .source_integration .fetch_all_sources (
146- max_items_per_source = 5 ,
147- use_pre_filter = True , # Apply pre-filter to reduce costs
148- content_curator = self .content_curator ,
149- min_importance_score = 0.3
150- )
151-
152- entries_fetched = len (all_entries )
153- logger .info (f"📥 Fetched { entries_fetched } entries from all sources" )
154-
155- # Step 2: Create fetch cycle for tracking
156- cycle_id = None
157- if self .rss_fetch_history :
158- cycle_id = self .rss_fetch_history .create_fetch_cycle (cycle_number = cycle_number )
159-
160- # Step 3: Pre-filter content (if not already filtered)
161- entries_to_add = []
162- entries_filtered = 0
163-
164- if self .content_curator and all_entries :
165- filtered_entries , rejected_entries = self .content_curator .pre_filter_content (all_entries )
166- entries_to_add = filtered_entries
167- entries_filtered = len (rejected_entries )
144+ # Step 1: Fetch from all sources
145+ all_entries = self .source_integration .fetch_all_sources (
146+ max_items_per_source = 5 ,
147+ use_pre_filter = True , # Apply pre-filter to reduce costs
148+ content_curator = self .content_curator ,
149+ min_importance_score = 0.3
150+ )
168151
169- # Track rejected entries
170- for rejected in rejected_entries :
171- if self .rss_fetch_history and cycle_id :
172- self .rss_fetch_history .add_fetch_item (
173- cycle_id = cycle_id ,
174- title = rejected .get ("title" , "" ),
175- source_url = rejected .get ("source" , "" ),
176- link = rejected .get ("link" , "" ),
177- summary = rejected .get ("summary" , "" ),
178- status = "Filtered: Low Score" ,
179- status_reason = rejected .get ("rejection_reason" , "Low quality/Short content" )
180- )
181- else :
182- entries_to_add = all_entries
183-
184- # Step 4: Add to RAG (if enabled)
185- entries_added_to_rag = 0
186- if self .auto_add_to_rag and self .rag_retrieval and entries_to_add :
187- logger .info (f"📚 Adding { len (entries_to_add )} entries to RAG..." )
152+ entries_fetched = len (all_entries )
153+ logger .info (f"📥 Fetched { entries_fetched } entries from all sources" )
154+
155+ # Step 2: Create fetch cycle for tracking
156+ cycle_id = None
157+ if self .rss_fetch_history :
158+ cycle_id = self .rss_fetch_history .create_fetch_cycle (cycle_number = cycle_number )
188159
189- for entry in entries_to_add :
190- try :
191- # Add to RAG
192- success = self .rag_retrieval .add_learning_content (
193- content = entry .get ("summary" , "" ),
194- source = entry .get ("source" , "unknown" ),
195- content_type = "knowledge" ,
196- metadata = {
197- "title" : entry .get ("title" , "" ),
198- "link" : entry .get ("link" , "" ),
199- "published" : entry .get ("published" , "" ),
200- "source_type" : entry .get ("source_type" , "rss" )
201- }
202- )
203-
204- if success :
205- entries_added_to_rag += 1
160+ # Step 3: Pre-filter content (if not already filtered)
161+ entries_to_add = []
162+ entries_filtered = 0
163+
164+ if self .content_curator and all_entries :
165+ filtered_entries , rejected_entries = self .content_curator .pre_filter_content (all_entries )
166+ entries_to_add = filtered_entries
167+ entries_filtered = len (rejected_entries )
168+
169+ # Track rejected entries
170+ for rejected in rejected_entries :
171+ if self .rss_fetch_history and cycle_id :
172+ self .rss_fetch_history .add_fetch_item (
173+ cycle_id = cycle_id ,
174+ title = rejected .get ("title" , "" ),
175+ source_url = rejected .get ("source" , "" ),
176+ link = rejected .get ("link" , "" ),
177+ summary = rejected .get ("summary" , "" ),
178+ status = "Filtered: Low Score" ,
179+ status_reason = rejected .get ("rejection_reason" , "Low quality/Short content" )
180+ )
181+ else :
182+ entries_to_add = all_entries
183+
184+ # Step 4: Add to RAG (if enabled)
185+ entries_added_to_rag = 0
186+ if self .auto_add_to_rag and self .rag_retrieval and entries_to_add :
187+ logger .info (f"📚 Adding { len (entries_to_add )} entries to RAG..." )
188+
189+ for entry in entries_to_add :
190+ try :
191+ # Add to RAG
192+ success = self .rag_retrieval .add_learning_content (
193+ content = entry .get ("summary" , "" ),
194+ source = entry .get ("source" , "unknown" ),
195+ content_type = "knowledge" ,
196+ metadata = {
197+ "title" : entry .get ("title" , "" ),
198+ "link" : entry .get ("link" , "" ),
199+ "published" : entry .get ("published" , "" ),
200+ "source_type" : entry .get ("source_type" , "rss" )
201+ }
202+ )
206203
207- # Track in history
204+ if success :
205+ entries_added_to_rag += 1
206+
207+ # Track in history
208+ if self .rss_fetch_history and cycle_id :
209+ self .rss_fetch_history .add_fetch_item (
210+ cycle_id = cycle_id ,
211+ title = entry .get ("title" , "" ),
212+ source_url = entry .get ("source" , "" ),
213+ link = entry .get ("link" , "" ),
214+ summary = entry .get ("summary" , "" ),
215+ status = "Added to RAG" ,
216+ vector_id = f"knowledge_{ entry .get ('link' , '' )[:8 ]} " ,
217+ added_to_rag_at = datetime .now ().isoformat ()
218+ )
219+ except Exception as add_error :
220+ logger .error (f"Error adding entry to RAG: { add_error } " )
221+ # Track error in history
208222 if self .rss_fetch_history and cycle_id :
209223 self .rss_fetch_history .add_fetch_item (
210224 cycle_id = cycle_id ,
211225 title = entry .get ("title" , "" ),
212226 source_url = entry .get ("source" , "" ),
213227 link = entry .get ("link" , "" ),
214228 summary = entry .get ("summary" , "" ),
215- status = "Added to RAG" ,
216- vector_id = f"knowledge_{ entry .get ('link' , '' )[:8 ]} " ,
217- added_to_rag_at = datetime .now ().isoformat ()
229+ status = "Error: Failed to add" ,
230+ status_reason = str (add_error )
218231 )
219- except Exception as add_error :
220- logger .error (f"Error adding entry to RAG: { add_error } " )
221- # Track error in history
222- if self .rss_fetch_history and cycle_id :
223- self .rss_fetch_history .add_fetch_item (
224- cycle_id = cycle_id ,
225- title = entry .get ("title" , "" ),
226- source_url = entry .get ("source" , "" ),
227- link = entry .get ("link" , "" ),
228- summary = entry .get ("summary" , "" ),
229- status = "Error: Failed to add" ,
230- status_reason = str (add_error )
231- )
232+
233+ logger .info (f"✅ Added { entries_added_to_rag } entries to RAG" )
234+
235+ # Update cycle count and timestamps
236+ self .cycle_count = cycle_number
237+ self .last_run_time = datetime .now ()
238+ self .next_run_time = self .last_run_time + timedelta (hours = self .interval_hours )
239+
240+ processing_time = (datetime .now () - start_time ).total_seconds ()
241+
242+ result = {
243+ "cycle_number" : cycle_number ,
244+ "entries_fetched" : entries_fetched ,
245+ "entries_filtered" : entries_filtered ,
246+ "entries_added_to_rag" : entries_added_to_rag ,
247+ "processing_time_seconds" : processing_time ,
248+ "timestamp" : self .last_run_time .isoformat (),
249+ "next_run" : self .next_run_time .isoformat () if self .next_run_time else None ,
250+ "status" : "success"
251+ }
232252
233- logger .info (f"✅ Added { entries_added_to_rag } entries to RAG" )
234-
235- # Update cycle count and timestamps
236- self .cycle_count = cycle_number
237- self .last_run_time = datetime .now ()
238- self .next_run_time = self .last_run_time + timedelta (hours = self .interval_hours )
239-
240- processing_time = (datetime .now () - start_time ).total_seconds ()
241-
242- result = {
243- "cycle_number" : cycle_number ,
244- "entries_fetched" : entries_fetched ,
245- "entries_filtered" : entries_filtered ,
246- "entries_added_to_rag" : entries_added_to_rag ,
247- "processing_time_seconds" : processing_time ,
248- "timestamp" : self .last_run_time .isoformat (),
249- "next_run" : self .next_run_time .isoformat () if self .next_run_time else None ,
250- "status" : "success"
251- }
252-
253253 logger .info (f"✅ Learning cycle #{ cycle_number } completed: { entries_added_to_rag } entries added to RAG in { processing_time :.2f} s" )
254254
255255 return result
256256
257257 except Exception as e :
258- logger .error (f"❌ Error in learning cycle #{ cycle_number } : { e } " , exc_info = True )
259-
260- # Record error in unified metrics
261- try :
262- from stillme_core .monitoring import get_metrics_collector , MetricCategory
263- unified_metrics = get_metrics_collector ()
264- unified_metrics .record_learning_cycle (
265- cycle_number = cycle_number ,
266- entries_fetched = 0 ,
267- entries_added = 0 ,
268- entries_filtered = 0 ,
269- sources = {},
270- duration_seconds = (datetime .now () - start_time ).total_seconds (),
271- error = str (e )
272- )
273- except Exception as metrics_error :
274- logger .debug (f"Failed to record learning error metrics: { metrics_error } " )
275-
276- # Don't increment cycle count on error
277- return {
278- "cycle_number" : 0 , # 0 indicates error
279- "entries_fetched" : 0 ,
280- "entries_filtered" : 0 ,
281- "entries_added_to_rag" : 0 ,
282- "processing_time_seconds" : (datetime .now () - start_time ).total_seconds (),
283- "timestamp" : datetime .now ().isoformat (),
284- "next_run" : None ,
285- "status" : "error" ,
286- "error" : str (e )
287- }
258+ logger .error (f"❌ Error in learning cycle #{ cycle_number } : { e } " , exc_info = True )
259+
260+ # Record error in unified metrics
261+ try :
262+ from stillme_core .monitoring import get_metrics_collector , MetricCategory
263+ unified_metrics = get_metrics_collector ()
264+ unified_metrics .record_learning_cycle (
265+ cycle_number = cycle_number ,
266+ entries_fetched = 0 ,
267+ entries_added = 0 ,
268+ entries_filtered = 0 ,
269+ sources = {},
270+ duration_seconds = (datetime .now () - start_time ).total_seconds (),
271+ error = str (e )
272+ )
273+ except Exception as metrics_error :
274+ logger .debug (f"Failed to record learning error metrics: { metrics_error } " )
275+
276+ # Don't increment cycle count on error
277+ return {
278+ "cycle_number" : 0 , # 0 indicates error
279+ "entries_fetched" : 0 ,
280+ "entries_filtered" : 0 ,
281+ "entries_added_to_rag" : 0 ,
282+ "processing_time_seconds" : (datetime .now () - start_time ).total_seconds (),
283+ "timestamp" : datetime .now ().isoformat (),
284+ "next_run" : None ,
285+ "status" : "error" ,
286+ "error" : str (e )
287+ }
288288
289289 async def _scheduler_loop (self ):
290290 """
0 commit comments