diff --git a/examples/storm_examples/run_storm_wiki_deepseek.py b/examples/storm_examples/run_storm_wiki_deepseek.py index e831797f..11b045ed 100644 --- a/examples/storm_examples/run_storm_wiki_deepseek.py +++ b/examples/storm_examples/run_storm_wiki_deepseek.py @@ -49,13 +49,13 @@ def sanitize_topic(topic): topic = topic.replace(" ", "_") # Remove any character that isn't alphanumeric, underscore, or hyphen - topic = re.sub(r"[^a-zA-Z0-9_-]", "", topic) + topic = re.sub(r'[^a-zA-Z0-9_\-\s]', '', topic) # Ensure the topic isn't empty after sanitization if not topic: topic = "unnamed_topic" - return topic + return topic[:100] if topic else "unnamed_topic" def main(args): @@ -152,8 +152,8 @@ def main(args): try: runner.run( topic=sanitized_topic, - do_research=args.do_research, - do_generate_outline=args.do_generate_outline, + do_research=args.do_research and bool(sanitized_topic.strip()), + do_generate_outline=args.do_generate_outline and bool(sanitized_topic.strip()), do_generate_article=args.do_generate_article, do_polish_article=args.do_polish_article, remove_duplicate=args.remove_duplicate, diff --git a/knowledge_storm/rm.py b/knowledge_storm/rm.py index 563116fe..27ddd1a7 100644 --- a/knowledge_storm/rm.py +++ b/knowledge_storm/rm.py @@ -932,6 +932,10 @@ def forward( if isinstance(query_or_queries, str) else query_or_queries ) + valid_queries = [q for q in queries if q and len(q.strip()) >= 3] + if not valid_queries: + logging.warning(f"All queries are empty: {queries}") + return [] self.usage += len(queries) collected_results = [] diff --git a/knowledge_storm/storm_wiki/modules/knowledge_curation.py b/knowledge_storm/storm_wiki/modules/knowledge_curation.py index d6b89295..e885cb02 100644 --- a/knowledge_storm/storm_wiki/modules/knowledge_curation.py +++ b/knowledge_storm/storm_wiki/modules/knowledge_curation.py @@ -209,6 +209,13 @@ def forward(self, topic: str, question: str, ground_truth_url: str): q.replace("-", "").strip().strip('"').strip('"').strip() for 
q in queries.split("\n") ] + queries = [q for q in queries if q and len(q.strip()) > 3] # length > 3 characters + if not queries: + return dspy.Prediction( + queries=[], + searched_results=[], + answer="No valid queries generated for this question" + ) queries = queries[: self.max_search_queries] # Search searched_results: List[Information] = self.retriever.retrieve(