vhive-serverless · JasonZhang517 · Mar 3, 2025
diff --git a/scholar_classifier.py b/scholar_classifier.py
@@ -279,7 +279,7 @@ def extract_and_classify_papers(self, email_message):
             self.logger.info("No papers found in email")
             return []
 
-        # Filter out duplicates before processing
+        # Filter out duplicates and patents before processing
         filtered_papers = []
         for paper in papers:
             title = paper.title.lower().strip()
@@ -293,6 +293,14 @@ def extract_and_classify_papers(self, email_message):
                 self.logger.info(f"Skipping duplicate paper (by URL): {paper.title}")
                 continue
 
+            # Skip patents; `title` is already lowercased where it is assigned earlier in this loop
+            if any(word in title for word in ('patent', 'apparatus', 'method and system')):
+                self.logger.info(f"Skipping patent: {paper.title}")
+                continue
+            if 'patent' in (paper.authors or '').lower():  # tolerate a missing/empty authors field
+                self.logger.info(f"Skipping patent (from authors): {paper.title}")
+                continue
+
             # Add to tracking sets
             self._processed_titles.add(title)
             if url:
@@ -301,7 +309,7 @@ def extract_and_classify_papers(self, email_message):
             filtered_papers.append(paper)
 
         self.logger.info(
-            f"Found {len(papers)} papers, {len(filtered_papers)} after duplicate filtering"
+            f"Found {len(papers)} papers, {len(filtered_papers)} after filtering out duplicates and patents"
         )
 
         results = []
@@ -760,16 +768,31 @@ def format_topic_summary(papers_by_topic):
 
 
 if __name__ == "__main__":
+    import argparse
+
+    # Set up argument parser
+    parser = argparse.ArgumentParser(description='Run the Scholar Classifier')
+    parser.add_argument(
+        '--debug',
+        action='store_true',  # This makes it a flag that's either True or False
+        default=False,        # Default value is False
+        help='Run in debug mode (disable Slack notifications)'
+    )
+    args = parser.parse_args()
+
     # Set up logging
     logging.basicConfig(level=logging.DEBUG)
     logger = logging.getLogger(__name__)
 
     # Load environment variables
     load_dotenv()
 
+    # Log debug mode status
+    logger.info(f"Debug mode: {'enabled' if args.debug else 'disabled'}")
+
     # Load config and print relevant parts (without sensitive data)
     logger.info("Loading configuration...")
-    classifier = ScholarClassifier(config_file="config.yml", debug_mode=True)  # Enable debug mode
+    classifier = ScholarClassifier(config_file="config.yml", debug_mode=args.debug)
 
     # Print config structure (without passwords)
     safe_config = classifier.config.copy()

diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -23,10 +23,10 @@
 from scholar_classifier import ScholarClassifier
 
 
-class TestGmailIntegration(unittest.TestCase):
+class TestIntegration(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        """Set up test environment."""
+        """Set up test environment for full pipeline integration testing."""
         # Setup detailed logging
         logging.basicConfig(
             level=logging.DEBUG,
@@ -63,6 +63,7 @@ def setUpClass(cls):
     def test_end_to_end_pipeline(self):
         """Test the entire pipeline from Gmail connection to paper classification."""
         try:
+            # Use the main program's methods
             mail = self.classifier.connect_to_gmail()
 
             # for dmitrii's account
@@ -83,7 +84,7 @@ def test_end_to_end_pipeline(self):
 
             messages = message_numbers[0].split()
             self.assertTrue(len(messages) > 0, "No Google Scholar alert emails found")
-            print(f"\nFound {len(messages)} Google Scholar alert emails from Dec 23, 2024")
+            print(f"\nFound {len(messages)} Google Scholar alert emails from Jan 05, 2025")
 
             # Use messages directly since they're already filtered
             test_messages = messages