Skip to content

Commit 11550e3

Browse files
committed
added databricks
1 parent 5662c3d commit 11550e3

File tree

4 files changed

+40
-5
lines changed

4 files changed

+40
-5
lines changed

db/sqlite.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ def add_publisher(self, conn, publisher_name, publisher_type,):
314314
VALUES (?, ?)
315315
""", (publisher_name, publisher_type))
316316
logger.info(f"Publisher {publisher_name} added successfully")
317+
return c.lastrowid
317318

318319
def update_publisher(self, conn, publisher_id, last_scraped_at):
319320
logger.info(f"Updating publisher: {publisher_id}, last_scraped_at: {last_scraped_at}")
@@ -372,6 +373,16 @@ def get_posts(self, conn):
372373
rows = c.fetchall()
373374
return [dict(row) for row in rows]
374375

376+
def get_posts_by_publisher_id(self, conn, pub_id):
    """Return all rows of ``posts`` belonging to one publisher.

    Args:
        conn: Open database connection; a cursor is created from it.
        pub_id: Value matched against the ``publisher_id`` column.

    Returns:
        A list with one ``dict`` per matching row (empty when nothing
        matches). Each row is passed to ``dict()``, so the connection
        must yield mapping-like rows -- presumably ``sqlite3.Row`` via
        ``row_factory``; confirm against ``get_connection``.
    """
    query = """
        SELECT *
        FROM posts
        WHERE publisher_id = ?
    """
    cursor = conn.cursor()
    # Bind pub_id as a parameter rather than formatting it into the SQL.
    cursor.execute(query, (pub_id,))
    return list(map(dict, cursor.fetchall()))
385+
375386
def update_post_label(self, conn, post_id, label):
376387
logger.info(f"Updating post: {post_id}, with label: {label}")
377388
c = conn.cursor()

handlers/databricks.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .base import BaseScraper
2+
3+
BASE_URL = "https://databricks.com/feed"
4+
class DatabricksScraper(BaseScraper):
    """Scraper whose feed URL is the module-level ``BASE_URL``."""

    def get_feed_url(self):
        """Return the feed URL this scraper reads from (``BASE_URL``)."""
        return BASE_URL

handlers/factory.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from handlers import aws, github, netflix, airbnb, dropbox, facebook, slack, spotify, cloudfare, nvidea, salesforce, google
1+
from handlers import aws, github, netflix, airbnb, dropbox, facebook, slack, spotify, cloudfare, nvidea, salesforce, google, databricks
22
from logger_config import get_logger;
33

44
logger = get_logger("HANDLERS")
@@ -29,6 +29,8 @@ def get_scraper(comapny):
2929
return salesforce.SalesforceScraper()
3030
elif comapny.lower() == 'google':
3131
return google.GoogleScraper()
32+
elif comapny.lower() == "databricks":
33+
return databricks.DatabricksScraper()
3234
else:
3335
logger.error(f"No handler found for {comapny}")
3436
return None

tests/e2e/test_scrape_pubs.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,35 @@
55
from scrape_pubs import scrape_pubs
66

77
@pytest.mark.pubs
def test_scrape_pubs_techteams1(db):
    """End-to-end: scraping a newly added "facebook" publisher stores its posts.

    Adds the publisher and one subscription to it, runs ``scrape_pubs``,
    then checks that more than ten posts are stored under the id returned
    by ``add_publisher``.
    """
    conn = db.get_connection()
    try:
        pubid = db.add_publisher(conn, "facebook", "techteam")
        db.add_subscription(
            conn,
            "newemail5@gmail.com",
            enums.PublisherCategory.SOFTWARE_ENGINEERING.value,
            pubid,
        )

        scrape_pubs(db, conn)

        # Query by this publisher's id so posts from other publishers
        # do not count toward the assertion.
        posts = db.get_posts_by_publisher_id(conn, pubid)

        assert len(posts) > 10
    finally:
        # Close the connection even when the scrape or the assertion fails,
        # so a failing run does not leak it.
        conn.close()
2121

22+
@pytest.mark.pubs
def test_scrape_pubs_techteams2(db):
    """End-to-end: scraping a newly added "databricks" publisher stores its posts.

    Adds the publisher and one subscription to it, runs ``scrape_pubs``,
    then checks that more than nine posts are stored under the id returned
    by ``add_publisher``.
    """
    conn = db.get_connection()
    try:
        pubid = db.add_publisher(conn, "databricks", "techteam")
        db.add_subscription(
            conn,
            "newemail6@gmail.com",
            enums.PublisherCategory.SOFTWARE_ENGINEERING.value,
            pubid,
        )

        scrape_pubs(db, conn)

        # Query by this publisher's id so posts from other publishers
        # do not count toward the assertion.
        posts = db.get_posts_by_publisher_id(conn, pubid)

        assert len(posts) > 9
    finally:
        # Close the connection even when the scrape or the assertion fails,
        # so a failing run does not leak it.
        conn.close()
36+
2237

2338

2439

0 commit comments

Comments
 (0)