Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
LIST="" #directory of your lists. "ct_membership_lists" for example
PERISCOPE_URL="" #url of the periscope dashboard shared with your chapter; the membership list is downloaded from it
PERISCOPE_PASS="" #password for the periscope dashboard
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ dependencies = [
"plotly~=6.2",
"python-dotenv~=1.1",
"ratelimit~=2.2",
"tqdm~=4.67"
"selenium>=4.40.0",
"tqdm~=4.67",
]

[project.optional-dependencies]
Expand Down
52 changes: 51 additions & 1 deletion src/app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import threading

import dash
import dash_bootstrap_components as dbc
import dash_bootstrap_templates
from dash import Dash, Input, Output, clientside_callback, html
from dash import Dash, Input, Output, State, clientside_callback, html, callback

Check failure

Code scanning / Ruff

`{name}` imported but unused; consider using `importlib.util.find_spec` to test for availability Error

dash.State imported but unused

Check failure on line 6 in src/app.py

View workflow job for this annotation

GitHub Actions / ruff-check

ruff (F401)

src/app.py:6:39: F401 `dash.State` imported but unused

from src.utils.fetch_list import fetch_list

Check failure

Code scanning / Ruff

Import block is un-sorted or un-formatted Error

Import block is un-sorted or un-formatted

Check failure on line 8 in src/app.py

View workflow job for this annotation

GitHub Actions / ruff-check

ruff (I001)

src/app.py:1:1: I001 Import block is un-sorted or un-formatted

# Shared state for the background fetch thread.
#   "running": set to True by start_fetch when it launches the worker and
#              reset to False by _run_fetch when the worker finishes.
#   "status":  text that the callbacks send to the "fetch-list-status" element.
_fetch_state: dict = {"running": False, "status": ""}
# Held around every read and write of _fetch_state, since both the Dash
# callbacks and the worker thread touch it.
_fetch_lock = threading.Lock()

FAVICON = {
"rel": "icon",
Expand Down Expand Up @@ -37,5 +45,47 @@
Input(component_id="color-mode-switch", component_property="value"),
)

def _run_fetch() -> None:
    """Run ``fetch_list`` and record the outcome in ``_fetch_state``.

    Intended as the target of the worker thread started by ``start_fetch``.
    On success the status becomes ``"Done."``; on failure it becomes
    ``"Error: <exception text>"``. In every case ``"running"`` is reset to
    ``False`` so that a new fetch can be started afterwards.
    """
    try:
        fetch_list()
    # This is the top of the worker thread: anything that escapes here would be
    # lost with the thread, so the failure is surfaced through the status text.
    except Exception as exc:  # noqa: BLE001
        with _fetch_lock:
            _fetch_state["status"] = f"Error: {exc}"
    else:
        with _fetch_lock:
            _fetch_state["status"] = "Done."
    finally:
        # Always release the "running" flag, even for BaseException subclasses.
        with _fetch_lock:
            _fetch_state["running"] = False


@callback(
    Output("fetch-list-poll", "disabled"),
    Output("fetch-list-status", "children"),
    Input("fetch-list-button", "n_clicks"),
    prevent_initial_call=True,
)
def start_fetch(n_clicks: int | None) -> tuple[bool, str]:  # noqa: ARG001
    """Start a background fetch when the "fetch-list-button" is clicked.

    Args:
        n_clicks: Click counter supplied by Dash. Only the fact that the
            callback fired matters, so the value itself is not used.

    Returns:
        A ``(disabled, status)`` pair for the "fetch-list-poll" interval and the
        "fetch-list-status" element. ``disabled`` is always ``False`` so the
        interval starts (or keeps) polling. If a fetch is already running, no
        second thread is started and the current status text is returned.
    """
    with _fetch_lock:
        if _fetch_state["running"]:
            return False, _fetch_state["status"]
        # Claim the "running" flag under the lock so two quick clicks cannot
        # both start a worker.
        _fetch_state["running"] = True
        _fetch_state["status"] = "Fetching..."
    threading.Thread(target=_run_fetch, daemon=True).start()
    return False, "Fetching..."


@callback(
    Output("fetch-list-poll", "disabled", allow_duplicate=True),
    Output("fetch-list-status", "children", allow_duplicate=True),
    Input("fetch-list-poll", "n_intervals"),
    prevent_initial_call=True,
)
def poll_fetch_status(n_intervals: int | None) -> tuple[bool, str]:  # noqa: ARG001
    """Report the fetch status on every tick of the "fetch-list-poll" interval.

    Args:
        n_intervals: Tick counter supplied by Dash. Only the fact that the
            callback fired matters, so the value itself is not used.

    Returns:
        A ``(disabled, status)`` pair. The interval stays enabled while the
        worker is running and is disabled once it has finished, so polling
        stops after the final status ("Done." or "Error: ...") is delivered.
    """
    with _fetch_lock:
        running = _fetch_state["running"]
        status = _fetch_state["status"]
    # "disabled" is the opposite of "running": returning `running` directly
    # would stop polling on the first tick of an active fetch and leave the
    # status stuck at "Fetching...".
    return not running, status


if __name__ == "__main__":
app.run(debug=True)
19 changes: 17 additions & 2 deletions src/components/sidebar.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html
from dash import dcc, html, Input, Output, callback

Check failure

Code scanning / Ruff

`{name}` imported but unused; consider using `importlib.util.find_spec` to test for availability Error

dash.Input imported but unused

Check failure

Code scanning / Ruff

`{name}` imported but unused; consider using `importlib.util.find_spec` to test for availability Error

dash.Output imported but unused

Check failure

Code scanning / Ruff

`{name}` imported but unused; consider using `importlib.util.find_spec` to test for availability Error

dash.callback imported but unused

from src.utils.scan_lists import MEMB_LISTS

Expand Down Expand Up @@ -35,7 +35,22 @@
dbc.Col(dbc.Label(className="fa fa-moon", html_for="color-mode-switch")),
],
className="g-0",
)
),
dbc.Row(
dbc.Col(
[
dbc.Button(
"Fetch New List",
id="fetch-list-button",
size="sm",
color="secondary",
className="mt-1 w-100",
),
html.Small(id="fetch-list-status", className="text-muted"),
dcc.Interval(id="fetch-list-poll", interval=1000, disabled=True),
]
),
),
],
width="auto",
align="center",
Expand Down
98 changes: 98 additions & 0 deletions src/utils/fetch_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

Check failure

Code scanning / Ruff

Lowercase `{name}` imported as non-lowercase `{asname}` Error

Lowercase expected_conditions imported as non-lowercase EC
from selenium.webdriver.common.action_chains import ActionChains
import dotenv
import os
import time
from pathlib import Path, PurePath

Check failure

Code scanning / Ruff

Import block is un-sorted or un-formatted Error

Import block is un-sorted or un-formatted

# Settings come from the ".env" file located at parents[2] of this module's path.
# Each value is None when its key is missing from that file.
config = dotenv.dotenv_values(Path(PurePath(__file__).parents[2], ".env"))
DOWNLOAD_DIR = config.get("LIST")
PERISCOPE_URL = config.get("PERISCOPE_URL")
PERISCOPE_PASS = config.get("PERISCOPE_PASS")

def _snapshot_dir(directory: Path) -> dict[str, int]:
    """Map every entry name in *directory* to its modification time in nanoseconds."""
    snapshot: dict[str, int] = {}
    for entry in directory.iterdir():
        try:
            snapshot[entry.name] = entry.stat().st_mtime_ns
        except FileNotFoundError:
            # A temp file can be renamed between iterdir() and stat() when a
            # download completes; a vanished entry is simply not recorded.
            continue
    return snapshot


def _wait_for_new_csv(download_path: Path, preexisting: dict[str, int], timeout: float = 60) -> list[str]:
    """Block until a new, completely written ``.csv`` file shows up in *download_path*.

    Args:
        download_path: Directory the browser is downloading into.
        preexisting: Snapshot (name -> mtime) taken before the download was
            triggered. Unchanged entries are ignored, so an older list or a
            stale partial download cannot be mistaken for the new file.
        timeout: Maximum number of seconds to wait.

    Returns:
        The sorted names of the new (or rewritten) csv files.

    Raises:
        TimeoutError: If no finished csv appears within *timeout* seconds.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        changed = [name for name, mtime in _snapshot_dir(download_path).items() if preexisting.get(name) != mtime]
        # These suffix/prefix patterns mark downloads that are still in flight.
        still_writing = any(name.endswith(".crdownload") or name.startswith(".com.google.Chrome") for name in changed)
        new_csvs = sorted(name for name in changed if name.endswith(".csv"))
        if new_csvs and not still_writing:
            return new_csvs
        time.sleep(1)
    msg = f"Timed out after {timeout} seconds waiting for the csv download in {download_path}"
    raise TimeoutError(msg)


def fetch_list(
    download_dir: str | None = DOWNLOAD_DIR,
    periscope_url: str | None = PERISCOPE_URL,
    periscope_pass: str | None = PERISCOPE_PASS,
) -> None:
    """Download the membership list csv from the Periscope dashboard with headless Chrome.

    Args:
        download_dir: Directory the csv is saved to (created if missing). When
            it is ``None`` or empty the function logs a warning and returns
            without doing anything.
        periscope_url: URL of the password-protected dashboard.
        periscope_pass: Password typed into the dashboard's login form.

    Raises:
        ValueError: If *periscope_url* is missing.
        TimeoutError: If the csv does not finish downloading in time.
        selenium exceptions: Raised by the waits and element lookups when the
            page does not behave as expected (for example ``TimeoutException``).
    """
    # Local import so this block does not depend on a module-level logging import.
    import logging

    log = logging.getLogger(__name__)

    # An empty string (the .env.sample default) is treated like a missing value;
    # otherwise the list would be downloaded into the current directory.
    if not download_dir:
        log.warning("LIST is not configured; skipping membership list fetch.")
        return
    if not periscope_url:
        msg = "PERISCOPE_URL is not configured."
        raise ValueError(msg)

    # The download directory is handed to Chrome as an absolute path.
    download_path = Path(download_dir).resolve()
    download_path.mkdir(parents=True, exist_ok=True)
    preexisting = _snapshot_dir(download_path)

    options = Options()
    options.add_argument("--window-size=1920,1080")  # standard window size
    options.add_argument("--headless=new")
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": str(download_path),
            "download.prompt_for_download": False,
        },
    )

    widget_css = ".widget-18183666"  # the main member widget on the dashboard
    loader_css = f"{widget_css} .loader.materializing"

    driver = webdriver.Chrome(options=options)
    try:
        # Set the download directory for the running browser as well.
        driver.execute_cdp_cmd(
            "Browser.setDownloadBehavior",
            {"behavior": "allow", "downloadPath": str(download_path)},
        )

        wait = WebDriverWait(driver, 15)

        # Auth section: fill in the periscope password for the given url.
        driver.get(periscope_url)
        driver.find_element(By.XPATH, """//*[@id="password"]""").send_keys(periscope_pass)
        submit = driver.find_element(By.XPATH, """//*[@id="submit-button"]""")
        ActionChains(driver).move_to_element(submit).move_by_offset(10, 0).click().perform()
        log.info("Authorized periscope.")

        # Scroll to the widget and hover over it to reveal its controls;
        # periscope does not offer a direct link to the csv.
        title = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, f"{widget_css} div.title")))
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", title)
        time.sleep(0.5)
        ActionChains(driver).move_to_element(title).perform()
        log.info("Found main member widget.")

        # Open the widget's hamburger menu.
        expand = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, f"{widget_css} div.expand.button")))
        ActionChains(driver).move_to_element(expand).click().perform()
        log.info("Found hamburger icon.")

        # Click "Download Data" in the menu.
        download_option = wait.until(EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Download Data')]")))
        download_option.click()
        log.info("Clicked Download Data")

        # Periscope materializes the csv first: wait for the loader to appear,
        # then for it to disappear.
        WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, loader_css)))
        log.info("Materializing...")
        WebDriverWait(driver, 120).until(EC.invisibility_of_element_located((By.CSS_SELECTOR, loader_css)))
        log.info("Materialization complete.")

        # Wait for the .csv file itself to finish downloading.
        downloaded = _wait_for_new_csv(download_path, preexisting, timeout=60)
        log.info("Downloaded: %s", downloaded)
    finally:
        # Always shut the browser down, otherwise every failed run leaks a
        # headless Chrome process.
        driver.quit()

if __name__ == "__main__":
fetch_list()
37 changes: 33 additions & 4 deletions src/utils/scan_lists.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Parse all membership lists into pandas dataframes for display on dashboard."""
from re import L

Check failure

Code scanning / Ruff

`{name}` imported but unused; consider using `importlib.util.find_spec` to test for availability Error

re.L imported but unused

import logging
from io import TextIOWrapper
Expand Down Expand Up @@ -145,7 +146,7 @@
return df


def scan_memb_list_from_csv(csv_file_data: TextIOWrapper | IO[bytes]) -> pd.DataFrame:
def scan_memb_list_from_csv(csv_file_data: str | TextIOWrapper | IO[bytes]) -> pd.DataFrame:
"""Convert the provided csv data into a pandas dataframe."""
return pd.read_csv(csv_file_data, dtype={"zip": str}, header=0)

Expand All @@ -155,23 +156,51 @@
with ZipFile(zip_path) as memb_list_zip, memb_list_zip.open(f"{list_name}.csv", "r") as memb_list_csv:
return scan_memb_list_from_csv(memb_list_csv)

# Years outside this range are treated as mis-parsed segments, not list dates.
_MIN_LIST_YEAR = 2000
_MAX_LIST_YEAR = 2100


def date_from_stem(stem: str) -> str:
    """Extract an ISO date string from a filename stem.

    The stem is split on underscores and the segments are tried from last to
    first; the first segment that parses as a date with a year between 2000 and
    2100 (inclusive) wins.

    Args:
        stem: Filename without its extension, e.g. ``"my_list_20240115"``.

    Returns:
        The date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If no segment yields an acceptable date.
    """
    for part in reversed(stem.split("_")):
        try:
            parsed = pd.to_datetime(part, format="mixed").date()
        except ValueError:
            # Not a date at all; move on to the next segment.
            continue
        if _MIN_LIST_YEAR <= parsed.year <= _MAX_LIST_YEAR:
            return parsed.isoformat()
    msg = f"No parseable date found in filename stem: {stem}"
    raise ValueError(msg)

def scan_all_zip_membership_lists(list_name: str) -> dict[str, pd.DataFrame]:
    """Scan all zip files and call scan_memb_list_from_zip on each, returning the results.

    Args:
        list_name: Name of the directory (relative to ``parents[2]`` of this
            file) that is searched recursively for ``*.zip`` files. It is also
            passed to ``scan_memb_list_from_zip`` for each archive.

    Returns:
        A dict mapping each list's ISO date (taken from the filename) to its
        dataframe. Files that cannot be dated or read are logged and skipped.
    """
    memb_lists = {}
    logger.info("Scanning zipped membership lists in %s/.", list_name)
    files = sorted((Path(__file__).parents[2] / list_name).glob("**/*.zip"), reverse=True)
    for zip_file in files:
        try:
            list_date_iso = date_from_stem(zip_file.stem)
            memb_lists[list_date_iso] = scan_memb_list_from_zip(str(zip_file.absolute()), list_name)
        # KeyError: ZipFile.open raises it when the archive has no
        # "<list_name>.csv" member; one such archive must not abort the scan.
        except (IndexError, KeyError, ValueError):
            logger.warning("Could not extract list from %s. Skipping file.", zip_file.name)
    logger.info("Found %s zipped membership lists.", len(memb_lists))
    return memb_lists

def scan_all_csv_membership_lists(list_name: str) -> dict[str, pd.DataFrame]:
    """Scan all csv files and call scan_memb_list_from_csv on each, return results.

    The directory ``list_name`` (relative to ``parents[2]`` of this file) is
    searched recursively for ``*.csv`` files. Each result is keyed by the ISO
    date found in the filename; files that cannot be dated or parsed are logged
    and skipped.
    """
    logger.info("Scanning csv membership lists in %s/.", list_name)
    list_dir = Path(__file__).parents[2] / list_name
    found: dict[str, pd.DataFrame] = {}
    for csv_path in sorted(list_dir.glob("**/*.csv"), reverse=True):
        try:
            iso_date = date_from_stem(csv_path.stem)
            found[iso_date] = scan_memb_list_from_csv(csv_path)
        except (IndexError, ValueError):
            logger.warning("Could not extract list from %s. Skipping file.", csv_path.name)
    logger.info("Found %s csv membership lists.", len(found))
    return found

def scan_all_membership_lists(list_name: str) -> dict[str, pd.DataFrame]:
    """Scan both zipped and plain csv membership lists and merge the results.

    When a zip file and a csv file resolve to the same date, the csv entry
    wins because it is the right-hand operand of the dict merge.
    """
    zipped_lists = scan_all_zip_membership_lists(list_name)
    csv_lists = scan_all_csv_membership_lists(list_name)
    return zipped_lists | csv_lists

def branch_name_from_zip_code(zip_code: str, branch_zips: pd.DataFrame) -> str:
"""Check for provided zip_code in provided branch_zips and return relevant branch name if found."""
Expand Down
Loading