Skip to content

Commit 81a1716

Browse files
ckingbailey and OpenDisclosure Deploybot committed
Ci: test Python download code on PR (#337)
* squash ci/test-downloader
* merge requirements
* rm unused download/requirements
* Run `make clean download import process`

This is an automated update by travis-ci at Fri Dec 13 00:27:12 PST 2024 [skip ci]

---------

Co-authored-by: OpenDisclosure Deploybot <[email protected]>
1 parent ca78e5c commit 81a1716

19 files changed

+184 -105 lines changed
Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
name: Python tests
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- feat/pull-v2-api
7+
push:
8+
branches:
9+
- ci/test-downloader
10+
workflow_dispatch:
11+
12+
env:
13+
working_dir: download
14+
15+
jobs:
16+
run_tests:
17+
name: Run tests on Python download code
18+
runs-on: ubuntu-22.04
19+
defaults:
20+
run:
21+
working-directory: ${{ env.working_dir }}
22+
steps:
23+
- uses: actions/checkout@v4
24+
- uses: actions/setup-python@v4
25+
with:
26+
python-version-file: ${{ env.working_dir }}/.python-version
27+
cache: pip
28+
cache-dependency-path: ${{ env.working_dir }}/requirements.txt
29+
- run: pip install -r requirements.txt
30+
- name: Run tests
31+
run: pytest tests/test_*.py
32+

download/main.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def main():
3232
with open(f'{DATA_DIR_PATH}/filers.json', encoding='utf8') as f:
3333
filers = json.loads(f.read())
3434

35-
committees = Committees(filers, elections)
35+
committees = Committees(filers, elections.pl)
3636

3737
# A-Contribs:
3838
# join filers + filings + elections + transactions
@@ -41,13 +41,13 @@ def main():
4141
# committees.Ballot_Measure_Election -> elections.Ballot_Measure_Election
4242
# where trans['transaction']['calTransactionType'] == 'F460A'
4343
with open(f'{DATA_DIR_PATH}/filings.json', encoding='utf8') as f:
44-
filings = Filings(json.loads(f.read()))
44+
filings = Filings(json.loads(f.read())).pl
4545

4646
with open(f'{DATA_DIR_PATH}/transactions.json', encoding='utf8') as f:
4747
records = json.loads(f.read())
48-
transactions = Transactions(records)
48+
transactions = Transactions(records).pl
4949

50-
a_contributions = A_Contributions(transactions, filings, committees)
50+
a_contributions = A_Contributions(transactions, filings, committees.pl)
5151
a_contribs_df = a_contributions.df
5252
if not a_contribs_df.is_empty:
5353
print(a_contribs_df.drop(columns=[
@@ -72,8 +72,8 @@ def main():
7272
'XRef_Match',
7373
]).sample(n=20))
7474

75-
elections.df.write_csv(f'{OUTPUT_DIR}/elections.csv')
76-
committees.df.write_csv(f'{OUTPUT_DIR}/committees.csv')
75+
elections.pl.write_csv(f'{OUTPUT_DIR}/elections.csv')
76+
committees.pl.write_csv(f'{OUTPUT_DIR}/committees.csv')
7777
a_contributions.df.write_csv(f'{OUTPUT_DIR}/a_contributions.csv')
7878

7979
if __name__ == '__main__':

download/model/a_contributions.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,7 @@
22
Schedule A, Contributions
33
Hopefully this can be joined with other Schedule classes into a single Transaction class
44
"""
5-
from .committee import Committees
6-
from .filing import Filings
7-
from .transaction import Transactions
5+
import polars as pl
86
from .schedule import ScheduleBase
97

108
class A_Contributions(ScheduleBase):
@@ -13,9 +11,9 @@ class A_Contributions(ScheduleBase):
1311
"""
1412
def __init__(
1513
self,
16-
transactions:Transactions,
17-
filings:Filings,
18-
committees:Committees
14+
transactions:pl.DataFrame,
15+
filings:pl.DataFrame,
16+
committees:pl.DataFrame
1917
):
2018
self._form_id = 'F460A'
2119
super().__init__(

download/model/base.py

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
11
""" This is the base model, upon all others shall be based """
2+
import pandas as pd
23
import polars as pl
34

45
class BaseModel:
56
""" Base model other models inherit from """
67
def __init__(self, data):
78
self._data = data
89
self._df = None
9-
self._lazy = None
10+
self._pl = None
1011
self._dtypes = []
1112
self._pl_dtypes = []
1213
self._sql_dtypes = []
@@ -19,18 +20,32 @@ def data(self):
1920
return self._data
2021

2122
@property
22-
def lazy(self):
23-
''' Return a Polars Lazyframe '''
24-
if self._lazy is None:
25-
self._lazy = pl.LazyFrame(self._data, schema=self._dtypes)
26-
27-
return self._lazy
23+
def pl(self):
24+
''' Return a Polars dataframe '''
25+
if self._pl is None or self._pl.is_empty():
26+
self._pl = pl.DataFrame(self._data, schema=self._pl_dtypes)
2827

28+
return self._pl
29+
2930
@property
3031
def df(self):
31-
''' Return a Polars dataframe '''
32-
if self._df is None:
33-
self._df = self.lazy.collect()
32+
""" Get a dataframe of the data """
33+
if self._df is None or self._df.empty:
34+
self._df = pd.DataFrame(self._data).astype(self._dtypes)
3435

3536
return self._df
3637

38+
def to_sql(self, connection, **kwargs):
39+
""" Write to a postgresql table """
40+
options = {
41+
'index_label': 'id',
42+
'if_exists': 'replace'
43+
}
44+
options.update(kwargs)
45+
46+
self.df[self._sql_cols].to_sql(
47+
self._sql_table_name,
48+
connection,
49+
dtype=self._sql_dtypes,
50+
**options
51+
)

download/model/committee.py

Lines changed: 35 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,11 @@
22
from typing import List
33
import polars as pl
44
from sqlalchemy.types import String
5-
# Next line ingored because Pylint reports cannot find election in model
6-
from . import base, election # pylint: disable=no-name-in-module
5+
from . import base
76

87
class Committees(base.BaseModel):
98
""" A collection of committees """
10-
def __init__(self, filers:List[dict], elections:election.Elections):
9+
def __init__(self, filers:List[dict], elections:pl.DataFrame):
1110
empty_election_influence = {
1211
'electionDate': None,
1312
'measure': None,
@@ -20,6 +19,7 @@ def __init__(self, filers:List[dict], elections:election.Elections):
2019
super().__init__([
2120
{
2221
'filer_nid': int(f['filerNid']),
22+
# 'Ballot_Measure_Election': [ *elections[elections['date'] == infl['electionDate']]['name'].array, None ][0],
2323
'Ballot_Measure_Election': self._get_possibly_empty_ballot_measure_election(
2424
elections,
2525
infl
@@ -49,6 +49,21 @@ def __init__(self, filers:List[dict], elections:election.Elections):
4949
if f['registrations'].get('CA SOS')
5050
])
5151
self._dtypes = {
52+
'filer_nid': int,
53+
'Ballot_Measure_Election': 'string',
54+
'Filer_ID': 'string',
55+
'Filer_NamL': 'string',
56+
'_Status': 'string',
57+
'_Committee_Type': 'string',
58+
'Ballot_Measure': 'string',
59+
'Support_Or_Oppose': 'string',
60+
'candidate_controlled_id': 'string',
61+
'Start_Date': 'string',
62+
'End_Date': 'string',
63+
'data_warning': 'string',
64+
'Make_Active': 'string'
65+
}
66+
self._pl_dtypes = {
5267
'filer_nid': pl.UInt64,
5368
'Ballot_Measure_Election': pl.Utf8,
5469
'Filer_ID': pl.Utf8,
@@ -63,6 +78,22 @@ def __init__(self, filers:List[dict], elections:election.Elections):
6378
'data_warning': pl.Utf8,
6479
'Make_Active': pl.Utf8
6580
}
81+
self._sql_dtypes = {
82+
'Ballot_Measure_Election': String,
83+
'Filer_ID': String,
84+
'Filer_NamL': String,
85+
'_Status': String,
86+
'_Committee_Type': String,
87+
'Ballot_Measure': String,
88+
'Support_Or_Oppose': String,
89+
'candidate_controlled_id': String,
90+
'Start_Date': String,
91+
'End_Date': String,
92+
'data_warning': String,
93+
'Make_Active': String
94+
}
95+
self._sql_cols = self._sql_dtypes.keys()
96+
self._sql_table_name = 'committees'
6697

6798
@staticmethod
6899
def support_or_oppose(influence):
@@ -85,7 +116,7 @@ def _get_possibly_empty_ballot_measure_election(elections: pl.DataFrame, influen
85116
list, which will contain either the matched election slug or None.
86117
'''
87118
return [
88-
*elections.lazy.filter(
119+
*elections.lazy().filter(
89120
pl.col('date') == influence['electionDate']
90121
).first().collect().get_column('name'),
91122
None

download/model/d_expenditures.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
'''
22
FPPC Form 460, Schedule D, Expenditures
33
'''
4-
from .committee import Committees
5-
from .filing import Filings
6-
from .transaction import Transactions
4+
import polars as pl
75
from .schedule import ScheduleBase
86

97
class DExpenditures(ScheduleBase):
@@ -12,9 +10,9 @@ class DExpenditures(ScheduleBase):
1210
'''
1311
def __init__(
1412
self,
15-
transactions:Transactions,
16-
filings:Filings,
17-
committees:Committees
13+
transactions: pl.DataFrame,
14+
filings: pl.DataFrame,
15+
committees: pl.DataFrame
1816
):
1917
self._form_id = 'F460D'
2018
super().__init__(

download/model/election.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
import os
55
from datetime import datetime
6-
from polars import Utf8
6+
from sqlalchemy.types import String
77
from .base import BaseModel
88

99
class Elections(BaseModel):
@@ -53,11 +53,19 @@ def __init__(self, election_records):
5353

5454
super().__init__(elections)
5555
self._dtypes = {
56-
'title': Utf8,
57-
'name': Utf8,
58-
'location': Utf8,
59-
'date': Utf8
56+
'title': 'string',
57+
'name': 'string',
58+
'location': 'string',
59+
'date': 'string'
6060
}
61+
self._sql_dtypes = {
62+
'title': String,
63+
'name': String,
64+
'location': String,
65+
'date': String
66+
}
67+
self._sql_cols = self._sql_dtypes.keys()
68+
self._sql_table_name = 'elections'
6169

6270
@staticmethod
6371
def ordinal(n):

download/model/filing.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,15 @@ def __init__(self, filings):
1717
])
1818

1919
self._dtypes = {
20+
'filing_nid': 'string',
21+
'filer_nid': int,
22+
'Report_Num': 'Int64',
23+
'Rpt_Date': 'string',
24+
'From_Date': 'string',
25+
'Thru_Date': 'string'
26+
}
27+
28+
self._pl_dtypes = {
2029
'filing_nid': Utf8,
2130
'filer_nid': UInt64,
2231
'Report_Num': UInt64,

download/model/schedule.py

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,6 @@
22
Abstracts much of the boilerplate common to FPPC Form 460 Schedule data
33
'''
44
import polars as pl
5-
from .committee import Committees
6-
from .filing import Filings
7-
from .transaction import Transactions
85

96
DTYPES = {
107
'Filer_ID': 'string',
@@ -91,18 +88,18 @@ class ScheduleBase:
9188
def __init__(
9289
self,
9390
form_id: str,
94-
transactions:Transactions,
95-
filings:Filings,
96-
committees:Committees
91+
transactions: pl.DataFrame,
92+
filings: pl.DataFrame,
93+
committees: pl.DataFrame
9794
):
98-
schedule = committees.df.lazy().group_by('Filer_ID').first().join(
99-
filings.df.lazy(),
95+
schedule = committees.lazy().group_by('Filer_ID').first().join(
96+
filings.lazy(),
10097
on='filer_nid',
10198
how='inner'
10299
).rename({
103100
'_Committee_Type': 'Committee_Type'
104101
}).join(
105-
transactions.df.lazy().filter(pl.col('cal_tran_type') == form_id),
102+
transactions.lazy().filter(pl.col('cal_tran_type') == form_id),
106103
on='filing_nid',
107104
how='inner'
108105
).drop([
@@ -121,19 +118,14 @@ def __init__(
121118
])
122119

123120
self._lazy = schedule
124-
self._df = None
125121

126122
self._dtypes = DTYPES
127123

128124
@property
129125
def lazy(self):
130-
''' Get data as Polars LazyFrame '''
131126
return self._lazy
132127

133128
@property
134129
def df(self):
135-
''' Get data as Polars DataFrame '''
136-
if self._df is None:
137-
self._df = self._lazy.collect()
138-
139-
return self._df
130+
# QUESTION: Does this invalidate self._lazy?
131+
return self._lazy.collect()

download/model/transaction.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,4 +153,4 @@ def __init__(self, transactions):
153153
} for t in transactions
154154
])
155155

156-
self._dtypes = DTYPES
156+
self._pl_dtypes = DTYPES

download/requirements.dev.txt

Lines changed: 0 additions & 3 deletions
This file was deleted.

download/requirements.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)