Skip to content

Commit 8a32e4b

Browse files
authored
Merge pull request #17 from swerik-project/dev
pre-release
2 parents f50f777 + b605e6a commit 8a32e4b

7 files changed

Lines changed: 911 additions & 425 deletions

File tree

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cff-version: 1.2.0
22
message: To cite this repository, please use these metadata.
33
title: "The Swedish Parliament Corpus: pyriksdagen"
4-
version: v1.2.0
4+
version: v1.5.0
55
authors:
66
- given-names: Väinö
77
family-names: Yrjänäinen
@@ -10,7 +10,7 @@ authors:
1010
given-names: Robert
1111
orcid: "https://orcid.org/0000-0002-7647-4048"
1212
alias: BobBorges
13-
date-released: 2024-04-25
13+
date-released: 2024-10-30
1414
identifiers:
1515
- description: Repository basename
1616
type: other

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "pyriksdagen"
3-
version = "1.2.1"
3+
version = "1.5.0"
44
description = "Access the Riksdagen corpus"
55
authors = ["ninpnin <vainoyrjanainen@icloud.com>"]
66
repository = "https://github.com/welfare-state-analytics/riksdagen-corpus"

pyriksdagen/args.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
"""
2+
Dry Argparse helper.
3+
4+
"""
5+
from datetime import datetime
6+
from glob import glob
7+
from pyriksdagen.utils import (
8+
get_data_location,
9+
protocol_iterators,
10+
corpus_iterator,
11+
)
12+
import argparse
13+
import inspect
14+
import sys
15+
16+
17+
18+
19+
def populate_common_arguments(parser):
    """
    Register the command-line options shared by every doctype on a parser.

    Options added, in this order:
        -s / --start            (int)
        -e / --end              (int)
        -y / --parliament-year  (int, zero or more values)
        -l / --from-list        (str)
        -f / --specific-files   (str, zero or more values)
        -L / --data-folder      (str)
    Every option defaults to None when it is not given on the command line.

    Args:
        parser: argparse Parser

    Returns:
        parser: the same argparse Parser, with the options added
    """
    # The -s and -e help texts share everything after their first sentence.
    year_range_note = (
        "If -s and -e are explicitly set, all protocols in that range are processed. "
        "If -s and -e are unset and neither -p nor -l are set, the script will process "
        "all records from 1867 until the current year. Priority == 4"
    )
    # (flags, keyword settings) pairs; the order here is the order shown in --help.
    option_specs = (
        (("-s", "--start"), {
            "type": int,
            "help": "start year. If -s is set, -e must also be set. " + year_range_note,
        }),
        (("-e", "--end"), {
            "type": int,
            "help": "end year. If -e is set, -s must also be set. " + year_range_note,
        }),
        (("-y", "--parliament-year"), {
            "type": int,
            "nargs": '*',
            "help": "Parliament year, e.g. 1971 or 198384. Priority == 3",
        }),
        (("-l", "--from-list"), {
            "type": str,
            "help": "operate on a list of records from a file. Set the path from ./ -- this option doesn't cooperate with `-R`. Priority == 2",
        }),
        (("-f", "--specific-files"), {
            "type": str,
            "nargs": '*',
            "help": "operate on a specific file or list of files. Set the path from ./ -- this option doesn't cooperate with `-R`. User is responsible to ensure the file is the correct doctype. Priority == 1",
        }),
        (("-L", "--data-folder"), {
            "type": str,
            "help": "(optional) Path to folder containing the data, defaults to environment variable according to the document type or `data/` if no suitable variable is found.",
        }),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, default=None, **settings)
    return parser
61+
62+
63+
64+
65+
def rename_file_list(args):
    """
    Rename the `specific_files` attribute to the name stored in `args.doctype`.

    For example, a namespace with doctype "motions" comes back with its file
    list under the attribute `motions` and without `specific_files`.

    Args:
        args: argparse Namespace carrying both `doctype` and `specific_files`

    Returns:
        a new argparse Namespace with the renamed attribute; the namespace
        passed in is left unmodified

    Raises:
        KeyError: if `args` lacks `doctype` or `specific_files`
    """
    # vars() returns the namespace's live __dict__, so work on a copy:
    # popping from the original would silently strip `specific_files`
    # from the caller's object.
    d = dict(vars(args))
    d[d["doctype"]] = d.pop("specific_files")
    return argparse.Namespace(**d)
79+
80+
81+
82+
83+
def common_args(args):
    """
    Preprocessing shared by all doctypes: validate the year range, fill in
    the data folder and resolve the list of files to work on.

    The file list is taken from the first source that applies:
        1. files given with -f / --specific-files
        2. non-blank lines of the file given with -l / --from-list
        3. `*.xml` files in `<data_folder>/<year>/` for each -y / --parliament-year
        4. whatever `corpus_iterator` yields for the doctype and -s / -e range

    Args:
        args: argparse Argument namespace

    Returns:
        args, with the file list stored under the attribute named by
        `args.doctype` (see `rename_file_list`)

    Raises:
        ValueError: if only one of -s / -e was given
    """
    # Exactly one of the two bounds being set is the error case.
    if (args.start is None) != (args.end is None):
        raise ValueError("Set -s and -e or neither.")

    if args.data_folder is None:
        args.data_folder = get_data_location(args.doctype)

    # Explicitly listed files win; only fall back when none were given.
    if not args.specific_files:
        if args.from_list is not None:
            with open(args.from_list, 'r') as inf:
                stripped = [line.strip() for line in inf.readlines()]
            args.specific_files = [name for name in stripped if name != '']
        elif args.parliament_year:
            matches = []
            for py in args.parliament_year:
                matches += glob(f"{args.data_folder}/{py}/*.xml")
            args.specific_files = matches
        else:
            args.specific_files = sorted(
                corpus_iterator(args.doctype, start=args.start, end=args.end)
            )

    # Whatever the source, hand the files on in sorted order.
    if args.specific_files:
        args.specific_files = sorted(args.specific_files)
    return rename_file_list(args)
114+
115+
116+
117+
118+
def record_parser(parser):
    """
    Populate an argparse ArgumentParser with the standard arguments for
    working with riksdagen records.

    Args:
        parser: argparse ArgumentParser to populate

    Returns:
        parser, as returned by `populate_common_arguments`
    """
    # Records need nothing beyond the common arguments for now; this wrapper
    # is kept as the place to add records-specific args later.
    return populate_common_arguments(parser)
131+
132+
133+
134+
135+
def record_args(args):
    """
    Impute the standard values on a parsed argparse namespace for working
    with riksdagen records.

    Args:
        args: parsed argparse namespace

    Returns:
        args, as returned by `common_args`
    """
    # Records need nothing beyond the common preprocessing for now; this
    # wrapper is kept as the place to add records-specific actions later.
    return common_args(args)
148+
149+
150+
151+
152+
def motion_parser(parser):
    """
    Populate an argparse ArgumentParser with the standard arguments for
    working with riksdagen motions.

    Args:
        parser: argparse ArgumentParser to populate

    Returns:
        parser, as returned by `populate_common_arguments`
    """
    # Motions need nothing beyond the common arguments for now; this wrapper
    # is kept as the place to add motions-specific args later.
    return populate_common_arguments(parser)
165+
166+
167+
168+
169+
def motion_args(args):
    """
    Impute the standard values on a parsed argparse namespace for working
    with riksdagen motions.

    Args:
        args: parsed argparse namespace

    Returns:
        args, as returned by `common_args`
    """
    # Motions need nothing beyond the common preprocessing for now; this
    # wrapper is kept as the place to add motions-specific actions later.
    return common_args(args)
181+
182+
183+
184+
185+
def interpellation_parser(parser):
    """
    Populate an argparse ArgumentParser with the standard arguments for
    working with riksdagen interpellations.

    Args:
        parser: argparse ArgumentParser to populate

    Returns:
        parser, as returned by `populate_common_arguments`
    """
    # Interpellations need nothing beyond the common arguments for now; this
    # wrapper is kept as the place to add interpellations-specific args later.
    return populate_common_arguments(parser)
198+
199+
200+
201+
202+
def interpellation_args(args):
    """
    Impute the standard values on a parsed argparse namespace for working
    with riksdagen interpellations.

    Args:
        args: parsed argparse namespace

    Returns:
        args, as returned by `common_args`
    """
    # Interpellations need nothing beyond the common preprocessing for now;
    # this wrapper is kept as the place to add interpellations-specific
    # actions later.
    return common_args(args)
215+
216+
217+
218+
219+
def fetch_parser(doctype, docstring=None):
    """
    Build the argparse argument parser that belongs to a doctype.

    Args:
        doctype (str): one of "records", "motions", "interpellations"
        docstring (str): description of the calling script, used as the
            parser's description.

    Returns:
        argparse Parser object

    Raises:
        KeyError: if `doctype` is not one of the known doctypes
    """
    builders = {
        "records": record_parser,
        "motions": motion_parser,
        "interpellations": interpellation_parser,
    }
    parser = argparse.ArgumentParser(description=docstring)
    # Hidden option (suppressed from --help) that carries the doctype into
    # the parsed namespace so later steps can read `args.doctype`.
    parser.add_argument("--doctype", default=doctype, help=argparse.SUPPRESS)
    populate = builders[doctype]
    return populate(parser)
238+
239+
240+
241+
242+
def impute_args(args):
    """
    Run the doctype-specific imputation selected by `args.doctype`.

    Args:
        args: argparse parsed args namespace

    Returns:
        argparse parsed args namespace

    Raises:
        KeyError: if `args.doctype` is not one of the known doctypes
    """
    imputers = {
        "records": record_args,
        "motions": motion_args,
        "interpellations": interpellation_args,
    }
    impute = imputers[args.doctype]
    return impute(args)
258+
259+
260+
261+
262+
# test
263+
if __name__ == '__main__':
    # Manual smoke test: parse the command line with the motions parser and
    # print the namespace after imputation.
    motions_parser = fetch_parser("motions")
    args = motions_parser.parse_args()
    print(impute_args(args))

0 commit comments

Comments
 (0)