Skip to content

Commit 7faa53c

Browse files
committed
Handle the activities
1 parent 0808018 commit 7faa53c

File tree

2 files changed

+57
-13
lines changed

2 files changed

+57
-13
lines changed

repo2docker/contentproviders/ckan.py

+27-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from datetime import datetime, timedelta, timezone
22
from os import path
3-
from urllib.parse import urlparse
3+
from urllib.parse import parse_qs, urlparse
44

55
from requests import Session
66

@@ -43,23 +43,33 @@ def detect(self, source, ref=None, extra_args=None):
4343
if not parsed_url.netloc:
4444
return None
4545

46-
url_parts = parsed_url.path.split("/")
47-
if url_parts[-2] == "dataset":
48-
self.dataset_id = url_parts[-1]
46+
url_parts_1 = parsed_url.path.split("/history/")
47+
url_parts_2 = url_parts_1[0].split("/")
48+
if url_parts_2[-2] == "dataset":
49+
self.dataset_id = url_parts_2[-1]
4950
else:
5051
return None
5152

5253
api_url_path = "/api/3/action/"
5354
api_url = parsed_url._replace(
54-
path="/".join(url_parts[:-2]) + api_url_path
55+
path="/".join(url_parts_2[:-2]) + api_url_path, query=""
5556
).geturl()
5657

5758
status_show_url = f"{api_url}status_show"
5859
resp = self.urlopen(status_show_url)
5960
if resp.status_code == 200:
61+
62+
# Handle activities: take the activity ID from the "activity_id" query parameter or from a "/history/<id>" path suffix.
63+
activity_id = None
64+
if parse_qs(parsed_url.query).get("activity_id") is not None:
65+
activity_id = parse_qs(parsed_url.query).get("activity_id")[0]
66+
if len(url_parts_1) == 2:
67+
activity_id = url_parts_1[-1]
68+
6069
self.version = self._fetch_version(api_url)
6170
return {
6271
"dataset_id": self.dataset_id,
72+
"activity_id": activity_id,
6373
"api_url": api_url,
6474
"version": self.version,
6575
}
@@ -69,11 +79,21 @@ def detect(self, source, ref=None, extra_args=None):
6979
def fetch(self, spec, output_dir, yield_output=False):
7080
"""Fetch a CKAN dataset."""
7181
dataset_id = spec["dataset_id"]
82+
activity_id = spec["activity_id"]
7283

7384
yield f"Fetching CKAN dataset {dataset_id}.\n"
74-
package_show_url = f"{spec['api_url']}package_show?id={dataset_id}"
85+
86+
# Handle activities: when an activity ID is present, query activity_data_show instead of package_show.
87+
if activity_id:
88+
fetch_url = (
89+
f"{spec['api_url']}activity_data_show?"
90+
f"id={activity_id}&object_type=package"
91+
)
92+
else:
93+
fetch_url = f"{spec['api_url']}package_show?id={dataset_id}"
94+
7595
resp = self.urlopen(
76-
package_show_url,
96+
fetch_url,
7797
headers={"accept": "application/json"},
7898
)
7999

tests/unit/contentproviders/test_ckan.py

+30-6
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,23 @@ def test_detect_ckan(requests_mock):
1414

1515
expected = {
1616
"dataset_id": "1234",
17+
"activity_id": None,
1718
"api_url": "http://demo.ckan.org/api/3/action/",
1819
"version": "1709043354",
1920
}
2021

22+
expected_activity = expected.copy()
23+
expected_activity["activity_id"] = "5678"
24+
2125
assert CKAN().detect("http://demo.ckan.org/dataset/1234") == expected
26+
assert (
27+
CKAN().detect("http://demo.ckan.org/dataset/1234?activity_id=5678")
28+
== expected_activity
29+
)
30+
assert (
31+
CKAN().detect("http://demo.ckan.org/dataset/1234/history/5678")
32+
== expected_activity
33+
)
2234

2335

2436
def test_detect_not_ckan():
@@ -41,15 +53,27 @@ def test_ckan_fetch(requests_mock):
4153
requests_mock.get(
4254
"http://demo.ckan.org/api/3/action/package_show?id=1234", json=mock_response
4355
)
56+
requests_mock.get(
57+
"http://demo.ckan.org/api/3/action/activity_data_show?id=5678",
58+
json=mock_response,
59+
)
4460
requests_mock.get(f"file://{ckan_path}", content=open(ckan_path, "rb").read())
61+
62+
ckan = CKAN()
63+
spec = {"dataset_id": "1234", "api_url": "http://demo.ckan.org/api/3/action/"}
64+
65+
expected = {ckan_path.rsplit("/", maxsplit=1)[1]}
66+
67+
with TemporaryDirectory() as d:
68+
spec["activity_id"] = None
69+
output = []
70+
for l in ckan.fetch(spec, d):
71+
output.append(l)
72+
assert expected == set(os.listdir(d))
73+
4574
with TemporaryDirectory() as d:
46-
ckan = CKAN()
47-
spec = {
48-
"dataset_id": "1234",
49-
"api_url": "http://demo.ckan.org/api/3/action/",
50-
}
75+
spec["activity_id"] = "5678"
5176
output = []
5277
for l in ckan.fetch(spec, d):
5378
output.append(l)
54-
expected = {ckan_path.rsplit("/", maxsplit=1)[1]}
5579
assert expected == set(os.listdir(d))

0 commit comments

Comments
 (0)