1
1
from datetime import datetime , timedelta , timezone
2
2
from os import path
3
- from urllib .parse import urlparse
3
+ from urllib .parse import parse_qs , urlparse
4
4
5
5
from requests import Session
6
6
@@ -43,23 +43,33 @@ def detect(self, source, ref=None, extra_args=None):
43
43
if not parsed_url .netloc :
44
44
return None
45
45
46
- url_parts = parsed_url .path .split ("/" )
47
- if url_parts [- 2 ] == "dataset" :
48
- self .dataset_id = url_parts [- 1 ]
46
+ url_parts_1 = parsed_url .path .split ("/history/" )
47
+ url_parts_2 = url_parts_1 [0 ].split ("/" )
48
+ if url_parts_2 [- 2 ] == "dataset" :
49
+ self .dataset_id = url_parts_2 [- 1 ]
49
50
else :
50
51
return None
51
52
52
53
api_url_path = "/api/3/action/"
53
54
api_url = parsed_url ._replace (
54
- path = "/" .join (url_parts [:- 2 ]) + api_url_path
55
+ path = "/" .join (url_parts_2 [:- 2 ]) + api_url_path , query = ""
55
56
).geturl ()
56
57
57
58
status_show_url = f"{ api_url } status_show"
58
59
resp = self .urlopen (status_show_url )
59
60
if resp .status_code == 200 :
61
+
62
+ # handle the activities
63
+ activity_id = None
64
+ if parse_qs (parsed_url .query ).get ("activity_id" ) is not None :
65
+ activity_id = parse_qs (parsed_url .query ).get ("activity_id" )[0 ]
66
+ if len (url_parts_1 ) == 2 :
67
+ activity_id = url_parts_1 [- 1 ]
68
+
60
69
self .version = self ._fetch_version (api_url )
61
70
return {
62
71
"dataset_id" : self .dataset_id ,
72
+ "activity_id" : activity_id ,
63
73
"api_url" : api_url ,
64
74
"version" : self .version ,
65
75
}
@@ -69,11 +79,21 @@ def detect(self, source, ref=None, extra_args=None):
69
79
def fetch (self , spec , output_dir , yield_output = False ):
70
80
"""Fetch a CKAN dataset."""
71
81
dataset_id = spec ["dataset_id" ]
82
+ activity_id = spec ["activity_id" ]
72
83
73
84
yield f"Fetching CKAN dataset { dataset_id } .\n "
74
- package_show_url = f"{ spec ['api_url' ]} package_show?id={ dataset_id } "
85
+
86
+ # handle the activities
87
+ if activity_id :
88
+ fetch_url = (
89
+ f"{ spec ['api_url' ]} activity_data_show?"
90
+ f"id={ activity_id } &object_type=package"
91
+ )
92
+ else :
93
+ fetch_url = f"{ spec ['api_url' ]} package_show?id={ dataset_id } "
94
+
75
95
resp = self .urlopen (
76
- package_show_url ,
96
+ fetch_url ,
77
97
headers = {"accept" : "application/json" },
78
98
)
79
99
0 commit comments