@@ -17,32 +17,32 @@ def parse_args():
1717
1818
1919def activity_stream_extract (args , json_data ):
20- assert json_data [' type' ] == ' OrderedCollectionPage'
20+ assert json_data [" type" ] == " OrderedCollectionPage"
2121
2222 data = []
23- if ' orderedItems' in json_data :
24- for item in json_data [' orderedItems' ]:
25- for k in ['id' , ' created' , ' endTime' ]:
23+ if " orderedItems" in json_data :
24+ for item in json_data [" orderedItems" ]:
25+ for k in ["id" , " created" , " endTime" ]:
2626 item .pop (k )
27- obj = item .pop (' object' )
27+ obj = item .pop (" object" )
2828
29- type_ = item .pop (' type' )
30- if type_ == ' Delete' :
29+ type_ = item .pop (" type" )
30+ if type_ == " Delete" :
3131 with args .db .conn :
32- args .db ["activity_stream" ].delete_where ("path = ?" , [obj .get ('id' )])
33- elif type_ == ' Update' :
32+ args .db ["activity_stream" ].delete_where ("path = ?" , [obj .get ("id" )])
33+ elif type_ == " Update" :
3434 continue # TODO: implement in-band Update mechanism
35- elif type_ not in [' Create' ]:
35+ elif type_ not in [" Create" ]:
3636 raise
3737
3838 obj_info = {
39- ' path' : obj .get ('id' ),
40- ' type' : obj .get (' type' ),
41- ** {k : v for k , v in obj .items () if k not in ['id' , ' type' ]},
39+ " path" : obj .get ("id" ),
40+ " type" : obj .get (" type" ),
41+ ** {k : v for k , v in obj .items () if k not in ["id" , " type" ]},
4242 }
4343 data .append (obj_info )
4444 if item :
45- print (' item' , item )
45+ print (" item" , item )
4646
4747 else :
4848 raise
@@ -68,23 +68,24 @@ def activity_stream_fetch(url):
6868
6969 return r .json ()
7070
71+
7172def update_activity_stream (args ):
72- current_page = int (args .db .pop (' select max(page) from activity_stream' ) or 0 ) + 1
73+ current_page = int (args .db .pop (" select max(page) from activity_stream" ) or 0 ) + 1
7374
7475 next_page_url = f"https://data.getty.edu/museum/collection/activity-stream/page/{ current_page } "
7576 while next_page_url :
7677 log .debug ("Fetching %s..." , next_page_url )
7778
7879 page_data = activity_stream_fetch (next_page_url )
7980 if page_data :
80- current_page = int (page_data ['id' ].split ('/' )[- 1 ])
81+ current_page = int (page_data ["id" ].split ("/" )[- 1 ])
8182
8283 activities = activity_stream_extract (args , page_data )
8384 args .db ["activity_stream" ].insert_all (
8485 [{"page" : current_page , ** activity } for activity in activities ], alter = True , replace = True # pk="id",
8586 )
8687
87- next_page_url = page_data .get (' next' , {}).get ('id' )
88+ next_page_url = page_data .get (" next" , {}).get ("id" )
8889 else :
8990 break
9091
@@ -94,6 +95,7 @@ def getty_add():
9495
9596 update_activity_stream (args )
9697
98+
9799# https://data.getty.edu/museum/collection/group/ee294bfc-bbe5-42b4-95b2-04872b802bfe
98100# https://data.getty.edu/museum/collection/object/08eaed9f-1354-4817-8aed-1db49e893a03
99101# https://data.getty.edu/museum/collection/document/37194afd-905c-43df-9f28-baacdd91062a
0 commit comments