@@ -95,19 +95,20 @@ async def monitor_solr():
9595 graphite_address = 'graphite.us.archive.org:2004' ,
9696 )
9797
98+
9899@limit_server (["ol-www0" ], scheduler )
99100@scheduler .scheduled_job ('interval' , seconds = 60 )
100101async def monitor_partner_useragents ():
101102
def graphite_safe(s: str) -> str:
    """Normalize a string for safe use as a Graphite metric name.

    Every run of characters other than ASCII letters, digits and hyphens
    (dots, spaces, underscores and punctuation included) is replaced by a
    single underscore, and leading/trailing underscores are then removed.

    :param s: Raw label, e.g. a user-agent string.
    :return: The normalized name; empty if ``s`` contains no letters,
        digits or hyphens.
    """
    # Treating '_' as part of the run to replace means dot/space replacement,
    # unsafe-character replacement and underscore collapsing all happen in
    # one pass: any mix of them becomes exactly one '_'.
    normalized = re.sub(r'[^A-Za-z0-9-]+', '_', s)
    # No dots survive the substitution, so only underscores need trimming.
    return normalized.strip('_')
112113
113114 def extract_agent_counts (ua_counts , allowed_names = None ):
@@ -123,39 +124,42 @@ def extract_agent_counts(ua_counts, allowed_names=None):
123124 agent_counts ['other' ] += count
124125 return agent_counts
125126
126- known_names = extract_agent_counts ("""
127- 177 Whefi/1.0 (contact@whefi.com)
128- 85 Bookhives/1.0 (paulpleela@gmail.com)
129- 85 AliyunSecBot/Aliyun (AliyunSecBot@service.alibaba.com)
130- 62 BookHub/1.0 (contact@ybookshub.com)
131- 58 Bookscovery/1.0 (https://bookscovery.com; info@bookscovery.com)
132- 45 BookstoreApp/1.0 (contact@thounkai.com)
133- 20 Gleeph/1.0 (contact-openlibrary@gleeph.net)
134- 2 Tomeki/1.0 (ankit@yopmail.com , gzip)
135- 2 Snipd/1.0 (https://www.snipd.com) contact: company@snipd.com
136- 2 OnTrack/1.0 (ashkan.haghighifashi@gmail.com)
137- 2 Leaders.org (leaders.org) janakan@leaders.org
138- 2 AwarioSmartBot/1.0 (+https://awario.com/bots.html; bots@awario.com)
139- 1 ISBNdb (support@isbndb.com)
140- """ )
127+ known_names = extract_agent_counts (
128+ """
129+ 177 Whefi/1.0 (contact@whefi.com)
130+ 85 Bookhives/1.0 (paulpleela@gmail.com)
131+ 85 AliyunSecBot/Aliyun (AliyunSecBot@service.alibaba.com)
132+ 62 BookHub/1.0 (contact@ybookshub.com)
133+ 58 Bookscovery/1.0 (https://bookscovery.com; info@bookscovery.com)
134+ 45 BookstoreApp/1.0 (contact@thounkai.com)
135+ 20 Gleeph/1.0 (contact-openlibrary@gleeph.net)
136+ 2 Tomeki/1.0 (ankit@yopmail.com , gzip)
137+ 2 Snipd/1.0 (https://www.snipd.com) contact: company@snipd.com
138+ 2 OnTrack/1.0 (ashkan.haghighifashi@gmail.com)
139+ 2 Leaders.org (leaders.org) janakan@leaders.org
140+ 2 AwarioSmartBot/1.0 (+https://awario.com/bots.html; bots@awario.com)
141+ 1 ISBNdb (support@isbndb.com)
142+ """
143+ )
141144
142145 recent_uas = bash_run (
143- f """obfi_in_docker obfi_previous_minute | obfi_grep_bots -v | grep " 200 " | grep -Eo '[^"]+@[^"]+' | sort | uniq -c | sort -rn""" ,
146+ """obfi_in_docker obfi_previous_minute | obfi_grep_bots -v | grep " 200 " | grep -Eo '[^"]+@[^"]+' | sort | uniq -c | sort -rn""" ,
144147 sources = ["../obfi.sh" ],
145- capture_output = True
148+ capture_output = True ,
146149 ).stdout
147150
148151 agent_counts = extract_agent_counts (recent_uas , allowed_names = known_names )
149152 events = []
150153 ts = int (time .time ())
151154 for agent , count in agent_counts .items ():
152- events .append (GraphiteEvent (
153- path = f'stats.ol.partners. { agent } ' ,
154- value = float (count ),
155- timestamp = ts
156- ))
155+ events .append (
156+ GraphiteEvent (
157+ path = f'stats.ol.partners. { agent } ' , value = float (count ), timestamp = ts
158+ )
159+ )
157160 GraphiteEvent .submit_many (events , 'graphite.us.archive.org:2004' )
158161
162+
159163@limit_server (["ol-www0" ], scheduler )
160164@scheduler .scheduled_job ('interval' , seconds = 60 )
161165async def monitor_empty_homepage ():
0 commit comments