@@ -179,178 +179,6 @@ HeuristicBlocker.prototype = {
179
179
}
180
180
} ,
181
181
182
- /**
183
- * Calls the pixel cookie sharing checking function
184
- * iff the request is for an image in the top-level frame,
185
- * and the request URL has querystring parameters.
186
- *
187
- * @param {Object } details webRequest onResponseStarted details object
188
- */
189
- checkForPixelCookieSharing : function ( details ) {
190
- if ( ! badger . isLearningEnabled ( details . tabId ) ) {
191
- return ;
192
- }
193
-
194
- if ( details . type != 'image' || details . frameId !== 0 || details . url . indexOf ( '?' ) == - 1 ) {
195
- return ;
196
- }
197
-
198
- let self = this ,
199
- tab_base = self . tabOrigins [ details . tabId ] ;
200
- if ( ! tab_base ) {
201
- return ;
202
- }
203
- let tab_url = self . tabUrls [ details . tabId ] ;
204
-
205
- let request_host = ( new URI ( details . url ) ) . host ;
206
- // CNAME uncloaking
207
- if ( utils . hasOwn ( badger . cnameDomains , request_host ) ) {
208
- request_host = badger . cnameDomains [ request_host ] ;
209
- }
210
- let request_base = getBaseDomain ( request_host ) ;
211
-
212
- let initiator_url = getInitiatorUrl ( tab_url , details ) ;
213
- if ( initiator_url ) {
214
- tab_url = initiator_url ;
215
- tab_base = getBaseDomain ( extractHostFromURL ( initiator_url ) ) ;
216
- }
217
-
218
- // ignore first-party requests
219
- if ( ! utils . isThirdPartyDomain ( request_base , tab_base ) ) {
220
- return ;
221
- }
222
-
223
- // short-circuit if we already observed this eTLD+1 tracking on this site
224
- let firstParties = self . storage . getStore ( 'snitch_map' ) . getItem ( request_base ) ;
225
- if ( firstParties && firstParties . includes ( tab_base ) ) {
226
- return ;
227
- }
228
-
229
- // short-circuit if we already made a decision for this FQDN
230
- let action = self . storage . getBestAction ( request_host ) ;
231
- if ( action != constants . NO_TRACKING && action != constants . ALLOW ) {
232
- return ;
233
- }
234
-
235
- // get all non-HttpOnly cookies for the top-level frame
236
- // and pass those to the pixel cookie-share accounting function
237
- let config = {
238
- url : tab_url
239
- } ;
240
- if ( badger . firstPartyDomainPotentiallyRequired ) {
241
- config . firstPartyDomain = null ;
242
- }
243
- chrome . cookies . getAll ( config , function ( cookies ) {
244
- cookies = cookies . filter ( cookie => ! cookie . httpOnly ) ;
245
- if ( cookies . length < 1 ) {
246
- return ;
247
- }
248
-
249
- // TODO refactor with new URI() above?
250
- let searchParams = ( new URL ( details . url ) ) . searchParams ;
251
-
252
- self . pixelCookieShareAccounting ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) ;
253
- } ) ;
254
- } ,
255
-
256
- /**
257
- * Checks for cookie sharing: requests to third-party domains
258
- * that include high entropy data from first-party cookies.
259
- *
260
- * Only catches plain-text verbatim sharing (b64 encoding etc. defeat it).
261
- *
262
- * Assumes any long string that doesn't contain URL fragments
263
- * or stopwords is an identifier.
264
- *
265
- * Doesn't catch cookie syncing (3rd party -> 3rd party),
266
- * but most of those tracking cookies should be blocked anyway.
267
- */
268
- pixelCookieShareAccounting : function ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) {
269
- const TRACKER_ENTROPY_THRESHOLD = 33 ,
270
- MIN_STR_LEN = 8 ;
271
-
272
- let self = this ;
273
-
274
- for ( let p of searchParams ) {
275
- let key = p [ 0 ] ,
276
- value = p [ 1 ] ;
277
-
278
- // the argument must be sufficiently long
279
- if ( ! value || value . length < MIN_STR_LEN ) {
280
- continue ;
281
- }
282
-
283
- // check if this argument is derived from a high-entropy first-party cookie
284
- for ( let cookie of cookies ) {
285
- // the cookie value must be sufficiently long
286
- if ( ! cookie . value || cookie . value . length < MIN_STR_LEN ) {
287
- continue ;
288
- }
289
-
290
- // find the longest common substring between this arg and the cookies
291
- // associated with the document
292
- let substrings = utils . findCommonSubstrings ( cookie . value , value ) || [ ] ;
293
- for ( let s of substrings ) {
294
- // ignore the substring if it's part of the first-party URL. sometimes
295
- // content servers take the url of the page they're hosting content
296
- // for as an argument. e.g.
297
- // https://example-cdn.com/content?u=http://example.com/index.html
298
- if ( tab_url . indexOf ( s ) != - 1 ) {
299
- continue ;
300
- }
301
-
302
- // elements of the user agent string are also commonly included in
303
- // both cookies and arguments; e.g. "Mozilla/5.0" might be in both.
304
- // This is not a special tracking risk since third parties can see
305
- // this info anyway.
306
- if ( navigator . userAgent . indexOf ( s ) != - 1 ) {
307
- continue ;
308
- }
309
-
310
- // Sometimes the entire url and then some is included in the
311
- // substring -- the common string might be "https://example.com/:true"
312
- // In that case, we only care about the information around the URL.
313
- if ( s . indexOf ( tab_url ) != - 1 ) {
314
- s = s . replace ( tab_url , "" ) ;
315
- }
316
-
317
- // During testing we found lots of common values like "homepage",
318
- // "referrer", etc. were being flagged as high entropy. This searches
319
- // for a few of those and removes them before we go further.
320
- let lower = s . toLowerCase ( ) ;
321
- lowEntropyQueryValues . forEach ( function ( qv ) {
322
- let start = lower . indexOf ( qv ) ;
323
- if ( start != - 1 ) {
324
- s = s . replace ( s . substring ( start , start + qv . length ) , "" ) ;
325
- }
326
- } ) ;
327
-
328
- // at this point, since we might have removed things, make sure the
329
- // string is still long enough to bother with
330
- if ( s . length < MIN_STR_LEN ) {
331
- continue ;
332
- }
333
-
334
- // compute the entropy of this common substring. if it's greater than
335
- // our threshold, record the tracking action and exit the function.
336
- let entropy = utils . estimateMaxEntropy ( s ) ;
337
- if ( entropy > TRACKER_ENTROPY_THRESHOLD ) {
338
- log ( "Found high-entropy cookie share from" , tab_base , "to" , request_host ,
339
- ":" , entropy , "bits\n cookie:" , cookie . name , '=' , cookie . value ,
340
- "\n arg:" , key , "=" , value , "\n substring:" , s ) ;
341
- self . _recordPrevalence ( request_host , request_base , tab_base ) ;
342
-
343
- // record pixel cookie sharing
344
- badger . storage . recordTrackingDetails (
345
- request_base , tab_base , 'pixelcookieshare' ) ;
346
-
347
- return ;
348
- }
349
- }
350
- }
351
- }
352
- } ,
353
-
354
182
/**
355
183
* Wraps _recordPrevalence for use outside of webRequest listeners.
356
184
*
@@ -647,51 +475,6 @@ var lowEntropyCookieValues = {
647
475
"zu" :8
648
476
} ;
649
477
650
/**
 * Common low-entropy fragments that get stripped from candidate
 * identifier strings before their entropy is estimated.
 *
 * Entries are matched against a lowercased copy of the candidate,
 * so every entry must itself be lowercase. Order matters: entries are
 * applied first to last, so longer fragments ("https") come before the
 * shorter fragments they contain ("http").
 */
const lowEntropyQueryValues = [
  // URL pieces
  "https",
  "http",
  "://",
  "%3a%2f%2f",
  "www",
  "url",
  // common words seen in query strings and cookies
  "undefined",
  "impression",
  "session",
  "homepage",
  "client",
  "version",
  "business",
  "title",
  "get",
  "site",
  "name",
  "category",
  "account_id",
  "smartadserver",
  "front",
  "page",
  "view",
  "first",
  "visit",
  "platform",
  "language",
  "automatic",
  "disabled",
  "landing",
  "entertainment",
  "amazon",
  "official",
  "webvisor",
  "anonymous",
  "across",
  "narrative",
  // JSON punctuation
  "\":null",
  "\":false",
  "\":\"",
  "\",\"",
];
694
-
695
478
/**
696
479
* Extract cookies from onBeforeSendHeaders
697
480
*
@@ -807,9 +590,6 @@ function startListeners() {
807
590
badger . heuristicBlocking . checkForTrackingCookies ( details ) ;
808
591
}
809
592
810
- // check for pixel cookie sharing if the response appears to be for an image pixel
811
- badger . heuristicBlocking . checkForPixelCookieSharing ( details ) ;
812
-
813
593
} , { urls : [ "<all_urls>" ] } , extraInfoSpec ) ;
814
594
}
815
595
0 commit comments