@@ -193,178 +193,6 @@ HeuristicBlocker.prototype = {
193
193
}
194
194
} ,
195
195
196
- /**
197
- * Calls the pixel cookie sharing checking function
198
- * iff the request is for an image in the top-level frame,
199
- * and the request URL has querystring parameters.
200
- *
201
- * @param {Object } details webRequest onResponseStarted details object
202
- */
203
- checkForPixelCookieSharing : function ( details ) {
204
- if ( ! badger . isLearningEnabled ( details . tabId ) ) {
205
- return ;
206
- }
207
-
208
- if ( details . type != 'image' || details . frameId !== 0 || details . url . indexOf ( '?' ) == - 1 ) {
209
- return ;
210
- }
211
-
212
- let self = this ,
213
- tab_base = self . tabBases [ details . tabId ] ;
214
- if ( ! tab_base ) {
215
- return ;
216
- }
217
- let tab_url = self . tabUrls [ details . tabId ] ;
218
-
219
- let request_host = ( new URI ( details . url ) ) . host ;
220
- // CNAME uncloaking
221
- if ( utils . hasOwn ( badger . cnameDomains , request_host ) ) {
222
- request_host = badger . cnameDomains [ request_host ] ;
223
- }
224
- let request_base = getBaseDomain ( request_host ) ;
225
-
226
- let initiator_url = getInitiatorUrl ( tab_url , details ) ;
227
- if ( initiator_url ) {
228
- tab_url = initiator_url ;
229
- tab_base = getBaseDomain ( extractHostFromURL ( initiator_url ) ) ;
230
- }
231
-
232
- // ignore first-party requests
233
- if ( ! utils . isThirdPartyDomain ( request_base , tab_base ) ) {
234
- return ;
235
- }
236
-
237
- // short-circuit if we already observed this eTLD+1 tracking on this site
238
- let firstParties = self . storage . getStore ( 'snitch_map' ) . getItem ( request_base ) ;
239
- if ( firstParties && firstParties . includes ( tab_base ) ) {
240
- return ;
241
- }
242
-
243
- // short-circuit if we already made a decision for this FQDN
244
- let action = self . storage . getBestAction ( request_host ) ;
245
- if ( action != constants . NO_TRACKING && action != constants . ALLOW ) {
246
- return ;
247
- }
248
-
249
- // get all non-HttpOnly cookies for the top-level frame
250
- // and pass those to the pixel cookie-share accounting function
251
- let config = {
252
- url : tab_url
253
- } ;
254
- if ( badger . firstPartyDomainPotentiallyRequired ) {
255
- config . firstPartyDomain = null ;
256
- }
257
- chrome . cookies . getAll ( config , function ( cookies ) {
258
- cookies = cookies . filter ( cookie => ! cookie . httpOnly ) ;
259
- if ( cookies . length < 1 ) {
260
- return ;
261
- }
262
-
263
- // TODO refactor with new URI() above?
264
- let searchParams = ( new URL ( details . url ) ) . searchParams ;
265
-
266
- self . pixelCookieShareAccounting ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) ;
267
- } ) ;
268
- } ,
269
-
270
- /**
271
- * Checks for cookie sharing: requests to third-party domains
272
- * that include high entropy data from first-party cookies.
273
- *
274
- * Only catches plain-text verbatim sharing (b64 encoding etc. defeat it).
275
- *
276
- * Assumes any long string that doesn't contain URL fragments
277
- * or stopwords is an identifier.
278
- *
279
- * Doesn't catch cookie syncing (3rd party -> 3rd party),
280
- * but most of those tracking cookies should be blocked anyway.
281
- */
282
- pixelCookieShareAccounting : function ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) {
283
- const TRACKER_ENTROPY_THRESHOLD = 33 ,
284
- MIN_STR_LEN = 8 ;
285
-
286
- let self = this ;
287
-
288
- for ( let p of searchParams ) {
289
- let key = p [ 0 ] ,
290
- value = p [ 1 ] ;
291
-
292
- // the argument must be sufficiently long
293
- if ( ! value || value . length < MIN_STR_LEN ) {
294
- continue ;
295
- }
296
-
297
- // check if this argument is derived from a high-entropy first-party cookie
298
- for ( let cookie of cookies ) {
299
- // the cookie value must be sufficiently long
300
- if ( ! cookie . value || cookie . value . length < MIN_STR_LEN ) {
301
- continue ;
302
- }
303
-
304
- // find the longest common substring between this arg and the cookies
305
- // associated with the document
306
- let substrings = utils . findCommonSubstrings ( cookie . value , value ) || [ ] ;
307
- for ( let s of substrings ) {
308
- // ignore the substring if it's part of the first-party URL. sometimes
309
- // content servers take the url of the page they're hosting content
310
- // for as an argument. e.g.
311
- // https://example-cdn.com/content?u=http://example.com/index.html
312
- if ( tab_url . indexOf ( s ) != - 1 ) {
313
- continue ;
314
- }
315
-
316
- // elements of the user agent string are also commonly included in
317
- // both cookies and arguments; e.g. "Mozilla/5.0" might be in both.
318
- // This is not a special tracking risk since third parties can see
319
- // this info anyway.
320
- if ( navigator . userAgent . indexOf ( s ) != - 1 ) {
321
- continue ;
322
- }
323
-
324
- // Sometimes the entire url and then some is included in the
325
- // substring -- the common string might be "https://example.com/:true"
326
- // In that case, we only care about the information around the URL.
327
- if ( s . indexOf ( tab_url ) != - 1 ) {
328
- s = s . replace ( tab_url , "" ) ;
329
- }
330
-
331
- // During testing we found lots of common values like "homepage",
332
- // "referrer", etc. were being flagged as high entropy. This searches
333
- // for a few of those and removes them before we go further.
334
- let lower = s . toLowerCase ( ) ;
335
- lowEntropyQueryValues . forEach ( function ( qv ) {
336
- let start = lower . indexOf ( qv ) ;
337
- if ( start != - 1 ) {
338
- s = s . replace ( s . substring ( start , start + qv . length ) , "" ) ;
339
- }
340
- } ) ;
341
-
342
- // at this point, since we might have removed things, make sure the
343
- // string is still long enough to bother with
344
- if ( s . length < MIN_STR_LEN ) {
345
- continue ;
346
- }
347
-
348
- // compute the entropy of this common substring. if it's greater than
349
- // our threshold, record the tracking action and exit the function.
350
- let entropy = utils . estimateMaxEntropy ( s ) ;
351
- if ( entropy > TRACKER_ENTROPY_THRESHOLD ) {
352
- log ( "Found high-entropy cookie share from" , tab_base , "to" , request_host ,
353
- ":" , entropy , "bits\n cookie:" , cookie . name , '=' , cookie . value ,
354
- "\n arg:" , key , "=" , value , "\n substring:" , s ) ;
355
- self . _recordPrevalence ( request_host , request_base , tab_base ) ;
356
-
357
- // record pixel cookie sharing
358
- badger . storage . recordTrackingDetails (
359
- request_base , tab_base , 'pixelcookieshare' ) ;
360
-
361
- return ;
362
- }
363
- }
364
- }
365
- }
366
- } ,
367
-
368
196
/**
369
197
* Wraps _recordPrevalence for use outside of webRequest listeners.
370
198
*
@@ -660,51 +488,6 @@ var lowEntropyCookieValues = {
660
488
"zu" :8
661
489
} ;
662
490
663
/**
 * Common low-entropy fragments that are stripped from a candidate
 * substring before its entropy is estimated.
 *
 * Entries are searched for in a lower-cased copy of the candidate,
 * so every entry must itself be lower-case or it can never match.
 * Order matters where one entry is a prefix of another ("https" must
 * come before "http" so that the longer form is removed whole).
 */
const lowEntropyQueryValues = [
  "https",
  "http",
  "://",
  "%3a%2f%2f",
  "www",
  "url",
  "undefined",
  "impression",
  "session",
  "homepage",
  "client",
  "version",
  "business",
  "title",
  "get",
  "site",
  "name",
  "category",
  "account_id",
  "smartadserver",
  "front",
  "page",
  "view",
  "first",
  "visit",
  "platform",
  "language",
  "automatic",
  "disabled",
  "landing",
  "entertainment",
  "amazon",
  "official",
  "webvisor",
  "anonymous",
  "across",
  "narrative",
  "\":null",
  "\":false",
  "\":\"",
  "\",\"",
];
707
-
708
491
/**
709
492
* Extract cookies from onBeforeSendHeaders
710
493
*
@@ -820,9 +603,6 @@ function startListeners() {
820
603
badger . heuristicBlocking . checkForTrackingCookies ( details ) ;
821
604
}
822
605
823
- // check for pixel cookie sharing if the response appears to be for an image pixel
824
- badger . heuristicBlocking . checkForPixelCookieSharing ( details ) ;
825
-
826
606
} , { urls : [ "http://*/*" , "https://*/*" ] } , extraInfoSpec ) ;
827
607
}
828
608
0 commit comments