@@ -192,178 +192,6 @@ HeuristicBlocker.prototype = {
192192 }
193193 } ,
194194
195- /**
196- * Calls the pixel cookie sharing checking function
197- * iff the request is for an image in the top-level frame,
198- * and the request URL has querystring parameters.
199- *
200- * @param {Object } details webRequest onResponseStarted details object
201- */
202- checkForPixelCookieSharing : function ( details ) {
203- if ( ! badger . isLearningEnabled ( details . tabId ) ) {
204- return ;
205- }
206-
207- if ( details . type != 'image' || details . frameId !== 0 || details . url . indexOf ( '?' ) == - 1 ) {
208- return ;
209- }
210-
211- let self = this ,
212- tab_base = self . tabOrigins [ details . tabId ] ;
213- if ( ! tab_base ) {
214- return ;
215- }
216- let tab_url = self . tabUrls [ details . tabId ] ;
217-
218- let request_host = ( new URI ( details . url ) ) . host ;
219- // CNAME uncloaking
220- if ( utils . hasOwn ( badger . cnameDomains , request_host ) ) {
221- request_host = badger . cnameDomains [ request_host ] ;
222- }
223- let request_base = getBaseDomain ( request_host ) ;
224-
225- let initiator_url = getInitiatorUrl ( tab_url , details ) ;
226- if ( initiator_url ) {
227- tab_url = initiator_url ;
228- tab_base = getBaseDomain ( extractHostFromURL ( initiator_url ) ) ;
229- }
230-
231- // ignore first-party requests
232- if ( ! utils . isThirdPartyDomain ( request_base , tab_base ) ) {
233- return ;
234- }
235-
236- // short-circuit if we already observed this eTLD+1 tracking on this site
237- let firstParties = self . storage . getStore ( 'snitch_map' ) . getItem ( request_base ) ;
238- if ( firstParties && firstParties . includes ( tab_base ) ) {
239- return ;
240- }
241-
242- // short-circuit if we already made a decision for this FQDN
243- let action = self . storage . getBestAction ( request_host ) ;
244- if ( action != constants . NO_TRACKING && action != constants . ALLOW ) {
245- return ;
246- }
247-
248- // get all non-HttpOnly cookies for the top-level frame
249- // and pass those to the pixel cookie-share accounting function
250- let config = {
251- url : tab_url
252- } ;
253- if ( badger . firstPartyDomainPotentiallyRequired ) {
254- config . firstPartyDomain = null ;
255- }
256- chrome . cookies . getAll ( config , function ( cookies ) {
257- cookies = cookies . filter ( cookie => ! cookie . httpOnly ) ;
258- if ( cookies . length < 1 ) {
259- return ;
260- }
261-
262- // TODO refactor with new URI() above?
263- let searchParams = ( new URL ( details . url ) ) . searchParams ;
264-
265- self . pixelCookieShareAccounting ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) ;
266- } ) ;
267- } ,
268-
269- /**
270- * Checks for cookie sharing: requests to third-party domains
271- * that include high entropy data from first-party cookies.
272- *
273- * Only catches plain-text verbatim sharing (b64 encoding etc. defeat it).
274- *
275- * Assumes any long string that doesn't contain URL fragments
276- * or stopwords is an identifier.
277- *
278- * Doesn't catch cookie syncing (3rd party -> 3rd party),
279- * but most of those tracking cookies should be blocked anyway.
280- */
281- pixelCookieShareAccounting : function ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) {
282- const TRACKER_ENTROPY_THRESHOLD = 33 ,
283- MIN_STR_LEN = 8 ;
284-
285- let self = this ;
286-
287- for ( let p of searchParams ) {
288- let key = p [ 0 ] ,
289- value = p [ 1 ] ;
290-
291- // the argument must be sufficiently long
292- if ( ! value || value . length < MIN_STR_LEN ) {
293- continue ;
294- }
295-
296- // check if this argument is derived from a high-entropy first-party cookie
297- for ( let cookie of cookies ) {
298- // the cookie value must be sufficiently long
299- if ( ! cookie . value || cookie . value . length < MIN_STR_LEN ) {
300- continue ;
301- }
302-
303- // find the longest common substring between this arg and the cookies
304- // associated with the document
305- let substrings = utils . findCommonSubstrings ( cookie . value , value ) || [ ] ;
306- for ( let s of substrings ) {
307- // ignore the substring if it's part of the first-party URL. sometimes
308- // content servers take the url of the page they're hosting content
309- // for as an argument. e.g.
310- // https://example-cdn.com/content?u=http://example.com/index.html
311- if ( tab_url . indexOf ( s ) != - 1 ) {
312- continue ;
313- }
314-
315- // elements of the user agent string are also commonly included in
316- // both cookies and arguments; e.g. "Mozilla/5.0" might be in both.
317- // This is not a special tracking risk since third parties can see
318- // this info anyway.
319- if ( navigator . userAgent . indexOf ( s ) != - 1 ) {
320- continue ;
321- }
322-
323- // Sometimes the entire url and then some is included in the
324- // substring -- the common string might be "https://example.com/:true"
325- // In that case, we only care about the information around the URL.
326- if ( s . indexOf ( tab_url ) != - 1 ) {
327- s = s . replace ( tab_url , "" ) ;
328- }
329-
330- // During testing we found lots of common values like "homepage",
331- // "referrer", etc. were being flagged as high entropy. This searches
332- // for a few of those and removes them before we go further.
333- let lower = s . toLowerCase ( ) ;
334- lowEntropyQueryValues . forEach ( function ( qv ) {
335- let start = lower . indexOf ( qv ) ;
336- if ( start != - 1 ) {
337- s = s . replace ( s . substring ( start , start + qv . length ) , "" ) ;
338- }
339- } ) ;
340-
341- // at this point, since we might have removed things, make sure the
342- // string is still long enough to bother with
343- if ( s . length < MIN_STR_LEN ) {
344- continue ;
345- }
346-
347- // compute the entropy of this common substring. if it's greater than
348- // our threshold, record the tracking action and exit the function.
349- let entropy = utils . estimateMaxEntropy ( s ) ;
350- if ( entropy > TRACKER_ENTROPY_THRESHOLD ) {
351- log ( "Found high-entropy cookie share from" , tab_base , "to" , request_host ,
352- ":" , entropy , "bits\n cookie:" , cookie . name , '=' , cookie . value ,
353- "\n arg:" , key , "=" , value , "\n substring:" , s ) ;
354- self . _recordPrevalence ( request_host , request_base , tab_base ) ;
355-
356- // record pixel cookie sharing
357- badger . storage . recordTrackingDetails (
358- request_base , tab_base , 'pixelcookieshare' ) ;
359-
360- return ;
361- }
362- }
363- }
364- }
365- } ,
366-
367195 /**
368196 * Wraps _recordPrevalence for use outside of webRequest listeners.
369197 *
@@ -660,51 +488,6 @@ var lowEntropyCookieValues = {
660488 "zu" :8
661489} ;
662490
/**
 * Stopwords stripped from cookie/query-string common substrings before
 * their entropy is estimated: URL fragments, common English words seen in
 * query values, and JSON punctuation runs.
 *
 * Order and multiplicity are significant to the consumer, which walks this
 * list in order and removes one occurrence per entry; the '","' entry is
 * therefore intentionally left listed twice.
 */
const lowEntropyQueryValues = [
  // URL fragments
  'https', 'http', '://', '%3A%2F%2F', 'www', 'url',

  // common low-information words
  'undefined', 'impression', 'session', 'homepage', 'client', 'version',
  'business', 'title', 'get', 'site', 'name', 'category', 'account_id',
  'smartadserver', 'front', 'page', 'view', 'first', 'visit', 'platform',
  'language', 'automatic', 'disabled', 'landing', 'entertainment', 'amazon',
  'official', 'webvisor', 'anonymous', 'across', 'narrative',

  // JSON punctuation
  '":null', '":false', '":"', '","', '","',
];
708491/**
709492 * Extract cookies from onBeforeSendHeaders
710493 *
@@ -820,9 +603,6 @@ function startListeners() {
820603 badger . heuristicBlocking . checkForTrackingCookies ( details ) ;
821604 }
822605
823- // check for pixel cookie sharing if the response appears to be for an image pixel
824- badger . heuristicBlocking . checkForPixelCookieSharing ( details ) ;
825-
826606 } , { urls : [ "http://*/*" , "https://*/*" ] } , extraInfoSpec ) ;
827607}
828608
0 commit comments