Skip to content

Commit 75e52c1

Browse files
committed
Remove pixel cookie sharing detection for now
1 parent b2e52ba commit 75e52c1

File tree

4 files changed

+3
-284
lines changed

4 files changed

+3
-284
lines changed

src/js/heuristicblocking.js

+3-162
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,8 @@ HeuristicBlocker.prototype = {
101101
* Use updateTrackerPrevalence for non-webRequest initiated bookkeeping.
102102
*
103103
* @param {Object} details request/response details
104-
* @param {Boolean} check_for_cookie_share whether to check for cookie sharing
105104
*/
106-
heuristicBlockingAccounting: function (details, check_for_cookie_share) {
105+
heuristicBlockingAccounting: function (details) {
107106
// ignore requests that are outside a tabbed window
108107
if (details.tabId < 0 || !badger.isLearningEnabled(details.tabId)) {
109108
return {};
@@ -144,119 +143,6 @@ HeuristicBlocker.prototype = {
144143
self._recordPrevalence(request_host, request_origin, tab_origin);
145144
return {};
146145
}
147-
148-
// check for cookie sharing iff this is an image in the top-level frame, and the request URL has parameters
149-
if (check_for_cookie_share && details.type == 'image' && details.frameId === 0 && details.url.indexOf('?') > -1) {
150-
// get all non-HttpOnly cookies for the top-level frame
151-
// and pass those to the cookie-share accounting function
152-
let tab_url = self.tabUrls[details.tabId];
153-
154-
let config = {
155-
url: tab_url
156-
};
157-
if (badger.firstPartyDomainPotentiallyRequired) {
158-
config.firstPartyDomain = null;
159-
}
160-
161-
chrome.cookies.getAll(config, function (cookies) {
162-
cookies = cookies.filter(cookie => !cookie.httpOnly);
163-
if (cookies.length >= 1) {
164-
self.pixelCookieShareAccounting(tab_url, tab_origin, details.url, request_host, request_origin, cookies);
165-
}
166-
});
167-
}
168-
},
169-
170-
/**
171-
* Checks for cookie sharing: requests to third-party domains that include
172-
* high entropy data from first-party cookies (associated with the top-level
173-
* frame). Only catches plain-text verbatim sharing (b64 encoding + the like
174-
* defeat it). Assumes any long string that doesn't contain URL fragments or
175-
* stopwords is an identifier. Doesn't catch cookie syncing (3rd party -> 3rd
176-
* party), but most of those tracking cookies should be blocked anyway.
177-
*
178-
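* @param tab_url, tab_origin, request_url, request_host, request_origin describe the tab and the request (taken from the onBeforeSendHeaders details by the caller)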
179-
* @param cookies are the result of chrome.cookies.getAll()
180-
* @returns {*}
181-
*/
182-
pixelCookieShareAccounting: function (tab_url, tab_origin, request_url, request_host, request_origin, cookies) {
183-
let params = (new URL(request_url)).searchParams,
184-
TRACKER_ENTROPY_THRESHOLD = 33,
185-
MIN_STR_LEN = 8;
186-
187-
for (let p of params) {
188-
let key = p[0],
189-
value = p[1];
190-
191-
// the argument must be sufficiently long
192-
if (!value || value.length < MIN_STR_LEN) {
193-
continue;
194-
}
195-
196-
// check if this argument is derived from a high-entropy first-party cookie
197-
for (let cookie of cookies) {
198-
// the cookie value must be sufficiently long
199-
if (!cookie.value || cookie.value.length < MIN_STR_LEN) {
200-
continue;
201-
}
202-
203-
// find the longest common substring between this arg and the cookies
204-
// associated with the document
205-
let substrings = utils.findCommonSubstrings(cookie.value, value) || [];
206-
for (let s of substrings) {
207-
// ignore the substring if it's part of the first-party URL. sometimes
208-
// content servers take the url of the page they're hosting content
209-
// for as an argument. e.g.
210-
// https://example-cdn.com/content?u=http://example.com/index.html
211-
if (tab_url.indexOf(s) != -1) {
212-
continue;
213-
}
214-
215-
// elements of the user agent string are also commonly included in
216-
// both cookies and arguments; e.g. "Mozilla/5.0" might be in both.
217-
// This is not a special tracking risk since third parties can see
218-
// this info anyway.
219-
if (navigator.userAgent.indexOf(s) != -1) {
220-
continue;
221-
}
222-
223-
// Sometimes the entire url and then some is included in the
224-
// substring -- the common string might be "https://example.com/:true"
225-
// In that case, we only care about the information around the URL.
226-
if (s.indexOf(tab_url) != -1) {
227-
s = s.replace(tab_url, "");
228-
}
229-
230-
// During testing we found lots of common values like "homepage",
231-
// "referrer", etc. were being flagged as high entropy. This searches
232-
// for a few of those and removes them before we go further.
233-
let lower = s.toLowerCase();
234-
lowEntropyQueryValues.forEach(function (qv) {
235-
let start = lower.indexOf(qv);
236-
if (start != -1) {
237-
s = s.replace(s.substring(start, start + qv.length), "");
238-
}
239-
});
240-
241-
// at this point, since we might have removed things, make sure the
242-
// string is still long enough to bother with
243-
if (s.length < MIN_STR_LEN) {
244-
continue;
245-
}
246-
247-
// compute the entropy of this common substring. if it's greater than
248-
// our threshold, record the tracking action and exit the function.
249-
let entropy = utils.estimateMaxEntropy(s);
250-
if (entropy > TRACKER_ENTROPY_THRESHOLD) {
251-
log("Found high-entropy cookie share from", tab_origin, "to", request_host,
252-
":", entropy, "bits\n cookie:", cookie.name, '=', cookie.value,
253-
"\n arg:", key, "=", value, "\n substring:", s);
254-
this._recordPrevalence(request_host, request_origin, tab_origin);
255-
return;
256-
}
257-
}
258-
}
259-
}
260146
},
261147

262148
/**
@@ -552,51 +438,6 @@ var lowEntropyCookieValues = {
552438
"zu":8
553439
};
554440

555-
const lowEntropyQueryValues = [
556-
"https",
557-
"http",
558-
"://",
559-
"%3A%2F%2F",
560-
"www",
561-
"url",
562-
"undefined",
563-
"impression",
564-
"session",
565-
"homepage",
566-
"client",
567-
"version",
568-
"business",
569-
"title",
570-
"get",
571-
"site",
572-
"name",
573-
"category",
574-
"account_id",
575-
"smartadserver",
576-
"front",
577-
"page",
578-
"view",
579-
"first",
580-
"visit",
581-
"platform",
582-
"language",
583-
"automatic",
584-
"disabled",
585-
"landing",
586-
"entertainment",
587-
"amazon",
588-
"official",
589-
"webvisor",
590-
"anonymous",
591-
"across",
592-
"narrative",
593-
"\":null",
594-
"\":false",
595-
"\":\"",
596-
"\",\"",
597-
"\",\"",
598-
];
599-
600441
/**
601442
* Extract cookies from onBeforeSendHeaders
602443
*
@@ -686,7 +527,7 @@ function startListeners() {
686527
extraInfoSpec.push('extraHeaders');
687528
}
688529
chrome.webRequest.onBeforeSendHeaders.addListener(function(details) {
689-
return badger.heuristicBlocking.heuristicBlockingAccounting(details, true);
530+
return badger.heuristicBlocking.heuristicBlockingAccounting(details);
690531
}, {urls: ["<all_urls>"]}, extraInfoSpec);
691532

692533
/**
@@ -705,7 +546,7 @@ function startListeners() {
705546
}
706547
}
707548
if (hasSetCookie) {
708-
return badger.heuristicBlocking.heuristicBlockingAccounting(details, false);
549+
return badger.heuristicBlocking.heuristicBlockingAccounting(details);
709550
}
710551
},
711552
{urls: ["<all_urls>"]}, extraInfoSpec);

src/js/utils.js

-43
Original file line numberDiff line numberDiff line change
@@ -200,48 +200,6 @@ function estimateMaxEntropy(str) {
200200
return max_bits;
201201
}
202202

203-
// Adapted from https://gist.github.com/jaewook77/cd1e3aa9449d7ea4fb4f
204-
// Find all common substrings at least 8 characters long, using DYNAMIC
205-
// PROGRAMMING
206-
function findCommonSubstrings(str1, str2) {
207-
/*
208-
Let D[i,j] be the length of the longest matching string suffix between
209-
str1[1]..str1[i] and a segment of str2 between str2[1]..str2[j].
210-
If the ith character in str1 doesn’t match the jth character in str2, then
211-
D[i,j] is zero to indicate that there is no matching suffix
212-
*/
213-
214-
// we only care about strings >= 8 chars
215-
let D = [], LCS = [], LCS_MIN = 8;
216-
217-
// runs in O(M x N) time!
218-
for (let i = 0; i < str1.length; i++) {
219-
D[i] = [];
220-
for (let j = 0; j < str2.length; j++) {
221-
if (str1[i] == str2[j]) {
222-
if (i == 0 || j == 0) {
223-
D[i][j] = 1;
224-
} else {
225-
D[i][j] = D[i-1][j-1] + 1;
226-
}
227-
228-
// store all common substrings that reach the minimum length
229-
if (D[i][j] == LCS_MIN) {
230-
LCS.push(str1.substring(i-D[i][j]+1, i+1));
231-
} else if (D[i][j] > LCS_MIN) {
232-
// remove the shorter substring and add the new, longer one
233-
LCS.pop();
234-
LCS.push(str1.substring(i-D[i][j]+1, i+1));
235-
}
236-
} else {
237-
D[i][j] = 0;
238-
}
239-
}
240-
}
241-
242-
return LCS;
243-
}
244-
245203
function oneSecond() {
246204
return 1000;
247205
}
@@ -468,7 +426,6 @@ let exports = {
468426
arrayBufferToBase64,
469427
estimateMaxEntropy,
470428
explodeSubdomains,
471-
findCommonSubstrings,
472429
getHostFromDomainInput,
473430
isRestrictedUrl,
474431
isThirdPartyDomain,

src/tests/tests/utils.js

-24
Original file line numberDiff line numberDiff line change
@@ -487,30 +487,6 @@ QUnit.test("getHostFromDomainInput", assert => {
487487
);
488488
});
489489

490-
// Tests algorithm used in the pixel tracking heuristic
491-
// It should return a common substring between two given values
492-
QUnit.test("findCommonSubstrings", assert => {
493-
494-
assert.deepEqual(
495-
utils.findCommonSubstrings('www.foo.bar', 'www.foob.ar'),
496-
[],
497-
"substrings under the length threshold of 8 are ignored"
498-
);
499-
500-
assert.equal(
501-
utils.findCommonSubstrings('foobar.com/foo/fizz/buzz/bar', 'foobar.com/foo/bizz/fuzz/bar')[0],
502-
'foobar.com/foo/',
503-
"returns longest matching value from the pair of URLs"
504-
);
505-
506-
assert.deepEqual(
507-
utils.findCommonSubstrings('foobar.com/fizz/buzz/bar/foo', 'foobar.com/fizzbuzz/buzz/bar/foo'),
508-
['foobar.com/fizz', "zz/buzz/bar/foo"],
509-
"returns multiple substrings if multiple are present in comparison"
510-
);
511-
512-
});
513-
514490
// used in pixel tracking heuristic, given a string the estimateMaxEntropy function
515491
// will return the estimated entropy value from it, based on logic parsing the string's length,
516492
// and classes of character complication included in the string

tests/selenium/cookie_sharing_test.py

-55
This file was deleted.

0 commit comments

Comments
 (0)