From 12d70efd2baab8af57419c285e945e2a4468fc3b Mon Sep 17 00:00:00 2001
From: Peter Perlepes
Date: Sat, 27 Jan 2024 16:32:40 +0200
Subject: [PATCH] Add an anonymization method to Node.js emitter and API (close
#1286)
#1287
---
.../docs/node-tracker/node-tracker.api.md | 3 +-
...server-anonymization_2024-01-25-21-37.json | 10 ++
trackers/node-tracker/src/emitter.ts | 32 +------
trackers/node-tracker/src/got_emitter.ts | 62 ++++--------
trackers/node-tracker/test/got_emitter.ts | 94 ++++++++++---------
5 files changed, 84 insertions(+), 117 deletions(-)
create mode 100644 common/changes/@snowplow/node-tracker/feature-add-nodejs-server-anonymization_2024-01-25-21-37.json
diff --git a/api-docs/docs/node-tracker/node-tracker.api.md b/api-docs/docs/node-tracker/node-tracker.api.md
index 081f51e82..83af9c3ba 100644
--- a/api-docs/docs/node-tracker/node-tracker.api.md
+++ b/api-docs/docs/node-tracker/node-tracker.api.md
@@ -213,6 +213,7 @@ export interface Emitter {
flush: () => void;
// (undocumented)
input: (payload: Payload) => void;
+ setAnonymization?: (shouldAnonymize: boolean) => void;
}
// @public
@@ -235,7 +236,7 @@ export interface FormSubmissionEvent {
}
// @public
-export function gotEmitter(endpoint: string, protocol?: HttpProtocol, port?: number, method?: HttpMethod, bufferSize?: number, retry?: number | Partial, cookieJar?: PromiseCookieJar | ToughCookieJar, callback?: (error?: RequestError, response?: Response) => void, agents?: Agents): Emitter;
+export function gotEmitter(endpoint: string, protocol?: HttpProtocol, port?: number, method?: HttpMethod, bufferSize?: number, retry?: number | Partial, cookieJar?: PromiseCookieJar | ToughCookieJar, callback?: (error?: RequestError, response?: Response) => void, agents?: Agents, serverAnonymization?: boolean): Emitter;
// @public (undocumented)
export enum HttpMethod {
diff --git a/common/changes/@snowplow/node-tracker/feature-add-nodejs-server-anonymization_2024-01-25-21-37.json b/common/changes/@snowplow/node-tracker/feature-add-nodejs-server-anonymization_2024-01-25-21-37.json
new file mode 100644
index 000000000..4a2a71151
--- /dev/null
+++ b/common/changes/@snowplow/node-tracker/feature-add-nodejs-server-anonymization_2024-01-25-21-37.json
@@ -0,0 +1,10 @@
+{
+ "changes": [
+ {
+ "packageName": "@snowplow/node-tracker",
+ "comment": "Add an anonymization method to Node.js emitter and API (close #1286)",
+ "type": "none"
+ }
+ ],
+ "packageName": "@snowplow/node-tracker"
+}
\ No newline at end of file
diff --git a/trackers/node-tracker/src/emitter.ts b/trackers/node-tracker/src/emitter.ts
index 0c66914d2..a6192ad87 100644
--- a/trackers/node-tracker/src/emitter.ts
+++ b/trackers/node-tracker/src/emitter.ts
@@ -1,38 +1,10 @@
-/*
- * Copyright (c) 2022 Snowplow Analytics Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
import { Payload } from '@snowplow/tracker-core';
export interface Emitter {
flush: () => void;
input: (payload: Payload) => void;
+ /** Sets whether requests sent by the emitter should be anonymized. Read more about anonymization at https://docs.snowplow.io/docs/collecting-data/collecting-from-own-applications/snowplow-tracker-protocol/going-deeper/http-headers/. */
+ setAnonymization?: (shouldAnonymize: boolean) => void;
}
export enum HttpProtocol {
diff --git a/trackers/node-tracker/src/got_emitter.ts b/trackers/node-tracker/src/got_emitter.ts
index 71ad0d3d5..bf82f6de3 100644
--- a/trackers/node-tracker/src/got_emitter.ts
+++ b/trackers/node-tracker/src/got_emitter.ts
@@ -1,33 +1,3 @@
-/*
- * Copyright (c) 2022 Snowplow Analytics Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
import util from 'util';
import got, { Response, RequestError, Agents, RequiredRetryOptions, ToughCookieJar, PromiseCookieJar } from 'got';
import { Payload, version } from '@snowplow/tracker-core';
@@ -46,6 +16,7 @@ import { Emitter, HttpProtocol, HttpMethod, preparePayload } from './emitter';
* @param cookieJar - Add a cookieJar to `got` - https://github.com/sindresorhus/got/blob/v11.5.2/readme.md#cookiejar
* @param callback - Callback called after a `got` request following retries - called with ErrorRequest (https://github.com/sindresorhus/got/blob/v11.5.2/readme.md#errors) and Response (https://github.com/sindresorhus/got/blob/v11.5.2/readme.md#response)
* @param agents - Set new http.Agent and https.Agent objects on `got` requests - https://github.com/sindresorhus/got/blob/v11.5.2/readme.md#agent
+ * @param serverAnonymization - Whether requests are sent with the `SP-Anonymous: *` header to request server anonymization. Defaults to `false`.
*/
export function gotEmitter(
endpoint: string,
@@ -56,7 +27,8 @@ export function gotEmitter(
retry?: number | Partial,
cookieJar?: PromiseCookieJar | ToughCookieJar,
callback?: (error?: RequestError, response?: Response) => void,
- agents?: Agents
+ agents?: Agents,
+ serverAnonymization: boolean = false
): Emitter {
const maxBufferLength = bufferSize ?? (method === HttpMethod.GET ? 0 : 10);
const path = method === HttpMethod.GET ? '/i' : '/com.snowplowanalytics.snowplow/tp2';
@@ -103,6 +75,12 @@ export function gotEmitter(
return;
}
+ const headers = {
+ 'user-agent': `snowplow-nodejs-tracker/${version}`,
+ ...(serverAnonymization && { 'SP-Anonymous': '*' }),
+ ...(method === HttpMethod.POST && { 'content-type': 'application/json; charset=utf-8' }),
+ };
+
if (method === HttpMethod.POST) {
const postJson = {
schema: 'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
@@ -111,13 +89,10 @@ export function gotEmitter(
got
.post(targetUrl, {
json: postJson,
- headers: {
- 'content-type': 'application/json; charset=utf-8',
- 'user-agent': `snowplow-nodejs-tracker/${version}`,
- },
agent: agents,
- retry: retry,
- cookieJar: cookieJar,
+ headers,
+ retry,
+ cookieJar,
})
.then(handleSuccess, handleFailure);
} else {
@@ -125,12 +100,10 @@ export function gotEmitter(
got
.get(targetUrl, {
searchParams: preparePayload(bufferCopy[i]),
- headers: {
- 'user-agent': `snowplow-nodejs-tracker/${version}`,
- },
agent: agents,
- retry: retry,
- cookieJar: cookieJar,
+ headers,
+ retry,
+ cookieJar,
})
.then(handleSuccess, handleFailure);
}
@@ -148,11 +121,16 @@ export function gotEmitter(
}
};
+ const setAnonymization = (shouldAnonymize: boolean) => {
+ serverAnonymization = shouldAnonymize;
+ };
+
return {
/**
* Send all events queued in the buffer to the collector
*/
flush,
input,
+ setAnonymization,
};
}
diff --git a/trackers/node-tracker/test/got_emitter.ts b/trackers/node-tracker/test/got_emitter.ts
index d48fd2cf5..c88968e57 100644
--- a/trackers/node-tracker/test/got_emitter.ts
+++ b/trackers/node-tracker/test/got_emitter.ts
@@ -1,33 +1,3 @@
-/*
- * Copyright (c) 2022 Snowplow Analytics Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
import test from 'ava';
import sinon from 'sinon';
import nock from 'nock';
@@ -35,25 +5,61 @@ import { HttpMethod, HttpProtocol, gotEmitter } from '../src/index';
const endpoint = 'd3rkrsqld9gmqf.cloudfront.net';
-nock(new RegExp('https*://' + endpoint))
- .persist()
- .filteringPath(() => '/')
- .get('/')
- .reply(200, (uri) => uri);
-
-nock(new RegExp('https*://' + endpoint))
- .matchHeader('content-type', 'application/json; charset=utf-8')
- .persist()
- .filteringRequestBody(() => '*')
- .post('/com.snowplowanalytics.snowplow/tp2', '*')
- .reply(200, (_uri, body: Record) => (body['data'] as Array)[0]);
-
test.before(() => {
nock.disableNetConnect();
});
-test.after(() => {
+test.beforeEach(() => {
+ nock(new RegExp('https*://' + endpoint))
+ .filteringPath(() => '/')
+ .get('/')
+ .reply(200, (uri) => uri);
+
+ nock(new RegExp('https*://' + endpoint))
+ .matchHeader('content-type', 'application/json; charset=utf-8')
+ .filteringRequestBody(() => '*')
+ .post('/com.snowplowanalytics.snowplow/tp2', '*')
+ .reply(200, (_uri: string, body: Record) => (body['data'] as Array)[0]);
+});
+
+test.afterEach(() => {
+ nock.cleanAll();
+});
+
+test.serial('gotEmitter should allow anonymization headers', async (t) => {
nock.cleanAll();
+
+ nock(new RegExp('https*://' + endpoint), {
+ reqheaders: {
+ 'SP-Anonymous': '*',
+ },
+ })
+ .filteringPath(() => '/')
+ .get('/')
+ .once()
+ .reply(200, (uri) => uri);
+
+ await new Promise((resolve, reject) => {
+ const e = gotEmitter(
+ endpoint,
+ HttpProtocol.HTTPS,
+ 80,
+ HttpMethod.GET,
+ undefined,
+ undefined,
+ undefined,
+ function (error, response) {
+ nock.cleanAll();
+ t.is(error, undefined);
+ t.pass();
+ if (error) reject(error);
+ else resolve(response);
+ },
+ undefined,
+ true
+ );
+ e.input({});
+ });
});
test('gotEmitter should send an HTTP GET request', async (t) => {