Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions functions/env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
NUM_SHARDS_INSTANCE_COUNT=
NUM_SHARDS_VOTE_COUNT=
GRAPH_API_VERSION=
ENVIRONMENT=
SIMILARITY_THRESHOLD=
TYPESENSE_HOST=
EMBEDDER_HOST=
CHECKER1_ID=
CHECKER1_TELEGRAM_ID=
CHECKER1_PHONE_NUMBER=
TYPESENSE_PORT=
TYPESENSE_PROTOCOL=
TELEGRAM_REPORT_CHANNEL_ID=
TEST_IMAGE_URL=
HASHIDS_SALT=
WEBHOOK_PATH_WHATSAPP=
WEBHOOK_PATH_TELEGRAM=
CHECKER_APP_HOST=
WEBHOOK_PATH_TYPEFORM=
CHECKERS_GROUP_LINK=
USERS_WHATSAPP_NUMBER=
CHECKERS_CHAT_ID=
TYPEFORM_URL=

#EXTERNAL APIS
VIRUS_TOTAL_API_KEY=
187 changes: 110 additions & 77 deletions functions/src/definitions/eventHandlers/userGenericMessageHandlers.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as admin from "firebase-admin"
import * as functions from "firebase-functions"
import { validateURLs } from '../../utils/utils';
import { onMessagePublished } from "firebase-functions/v2/pubsub"
import { Timestamp } from "firebase-admin/firestore"
import { checkNewlyJoined } from "../../validators/common/checkNewlyJoined"
Expand Down Expand Up @@ -70,13 +71,15 @@ const userGenericMessageHandlerWhatsapp = async function (

const isNewlyJoined = checkNewlyJoined(userSnap, messageTimestamp)

console.log(`Message is of type "${type}"`)
switch (type) {
//only two types: text or image
case "text":
// info on WhatsApp text message payload: https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages
if (!message.text) {
break
}
console.log(`Text message is "${message.text}"`)
const textNormalised = normalizeSpaces(message.text).toLowerCase() //normalise spaces needed cos of potential &nbsp when copying message on desktop whatsapp
if (
checkTemplate(
Expand All @@ -87,16 +90,17 @@ const userGenericMessageHandlerWhatsapp = async function (
textNormalised,
responses?.REFERRAL_PREPOPULATED_PREFIX_1.toLowerCase()
)

) {
step = "text_prepopulated"
if (isFirstTimeUser) {
await referralHandler(userSnap, message.text, from)
} else {
await sendMenuMessage(userSnap, "MENU_PREFIX", "whatsapp", null, null)
}
console.log(`step ${step}`)
break
}

step = await newTextInstanceHandler({
userSnap,
source: message.source,
Expand Down Expand Up @@ -169,6 +173,8 @@ async function newTextInstanceHandler({
let hasMatch = false
let messageRef: FirebaseFirestore.DocumentReference | null = null
let messageUpdateObj: MessageData | null = null
let validatedURLS: any;

const machineCategory = (await classifyText(text)) ?? "error"
if (from && isFirstTimeUser && machineCategory.includes("irrelevant")) {
await userSnap.ref.update({
Expand All @@ -181,6 +187,7 @@ async function newTextInstanceHandler({
let embedding
let textHash = hashMessage(text)
// 1 - check if the exact same message exists in database

try {
;({ embedding, similarity } = await calculateSimilarity(
text,
Expand Down Expand Up @@ -236,45 +243,58 @@ async function newTextInstanceHandler({
rationalisation = await rationaliseMessage(text, machineCategory)
}
messageRef = db.collection("messages").doc()
messageUpdateObj = {
machineCategory: machineCategory, //Can be "fake news" or "scam"
isMachineCategorised: isMachineAssessed,
originalText: text,
text: strippedMessage, //text
caption: null,
latestInstance: null,
firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type)
lastTimestamp: timestamp, //timestamp of latest instance (firestore timestamp data type)
lastRefreshedTimestamp: timestamp,
isPollStarted: false, //boolean, whether or not polling has started
isAssessed: isMachineAssessed, //boolean, whether or not we have concluded the voting
assessedTimestamp: null,
assessmentExpiry: null,
assessmentExpired: false,
truthScore: null, //float, the mean truth score
numberPointScale: 6,
isIrrelevant:
isMachineAssessed && machineCategory.includes("irrelevant")
? true
: null, //bool, if majority voted irrelevant then update this
isScam: isMachineAssessed && machineCategory === "scam" ? true : null,
isIllicit:
isMachineAssessed && machineCategory === "illicit" ? true : null,
isSpam: isMachineAssessed && machineCategory === "spam" ? true : null,
isLegitimate: null,
isUnsure: null,
isInfo: machineCategory === "info" ? true : null,
isSatire: null,
isHarmful: null,
isHarmless: null,
tags: {},
primaryCategory: isMachineAssessed
? machineCategory.split("_")[0] //in case of irrelevant_length, we want to store irrelevant
: null,
customReply: null, //string
instanceCount: 0,
rationalisation: rationalisation,

try {
validatedURLS = await validateURLs(text)
console.log('Validated URLs:', validatedURLS)

messageUpdateObj = {
machineCategory: machineCategory, //Can be "fake news" or "scam"
isMachineCategorised: isMachineAssessed,
originalText: text,
text: strippedMessage, //text
caption: null,
latestInstance: null,
firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type)
lastTimestamp: timestamp, //timestamp of latest instance (firestore timestamp data type)
lastRefreshedTimestamp: timestamp,
isPollStarted: false, //boolean, whether or not polling has started
isAssessed: isMachineAssessed, //boolean, whether or not we have concluded the voting
assessedTimestamp: null,
assessmentExpiry: null,
assessmentExpired: false,
truthScore: null, //float, the mean truth score
numberPointScale: 6,
isIrrelevant:
isMachineAssessed && machineCategory.includes("irrelevant")
? true
: null, //bool, if majority voted irrelevant then update this
isScam: isMachineAssessed && machineCategory === "scam" ? true : null,
isIllicit:
isMachineAssessed && machineCategory === "illicit" ? true : null,
isSpam: isMachineAssessed && machineCategory === "spam" ? true : null,
isLegitimate: null,
isUnsure: null,
isInfo: machineCategory === "info" ? true : null,
isSatire: null,
isHarmful: null,
isHarmless: null,
tags: {},
primaryCategory: isMachineAssessed
? machineCategory.split("_")[0] //in case of irrelevant_length, we want to store irrelevant
: null,
customReply: null, //string
instanceCount: 0,
rationalisation: rationalisation,
virtualTotalResults: validatedURLS
}
console.log('messageUpdateObj:', messageUpdateObj)
} catch (error) {
console.error('Error validating URLs:', error)
// You might want to handle the error, such as setting a default value
validatedURLS = null
}

} else {
messageRef = matchedParentMessageRef
}
Expand Down Expand Up @@ -369,6 +389,8 @@ async function newImageInstanceHandler({
let matchedInstanceSnap
let captionHash = caption ? hashMessage(caption) : null

let validatedURLS: any;

if (!mediaId) {
throw new Error(`No mediaId for whatsapp message with id ${id}`)
}
Expand Down Expand Up @@ -513,44 +535,55 @@ async function newImageInstanceHandler({
)
}
messageRef = db.collection("messages").doc()
messageUpdateObj = {
machineCategory: machineCategory,
isMachineCategorised: isMachineAssessed,
originalText: extractedMessage ?? null,
text: strippedMessage ?? null, //text
caption: caption ?? null,
latestInstance: null,
firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type)
lastTimestamp: timestamp, //timestamp of latest instance (firestore timestamp data type)
lastRefreshedTimestamp: timestamp,
isPollStarted: false, //boolean, whether or not polling has started
isAssessed: isMachineAssessed, //boolean, whether or not we have concluded the voting
assessedTimestamp: null,
assessmentExpiry: null,
assessmentExpired: false,
truthScore: null, //float, the mean truth score
numberPointScale: 6,
isIrrelevant:
isMachineAssessed && machineCategory.includes("irrelevant")
? true
: null, //bool, if majority voted irrelevant then update this
isScam: isMachineAssessed && machineCategory === "scam" ? true : null,
isIllicit:
isMachineAssessed && machineCategory === "illicit" ? true : null,
isSpam: isMachineAssessed && machineCategory === "spam" ? true : null,
isLegitimate: null,
isUnsure: null,
isInfo: !caption && machineCategory === "info" ? true : null,
isSatire: null,
isHarmful: null,
isHarmless: null,
tags: {},
primaryCategory: isMachineAssessed
? machineCategory.split("_")[0] //in case of irrelevant_length, we want to store irrelevant
: null,
customReply: null, //string
instanceCount: 0,
rationalisation: rationalisation,

try {
validatedURLS = await validateURLs(extractedMessage)
console.log('Validated URLs:', validatedURLS)

messageUpdateObj = {
machineCategory: machineCategory,
isMachineCategorised: isMachineAssessed,
originalText: extractedMessage ?? null,
text: strippedMessage ?? null, //text
caption: caption ?? null,
latestInstance: null,
firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type)
lastTimestamp: timestamp, //timestamp of latest instance (firestore timestamp data type)
lastRefreshedTimestamp: timestamp,
isPollStarted: false, //boolean, whether or not polling has started
isAssessed: isMachineAssessed, //boolean, whether or not we have concluded the voting
assessedTimestamp: null,
assessmentExpiry: null,
assessmentExpired: false,
truthScore: null, //float, the mean truth score
numberPointScale: 6,
isIrrelevant:
isMachineAssessed && machineCategory.includes("irrelevant")
? true
: null, //bool, if majority voted irrelevant then update this
isScam: isMachineAssessed && machineCategory === "scam" ? true : null,
isIllicit:
isMachineAssessed && machineCategory === "illicit" ? true : null,
isSpam: isMachineAssessed && machineCategory === "spam" ? true : null,
isLegitimate: null,
isUnsure: null,
isInfo: !caption && machineCategory === "info" ? true : null,
isSatire: null,
isHarmful: null,
isHarmless: null,
tags: {},
primaryCategory: isMachineAssessed
? machineCategory.split("_")[0] //in case of irrelevant_length, we want to store irrelevant
: null,
customReply: null, //string
instanceCount: 0,
rationalisation: rationalisation,
virtualTotalResults: validatedURLS
}
} catch (error) {
console.error('Error validating URLs:', error)
// You might want to handle the error, such as setting a default value
validatedURLS = null
}
} else {
if (matchType === "image" && matchedInstanceSnap) {
Expand Down
1 change: 1 addition & 0 deletions functions/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ export type MessageData = {
customReply: string | null
instanceCount: number
rationalisation: string | null // Assuming 'rationalisation' is a string; adjust as necessary if it's a different type.
virtualTotalResults: any | null
}

export type InstanceData = {
Expand Down
91 changes: 91 additions & 0 deletions functions/src/utils/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
const axios = require('axios');
const { URLSearchParams } = require('url');

/**
 * Outcome of a single VirusTotal lookup performed by `validateURLs`.
 * One entry is produced per URL found in the scanned text.
 */
interface URLValidationResult {
/** The URL that was looked up (after `addHttpsIfMissing` was applied). */
url: string;
/** `true` when the VirusTotal request resolved, `false` when it was rejected. */
success: boolean;
/** On success, the JSON-stringified `total_votes` attribute of the response; `null` on failure. */
data: any;
/** On failure, the JSON-stringified error response body; `null` on success. */
error?: any;
}

/**
 * Ensures `url` carries an explicit scheme, defaulting to HTTPS.
 *
 * The input is trimmed first. It is considered to already have a scheme when
 * it starts with `<scheme>://` (e.g. `http://`, `https://`) or with
 * `<scheme>:` followed by something other than a digit (e.g. `mailto:`).
 *
 * A plain `new URL(url)` parse is deliberately NOT used for this decision:
 * the WHATWG parser accepts `example.com:8080/path` or `localhost:3000` and
 * reports the host name as the scheme, so such inputs would wrongly be
 * returned without `https://`.
 *
 * @param url - Absolute URL, or a bare host/path such as `example.com/page`.
 * @returns The trimmed input unchanged if it has a scheme, otherwise the
 *          trimmed input prefixed with `https://`.
 */
function addHttpsIfMissing(url: string): string {
  const trimmed = url.trim();

  // RFC 3986 scheme syntax, then either "//" or a non-digit. The digit
  // exclusion is what tells "host:port" apart from "scheme:rest".
  const hasScheme = /^[a-z][a-z\d+.-]*:(?:\/\/|(?!\d))/i.test(trimmed);

  return hasScheme ? trimmed : `https://${trimmed}`;
}

/**
 * Turns whatever a failed VirusTotal call threw into a printable string.
 * Prefers the HTTP response body (axios puts it on `error.response.data`);
 * falls back to the error message for failures that never got a response
 * (DNS errors, timeouts, unexpected payload shape, ...).
 */
function describeVirusTotalError(error: unknown): string {
  const responseData = (
    error as { response?: { data?: unknown } } | null | undefined
  )?.response?.data;
  if (responseData !== undefined) {
    return JSON.stringify(responseData);
  }
  return error instanceof Error ? error.message : String(error);
}

/**
 * Extracts every `http://` / `https://` URL from `text` and fetches its
 * VirusTotal v3 URL report, all lookups running in parallel.
 *
 * The returned promise never rejects because of an individual lookup: each
 * failure is reported as an entry with `success: false` and a string `error`.
 * Entries are returned in the order the URLs appear in `text`.
 *
 * Reads the API key from `process.env.VIRUS_TOTAL_API_KEY`. If it is not set,
 * no network request is made and every URL is reported as failed.
 *
 * @param text - Message text to scan. `null`, `undefined` and `""` are
 *               accepted and yield an empty result (image messages may have
 *               no extracted text).
 * @returns One result per URL found; `data` holds the JSON-stringified
 *          `total_votes` attribute of the VirusTotal report on success.
 */
export async function validateURLs(
  text: string | null | undefined
): Promise<URLValidationResult[]> {
  if (!text) {
    return [];
  }

  const urlRegex = /https?:\/\/[^\s/$.?#].[^\s]*/g;
  const urls = text.match(urlRegex);
  if (!urls) {
    // If no URLs are found, return an empty array
    return [];
  }

  // Never log the key itself (not even a suffix); only whether it is present.
  const apiKey = process.env.VIRUS_TOTAL_API_KEY;
  if (!apiKey) {
    console.error('VIRUS_TOTAL_API_KEY is not set; skipping VirusTotal lookups');
  }

  const results: URLValidationResult[] = await Promise.all(
    urls.map(async (rawUrl, index): Promise<URLValidationResult> => {
      const url = addHttpsIfMissing(rawUrl);
      console.log(`URL ${index + 1} is: "${url}"`);

      if (!apiKey) {
        return {
          url,
          success: false,
          data: null,
          error: 'VIRUS_TOTAL_API_KEY is not configured',
        };
      }

      // VirusTotal identifies a URL by its *unpadded base64url* encoding.
      // Standard base64 can contain "/", "+" and "=" which corrupt the path.
      const urlId = Buffer.from(url)
        .toString('base64')
        .replace(/\+/g, '-')
        .replace(/\//g, '_')
        .replace(/=+$/, '');
      const virusTotalURL = `https://www.virustotal.com/api/v3/urls/${urlId}`;
      console.log(`Calling API ${virusTotalURL} to get scan results of ${url}`);

      try {
        const response = await axios.request({
          method: 'GET',
          url: virusTotalURL,
          headers: {
            accept: 'application/json',
            'x-apikey': apiKey,
          },
        });
        // A response without data.attributes throws here and is reported
        // as a failed lookup by the catch below.
        const data = JSON.stringify(response.data.data.attributes.total_votes);
        console.log(`Success calling ${virusTotalURL}`);
        console.log(data);
        return { url, success: true, data, error: null };
      } catch (error) {
        const details = describeVirusTotalError(error);
        console.log(`Error calling ${virusTotalURL}`);
        console.error(details);
        return { url, success: false, data: null, error: details };
      }
    })
  );

  console.log('All validate URLs requests completed. Results:', results);
  return results;
}