glincker
diff --git a/‎.github/workflows/ci-js.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/ci-js.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.npmrc‎
Lines changed: 1 addition & 0 deletions b/‎.npmrc‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎package-lock.json‎
Lines changed: 564 additions & 376 deletions b/‎package-lock.json‎
Lines changed: 564 additions & 376 deletions
diff --git a/‎package.json‎
Lines changed: 10 additions & 0 deletions b/‎package.json‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎packages/js/src/filters/Filter.ts‎
Lines changed: 11 additions & 1 deletion b/‎packages/js/src/filters/Filter.ts‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎packages/js/src/nlp/contextAnalyzer.ts‎
Lines changed: 34 additions & 13 deletions b/‎packages/js/src/nlp/contextAnalyzer.ts‎
Lines changed: 34 additions & 13 deletions
diff --git a/‎packages/js/tests/context-optimization.test.ts‎
Lines changed: 33 additions & 0 deletions b/‎packages/js/tests/context-optimization.test.ts‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎packages/js/tests/repro_issue.test.ts‎
Lines changed: 53 additions & 0 deletions b/‎packages/js/tests/repro_issue.test.ts‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎packages/py/glin_profanity/data/dictionaries/Norwegian.json‎
Lines changed: 17 additions & 0 deletions b/‎packages/py/glin_profanity/data/dictionaries/Norwegian.json‎
Lines changed: 17 additions & 0 deletions
@@ -60,7 +60,7 @@ jobs:
           cache-dependency-path: package-lock.json
 
       - name: 🔧 Install dependencies
-        run: npm ci
+        run: npm ci --legacy-peer-deps
 
       - name: 📋 Lint & Type Check
         working-directory: ${{ env.PACKAGE_DIR }}
 
@@ -0,0 +1 @@
+legacy-peer-deps=true
@@ -61,6 +61,16 @@
     "react": "^18.3.1",
     "react-dom": "^18.3.1"
   },
+  "overrides": {
+    "@tensorflow-models/toxicity": {
+      "@tensorflow/tfjs-core": "^4.22.0",
+      "@tensorflow/tfjs-converter": "^4.22.0"
+    },
+    "@tensorflow/tfjs-core": "^4.22.0",
+    "@tensorflow/tfjs-converter": "^4.22.0",
+    "@tensorflow/tfjs-backend-cpu": "^4.22.0",
+    "@tensorflow/tfjs-backend-webgl": "^4.22.0"
+  },
   "devDependencies": {
     "@babel/core": "^7.25.2",
     "@babel/preset-env": "^7.25.3",
 
@@ -47,6 +47,7 @@ class Filter {
   private cacheResults: boolean;
   private maxCacheSize: number;
   private cache: Map<string, CheckProfanityResult>;
+  private regexCache: Map<string, RegExp>;
 
   /**
    * Creates a new Filter instance with the specified configuration.
@@ -113,6 +114,7 @@ class Filter {
     this.cacheResults = config?.cacheResults ?? false;
     this.maxCacheSize = config?.maxCacheSize ?? 1000;
     this.cache = new Map();
+    this.regexCache = new Map();
 
     // Build word dictionary
     let words: string[] = [];
@@ -202,6 +204,7 @@ class Filter {
    */
   public clearCache(): void {
     this.cache.clear();
+    this.regexCache.clear();
   }
 
   /**
@@ -292,10 +295,17 @@ class Filter {
   }
 
   private getRegex(word: string): RegExp {
+    if (this.regexCache.has(word)) {
+      const regex = this.regexCache.get(word)!;
+      regex.lastIndex = 0;
+      return regex;
+    }
     const flags = this.caseSensitive ? 'g' : 'gi';
     const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
     const boundary = this.wordBoundaries ? '\\b' : '';
-    return new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+    const regex = new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+    this.regexCache.set(word, regex);
+    return regex;
   }
 
   private isFuzzyToleranceMatch(word: string, text: string): boolean {
 
@@ -41,6 +41,16 @@ const GAMING_POSITIVE = new Set([
   'build', 'loadout', 'strategy', 'tactic', 'play', 'move', 'combo'
 ]);
 
+// Words that are acceptable in gaming contexts but might be flagged otherwise
+const GAMING_ACCEPTABLE_WORDS = new Set([
+  'kill', 'killer', 'killed', 'killing',
+  'shoot', 'shot', 'shooting',
+  'die', 'dying', 'died', 'dead', 'death',
+  'badass', 'sick', 'insane', 'crazy', 'mad', 'beast', 'savage',
+  'suck', 'sucks',
+  'wtf', 'omg', 'hell', 'damn', 'crap'
+]);
+
 // Common positive phrases that might contain flagged words
 const POSITIVE_PHRASES = new Map([
   ['the bomb', 0.9], // "this movie is the bomb"
@@ -69,7 +79,9 @@ export class ContextAnalyzer {
   constructor(config: ContextConfig) {
     this.contextWindow = config.contextWindow;
     this.language = config.language;
-    this.domainWhitelists = new Set(config.domainWhitelists || []);
+    this.domainWhitelists = new Set(
+      (config.domainWhitelists || []).map(word => word.toLowerCase())
+    );
   }
 
   /**
@@ -122,12 +134,10 @@ export class ContextAnalyzer {
     };
   }
 
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   private checkPhraseContext(contextText: string, matchWord: string): ContextAnalysisResult | null {
-    // TODO: Use matchWord for more specific phrase matching in the future
     // Check positive phrases
     for (const [phrase, score] of POSITIVE_PHRASES.entries()) {
-      if (contextText.includes(phrase)) {
+      if (phrase.includes(matchWord) && contextText.includes(phrase)) {
         return {
           contextScore: score,
           reason: `Positive phrase detected: "${phrase}"`,
@@ -136,7 +146,7 @@ export class ContextAnalyzer {
       }
     }
 
-    // Check negative phrases
+    // Check negative phrases (prefixes like "you are" that introduce profanity)
     for (const [phrase, score] of NEGATIVE_PHRASES.entries()) {
       if (contextText.includes(phrase)) {
         return {
@@ -150,25 +160,36 @@ export class ContextAnalyzer {
     return null;
   }
 
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   private isDomainWhitelisted(contextWords: string[], matchWord: string): boolean {
-    // TODO: Use matchWord for domain-specific filtering in the future
+    const normalizedMatchWord = matchWord.toLowerCase();
+
     // Check if any domain whitelist words are present
     for (const word of contextWords) {
-      if (this.domainWhitelists.has(word) || GAMING_POSITIVE.has(word)) {
+      // Check user-defined domain whitelists (permissive)
+      if (this.domainWhitelists.has(word)) {
         return true;
       }
+
+      // Check internal gaming whitelist (restrictive)
+      if (GAMING_POSITIVE.has(word)) {
+        if (GAMING_ACCEPTABLE_WORDS.has(normalizedMatchWord)) {
+          return true;
+        }
+      }
     }
     return false;
   }
 
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   private generateReason(score: number, contextWords: string[]): string {
-    // TODO: Use contextWords for more detailed reasoning in the future
+    const foundPositive = Array.from(new Set(contextWords.filter(word => POSITIVE_INDICATORS.has(word))));
+    const foundNegative = Array.from(new Set(contextWords.filter(word => NEGATIVE_INDICATORS.has(word))));
+
     if (score >= 0.7) {
-      return 'Positive context detected - likely not profanity';
+      const details = foundPositive.length > 0 ? ` (found: ${foundPositive.join(', ')})` : '';
+      return `Positive context detected${details} - likely not profanity`;
     } else if (score <= 0.3) {
-      return 'Negative context detected - likely profanity';
+      const details = foundNegative.length > 0 ? ` (found: ${foundNegative.join(', ')})` : '';
+      return `Negative context detected${details} - likely profanity`;
     } else {
       return 'Neutral context - uncertain classification';
     }
@@ -253,7 +274,7 @@ export class ContextAnalyzer {
    * Updates the domain whitelist for this analyzer instance
    */
   updateDomainWhitelist(newWhitelist: string[]): void {
-    this.domainWhitelists = new Set(newWhitelist);
+    this.domainWhitelists = new Set(newWhitelist.map(word => word.toLowerCase()));
   }
 
   /**
 
@@ -0,0 +1,33 @@
+import { Filter } from '../src/filters/Filter';
+
+describe('Context Optimization', () => {
+  let filter: Filter;
+
+  beforeEach(() => {
+    filter = new Filter({
+      enableContextAware: true,
+      languages: ['english'],
+    });
+  });
+
+  it('should NOT whitelist profanity based on unrelated positive phrases', () => {
+    // "the bomb" is a positive phrase for "bomb".
+    // "shit" is a profanity.
+    // If "the bomb" is present, it shouldn't whitelist "shit".
+    const text = 'The bomb exploded and shit happened';
+
+    const result = filter.checkProfanity(text);
+
+    // Should be flagged because "shit" is profanity and "the bomb" is irrelevant to "shit"
+    expect(result.containsProfanity).toBe(true);
+    expect(result.profaneWords).toContain('shit');
+  });
+
+  it('should still whitelist relevant positive phrases', () => {
+    const text = 'This movie is the bomb';
+    const result = filter.checkProfanity(text);
+
+    // Should NOT be flagged because "the bomb" is a whitelisted phrase for "bomb"
+    expect(result.containsProfanity).toBe(false);
+  });
+});
@@ -0,0 +1,53 @@
+import { Filter } from '../src/filters/Filter';
+
+describe('Domain Specific Whitelisting', () => {
+  let filter: Filter;
+
+  beforeEach(() => {
+    filter = new Filter({
+      enableContextAware: true,
+      contextWindow: 3,
+      confidenceThreshold: 0.7,
+      languages: ['english'],
+      logProfanity: false,
+    });
+  });
+
+  it('verifies baseline: profane word without context is flagged', () => {
+    const result = filter.checkProfanity('You are a bitch');
+    expect(result.containsProfanity).toBe(true);
+  });
+
+  it('correctly flags profanity even if gaming context is present (Fixed Behavior)', () => {
+    // "bitch" is profane.
+    // "player" is in GAMING_POSITIVE.
+    // "bitch" is NOT in GAMING_ACCEPTABLE_WORDS.
+    // So it should remain profane.
+    // Before the fix, "player" would cause isDomainWhitelisted to return true.
+    // After the fix, isDomainWhitelisted returns false.
+    // Sentiment analysis: "you" (negative) vs "player" (positive).
+    const text = 'You bitch player';
+    const result = filter.checkProfanity(text);
+
+    expect(result.containsProfanity).toBe(true);
+    expect(result.matches).toBeDefined();
+    expect(result.matches!.length).toBeGreaterThan(0);
+    // It should NOT be whitelisted
+    expect(result.matches![0].isWhitelisted).toBe(false);
+  });
+
+  it('whitelists acceptable gaming words in gaming context', () => {
+      const text = 'This game sucks'; // "sucks" is in GAMING_ACCEPTABLE_WORDS
+      const result = filter.checkProfanity(text);
+      expect(result.containsProfanity).toBe(false);
+      // Wait, if "sucks" is whitelisted, containsProfanity might be false OR true but isWhitelisted=true?
+      // checkProfanity implementation:
+      // if (contextResult.isWhitelisted) { continue; }
+      // So if whitelisted, it is NOT added to matches/profaneWords.
+      // So containsProfanity should be false.
+
+      const text2 = 'You are a badass player'; // "badass" is in GAMING_ACCEPTABLE_WORDS
+      const result2 = filter.checkProfanity(text2);
+      expect(result2.containsProfanity).toBe(false);
+  });
+});
@@ -0,0 +1,17 @@
+{
+  "words": [
+    "drittsekk",
+    "faen i helvete",
+    "fitte",
+    "jævla",
+    "kuk",
+    "kukene",
+    "kuker",
+    "nigger",
+    "pikk",
+    "sotrør",
+    "ståpikk",
+    "ståpikkene",
+    "ståpikker"
+  ]
+}