@@ -34,11 +34,21 @@ public function categorize(ReleaseContext $context): CategorizationResult
3434 return $ result ;
3535 }
3636
37+ $ analysis = $ this ->inspectSignals ($ name );
38+ if ($ this ->isZeroVowelLongToken ($ analysis ['coreName ' ])) {
39+ return $ this ->matched (Category::OTHER_HASHED , 0.78 , 'gibberish_zero_vowels ' );
40+ }
41+
3742 // Check for obfuscated/encoded patterns
3843 if ($ result = $ this ->checkObfuscated ($ name )) {
3944 return $ result ;
4045 }
4146
47+ // Check low-signal names that only contain random-looking tokens
48+ if ($ result = $ this ->checkLowSignal ($ name )) {
49+ return $ result ;
50+ }
51+
4252 // Check for gibberish patterns (character-analysis heuristics)
4353 if ($ result = $ this ->checkGibberish ($ name )) {
4454 return $ result ;
@@ -57,6 +67,50 @@ public function categorize(ReleaseContext $context): CategorizationResult
5767 return $ this ->noMatch ();
5868 }
5969
/**
 * Inspect a release name for the media signal markers used by the safety-net pipe.
 *
 * Accepts either a ReleaseContext (its releaseName is inspected) or a plain
 * string. Every marker pattern is tested against the full, unmodified name.
 * The "core name" is whatever stripExtensionsForAnalysis() followed by
 * getCoreNameWithoutSeparators() returns for that name; it is used only for
 * the length and token-shape checks.
 *
 * The name is reported as low-signal when all of the following hold:
 *  - none of the marker patterns matched,
 *  - the core name matches /^[A-Za-z0-9+\/_=-]+$/,
 *  - the core name is at least 12 bytes long.
 *
 * @param ReleaseContext|string $context Release context or raw release name.
 *
 * @return array{coreName: string, coreLength: int, signalScore: int, markers: list<string>, lowSignal: bool}
 */
public function inspectSignals(ReleaseContext|string $context): array
{
    if ($context instanceof ReleaseContext) {
        $name = $context->releaseName;
    } else {
        $name = $context;
    }

    $coreName = $this->getCoreNameWithoutSeparators(
        $this->stripExtensionsForAnalysis($name)
    );
    $coreLength = strlen($coreName);

    // Marker label => regex. Declaration order determines the order of 'markers'.
    $markerPatterns = [
        'season_episode' => '/\bS\d{1,3}[._ -]?E\d{1,4}\b/i',
        'season_pack' => '/\bS\d{1,3}\b/i',
        'resolution' => '/\b(480p|576p|720p|1080[pi]?|2160p|4k|uhd)\b/i',
        'codec' => '/\b(x264|x265|h\.?264|h\.?265|hevc|xvid|av1)\b/i',
        'source' => '/\b(bluray|bdrip|brrip|hdtv|web[._ -]?dl|web[._ -]?rip|dvdrip|remux)\b/i',
        'audio' => '/\b(aac|ac3|ddp|dts|flac|mp3)\b/i',
        'scene_tag' => '/\b(proper|repack|internal|limited|complete|dubbed|subbed|readnfo)\b/i',
        'year' => '/\b(19|20)\d{2}\b/',
        'release_group' => '/-[A-Za-z0-9][A-Za-z0-9._-]{1,20}$/',
        'known_extension' => '/\.(mkv|avi|mp4|mp3|flac|iso|epub|pdf|exe|nzb|rar|7z)$/i',
    ];

    // Keep only the labels whose pattern matches the original (uncleaned) name.
    $markers = array_keys(
        array_filter(
            $markerPatterns,
            static fn (string $regex): bool => preg_match($regex, $name) === 1
        )
    );
    $signalScore = count($markers);

    // Token shape: letters, digits and the characters + / _ = - only.
    $looksLikeBareToken = preg_match('/^[A-Za-z0-9+\/_=-]+$/', $coreName) === 1;

    return [
        'coreName' => $coreName,
        'coreLength' => $coreLength,
        'signalScore' => $signalScore,
        'markers' => $markers,
        'lowSignal' => $signalScore === 0 && $looksLikeBareToken && $coreLength >= 12,
    ];
}

113+
60114 protected function checkHash (string $ name ): ?CategorizationResult
61115 {
62116 // MD5 hash (32 hex characters)
@@ -79,6 +133,10 @@ protected function checkHash(string $name): ?CategorizationResult
79133 return $ this ->matched (Category::OTHER_HASHED , 0.95 , 'hash_generic ' );
80134 }
81135
136+ if ($ this ->isBase64LikeToken ($ name )) {
137+ return $ this ->matched (Category::OTHER_HASHED , 0.9 , 'hash_base64_like ' );
138+ }
139+
82140 // Strip extensions and separators for core-name checks
83141 $ cleaned = $ this ->stripExtensionsForAnalysis ($ name );
84142 $ coreName = $ this ->getCoreNameWithoutSeparators ($ cleaned );
@@ -115,7 +173,18 @@ protected function checkObfuscated(string $name): ?CategorizationResult
115173
116174 // Only punctuation and numbers with no clear structure
117175 if ($ this ->isObfuscatedPunctuation ($ name )) {
118- return $ this ->matched (Category::OTHER_MISC , 0.5 , 'obfuscated_pattern ' );
176+ $ analysis = $ this ->inspectSignals ($ name );
177+ $ hashLike = $ this ->isBase64LikeToken ($ name )
178+ || $ this ->isBoundedGenericHash ($ name )
179+ || $ this ->isZeroVowelLongToken ($ analysis ['coreName ' ], 12 )
180+ || $ analysis ['lowSignal ' ];
181+
182+ return $ this ->matched (
183+ $ hashLike ? Category::OTHER_HASHED : Category::OTHER_MISC ,
184+ $ hashLike ? 0.75 : 0.5 ,
185+ 'obfuscated_pattern ' ,
186+ ['signal_score ' => $ analysis ['signalScore ' ], 'markers ' => $ analysis ['markers ' ]]
187+ );
119188 }
120189
121190 return null ;
@@ -146,6 +215,34 @@ protected function checkGibberish(string $name): ?CategorizationResult
146215 return $ this ->matched (Category::OTHER_HASHED , 0.7 , 'gibberish_random_digits ' );
147216 }
148217
218+ if ($ this ->isZeroVowelLongToken ($ coreName )) {
219+ return $ this ->matched (Category::OTHER_HASHED , 0.78 , 'gibberish_zero_vowels ' );
220+ }
221+
222+ return null ;
223+ }
224+
/**
 * Flag names that carry no media markers and look like one long random token.
 *
 * Returns an OTHER_HASHED match (confidence 0.8, reason 'gibberish_no_signal')
 * when inspectSignals() reports the name as low-signal and its core name is at
 * least 20 bytes long. The match carries the signal score, the matched
 * markers and the core length as extra data.
 *
 * Core names accepted by isZeroVowelLongToken() are deliberately not matched
 * here; null is returned for them (presumably so the zero-vowel gibberish
 * rule can label them instead - confirm against the caller).
 *
 * @param string $name Release name to inspect.
 *
 * @return CategorizationResult|null The match, or null when the rule does not apply.
 */
protected function checkLowSignal(string $name): ?CategorizationResult
{
    [
        'coreName' => $coreName,
        'coreLength' => $coreLength,
        'signalScore' => $signalScore,
        'markers' => $markers,
        'lowSignal' => $isLowSignal,
    ] = $this->inspectSignals($name);

    // Zero-vowel tokens are left unmatched by this rule.
    if ($this->isZeroVowelLongToken($coreName)) {
        return null;
    }

    // Requires both the low-signal flag and a core name of 20+ bytes.
    if (!$isLowSignal || $coreLength < 20) {
        return null;
    }

    $details = [
        'signal_score' => $signalScore,
        'markers' => $markers,
        'core_length' => $coreLength,
    ];

    return $this->matched(Category::OTHER_HASHED, 0.8, 'gibberish_no_signal', $details);
}
151248
0 commit comments