@@ -11,6 +11,13 @@ const DESCRIPTION_FIELD_PATTERNS = [
/**
 * Minimum length for a description candidate. The fallback scan in
 * `detectDescriptionField` seeds its best-length tracker with this value.
 */
const MIN_DESCRIPTION_LENGTH = 30;

/**
 * Maximum length for a description candidate.
 * NOTE(review): presumably enforced by `isValidDescriptionValue`; its body is
 * not fully visible here — confirm.
 */
const MAX_DESCRIPTION_LENGTH = 10000;

/** Field names that are never considered as description candidates (exact match). */
const SKIP_FIELDS = ["id", "idHash", "link", "guid", "title"];

/**
 * Field-name patterns that are never considered as description candidates.
 * Matches `processed::categories` and `categories__<digits>`, case-insensitively.
 * The patterns deliberately omit the `g`/`y` flags so `RegExp.prototype.test`
 * stays stateless across calls.
 */
const SKIP_FIELD_PATTERNS = [/^processed::categories$/i, /^categories__\d+$/i];

/**
 * Reports whether a field name is excluded from description detection.
 *
 * A field is skipped when it is listed verbatim in `SKIP_FIELDS` or when it
 * matches any regular expression in `SKIP_FIELD_PATTERNS`.
 *
 * @param field - Field name to check.
 * @returns `true` if the field must be ignored, `false` otherwise.
 */
function shouldSkipField(field: string): boolean {
  if (SKIP_FIELDS.includes(field)) return true;

  return SKIP_FIELD_PATTERNS.some((pattern) => pattern.test(field));
}
1421
1522function isValidDescriptionValue ( value : unknown ) : boolean {
1623 if ( ! value || typeof value !== "string" ) return false ;
@@ -28,7 +35,7 @@ function isValidDescriptionValue(value: unknown): boolean {
2835export function detectDescriptionField ( articleSample : Record < string , unknown > ) : string | null {
2936 for ( const pattern of DESCRIPTION_FIELD_PATTERNS ) {
3037 for ( const [ field , value ] of Object . entries ( articleSample ) ) {
31- if ( SKIP_FIELDS . includes ( field ) ) continue ;
38+ if ( shouldSkipField ( field ) ) continue ;
3239
3340 if ( pattern . test ( field ) && isValidDescriptionValue ( value ) ) {
3441 return field ;
@@ -40,7 +47,7 @@ export function detectDescriptionField(articleSample: Record<string, unknown>):
4047 let bestLength = MIN_DESCRIPTION_LENGTH ;
4148
4249 for ( const [ field , value ] of Object . entries ( articleSample ) ) {
43- if ( SKIP_FIELDS . includes ( field ) ) continue ;
50+ if ( shouldSkipField ( field ) ) continue ;
4451
4552 if ( isValidDescriptionValue ( value ) ) {
4653 const { length } = value as string ;
0 commit comments