@@ -6,6 +6,28 @@ import { RequestWidget } from "../../types";
66
77const BATCH_SIZE = 5000 ;
88
/**
 * Strips unwanted characters from a query-filter value before it is stored.
 *
 * The value goes through three passes, then is trimmed:
 *  1. C0/C1 control characters, the BOM (U+FEFF) and the U+FFFE/U+FFFF
 *     noncharacters are removed.
 *  2. Backslash, quotes, parentheses, semicolon, pipe, square brackets and
 *     curly braces are removed.
 *  3. Anything that is not a letter, combining mark, number, punctuation,
 *     separator, emoji or whitespace is removed (this drops symbols such as
 *     `+`, `<`, `=` and format characters such as zero-width spaces).
 *
 * @param value - Raw value to clean.
 * @returns The cleaned, trimmed string; `""` when the input is empty or is
 *          not a string at runtime.
 */
function sanitizeRemoteValue(value: string): string {
  // The declared type is `string`, but callers may pass values read from
  // untyped documents; a non-string would make `.replace` throw.
  if (typeof value !== "string" || value.length === 0) return "";

  return (
    value
      // Remove control characters, the BOM and the U+FFFE/U+FFFF noncharacters
      .replace(/[\x00-\x1F\x7F-\x9F\uFEFF\uFFFE\uFFFF]/g, "")
      // Remove potentially dangerous characters
      .replace(/[\\'"();|[\]{}]/g, "")
      // Keep only letters, marks, numbers, punctuation, separators and emojis.
      // \p{M} must be allowed: without it, decomposed accents ("e" + U+0301),
      // Indic vowel signs and emoji variation selectors are silently dropped.
      .replace(/[^\p{L}\p{M}\p{N}\p{P}\p{Z}\p{Emoji}\s-]/gu, "")
      // Trim whitespace
      .trim()
  );
}
24+
/**
 * Normalizes a query filter into an array of sanitized strings.
 *
 * @param value - A single value, a list of values, or nothing.
 * @returns Each value passed through `sanitizeRemoteValue`, with entries that
 *          end up empty removed; `[]` when `value` is falsy.
 */
const toArray = (value: string | string[] | undefined): string[] => {
  if (!value) return [];

  const rawValues = Array.isArray(value) ? value : [value];

  // A value can become "" once sanitized (e.g. "()"); drop it so the result
  // matches the empty-input case instead of yielding [""].
  return rawValues.map((item) => sanitizeRemoteValue(item)).filter((item) => item.length > 0);
};
30+
931const buildData = ( doc : RequestWidget , widgets : { [ key : string ] : string } ) => {
1032 const widgetId = widgets [ doc . widgetId . toString ( ) ] ;
1133 if ( ! widgetId ) {
@@ -14,16 +36,16 @@ const buildData = (doc: RequestWidget, widgets: { [key: string]: string }) => {
1436 }
1537 const obj = {
1638 old_id : doc . _id . toString ( ) ,
17- domain : Array . isArray ( doc . query ?. domain ) ? doc . query . domain : doc . query ?. domain ? [ doc . query . domain ] : [ ] ,
18- organization : Array . isArray ( doc . query ?. organization ) ? doc . query . organization : doc . query ?. organization ? [ doc . query . organization ] : [ ] ,
19- department : Array . isArray ( doc . query ?. department ) ? doc . query . department : doc . query ?. department ? [ doc . query . department ] : [ ] ,
20- schedule : Array . isArray ( doc . query ?. schedule ) ? doc . query . schedule : doc . query ?. schedule ? [ doc . query . schedule ] : [ ] ,
21- remote : Array . isArray ( doc . query ?. remote ) ? doc . query . remote : doc . query ?. remote ? [ doc . query . remote ] : [ ] ,
22- action : Array . isArray ( doc . query ?. action ) ? doc . query . action : doc . query ?. action ? [ doc . query . action ] : [ ] ,
23- beneficiary : Array . isArray ( doc . query ?. beneficiary ) ? doc . query . beneficiary : doc . query ?. beneficiary ? [ doc . query . beneficiary ] : [ ] ,
24- country : Array . isArray ( doc . query ?. country ) ? doc . query . country : doc . query ?. country ? [ doc . query . country ] : [ ] ,
25- minor : Array . isArray ( doc . query ?. minor ) ? doc . query . minor : doc . query ?. minor ? [ doc . query . minor ] : [ ] ,
26- accessibility : Array . isArray ( doc . query ?. accessibility ) ? doc . query . accessibility : doc . query ?. accessibility ? [ doc . query . accessibility ] : [ ] ,
39+ domain : toArray ( doc . query ?. domain ) ,
40+ organization : toArray ( doc . query ?. organization ) ,
41+ department : toArray ( doc . query ?. department ) ,
42+ schedule : toArray ( doc . query ?. schedule ) ,
43+ remote : toArray ( doc . query ?. remote ) ,
44+ action : toArray ( doc . query ?. action ) ,
45+ beneficiary : toArray ( doc . query ?. beneficiary ) ,
46+ country : toArray ( doc . query ?. country ) ,
47+ minor : toArray ( doc . query ?. minor ) ,
48+ accessibility : toArray ( doc . query ?. accessibility ) ,
2749 duration : doc . query ?. duration ? parseInt ( doc . query . duration ) : null ,
2850 start : doc . query ?. start ? new Date ( doc . query . start ) : null ,
2951 search : doc . query ?. search ?? null ,
@@ -43,52 +65,76 @@ const buildData = (doc: RequestWidget, widgets: { [key: string]: string }) => {
4365const handler = async ( ) => {
4466 try {
4567 const start = new Date ( ) ;
68+ console . log ( `[Widget-Requests] Started at ${ start . toISOString ( ) } .` ) ;
4669 let created = 0 ;
47- let page = 0 ;
70+ let offset = 20000 ;
71+ let processed = 0 ;
72+
73+ const count = await prisma . widgetQuery . count ( ) ;
74+ console . log ( `[Widget-Requests] Found ${ count } docs in database.` ) ;
75+
76+ const widgets = { } as { [ key : string ] : string } ;
77+ await prisma . widget . findMany ( { select : { id : true , old_id : true } } ) . then ( ( data ) => data . forEach ( ( d ) => ( widgets [ d . old_id ] = d . id ) ) ) ;
4878
    // Get data from the last 7 days
5080 const where = { createdAt : { $gte : new Date ( Date . now ( ) - 1000 * 60 * 60 * 24 * 7 ) } } ;
51- const total = await RequestWidgetModel . countDocuments ( where ) ;
52- let data = await RequestWidgetModel . find ( where )
53- . limit ( BATCH_SIZE )
54- . skip ( page * BATCH_SIZE )
55- . lean ( ) ;
56- console . log ( `[Widget-Requests] Found ${ total } docs to sync.` ) ;
81+ const countToSync = await RequestWidgetModel . countDocuments ( where ) ;
82+ console . log ( `[Widget-Requests] Found ${ countToSync } docs to sync.` ) ;
5783
58- const stored = await prisma . widgetQuery . count ( ) ;
59- console . log ( `[Widget-Requests] Found ${ stored } docs in database.` ) ;
84+ while ( true ) {
85+ const data = await RequestWidgetModel . find ( where ) . limit ( BATCH_SIZE ) . skip ( offset ) . lean ( ) ;
6086
61- const widgets = { } as { [ key : string ] : string } ;
62- await prisma . widget . findMany ( { select : { id : true , old_id : true } } ) . then ( ( data ) => data . forEach ( ( d ) => ( widgets [ d . old_id ] = d . id ) ) ) ;
87+ if ( data . length === 0 ) {
88+ break ;
89+ }
90+
91+ console . log ( `[Widget-Requests] Processing ${ data . length } docs.` ) ;
92+
93+ const dataToCreate : PgWidgetQuery [ ] = [ ] ;
94+
95+ const stored = { } as { [ key : string ] : boolean } ;
96+ await prisma . widgetQuery
97+ . findMany ( {
98+ where : { old_id : { in : data . map ( ( hit ) => hit . _id . toString ( ) ) } } ,
99+ select : { old_id : true } ,
100+ } )
101+ . then ( ( data ) => data . forEach ( ( d ) => ( stored [ d . old_id ] = true ) ) ) ;
63102
64- while ( data && data . length ) {
65- const dataToCreate = [ ] ;
66103 for ( const doc of data ) {
104+ if ( stored [ doc . _id . toString ( ) ] ) {
105+ continue ;
106+ }
67107 const obj = buildData ( doc as RequestWidget , widgets ) ;
68108 if ( ! obj ) {
69109 continue ;
70110 }
111+
71112 dataToCreate . push ( obj ) ;
72113 }
73114
74115 // Create data
75116 if ( dataToCreate . length ) {
76117 console . log ( `[Widget-Requests] Creating ${ dataToCreate . length } docs...` ) ;
77- const res = await prisma . widgetQuery . createMany ( {
78- data : dataToCreate ,
79- skipDuplicates : true ,
80- } ) ;
81- created += res . count ;
82- console . log ( `[Widget-Requests] Created ${ res . count } docs.` ) ;
118+ try {
119+ const res = await prisma . widgetQuery . createMany ( {
120+ data : dataToCreate ,
121+ skipDuplicates : true ,
122+ } ) ;
123+ created += res . count ;
124+ console . log ( `[Widget-Requests] Created ${ res . count } docs.` ) ;
125+ } catch ( error ) {
126+ console . log ( error ) ;
127+ console . log ( JSON . stringify ( dataToCreate , null , 2 ) ) ;
128+ throw error ;
129+ // captureException(error, "[Widget-Requests] Error while creating docs.");
130+ }
83131 }
84132
85- page ++ ;
86- data = await RequestWidgetModel . find ( where )
87- . limit ( BATCH_SIZE )
88- . skip ( page * BATCH_SIZE )
89- . lean ( ) ;
133+ processed += data . length ;
134+ offset += BATCH_SIZE ;
90135 }
91136
137+ console . log ( `[Widget-Requests] Processed ${ processed } docs, ${ created } created` ) ;
92138 console . log ( `[Widget-Requests] Ended at ${ new Date ( ) . toISOString ( ) } in ${ ( Date . now ( ) - start . getTime ( ) ) / 1000 } s.` ) ;
93139 return { created } ;
94140 } catch ( error ) {
0 commit comments