1- import { XMLParser } from "fast-xml-parser" ;
2-
31import { captureException } from "../../error" ;
42import ImportModel from "../../models/import" ;
53import PublisherModel from "../../models/publisher" ;
64
7- import { Schema } from "mongoose" ;
85import MissionModel from "../../models/mission" ;
9- import { Import , Mission , MissionXML , Publisher } from "../../types" ;
6+ import { Import , Mission , Publisher } from "../../types" ;
107import { enrichWithGeoloc } from "./geoloc" ;
11- import { buildMission } from "./mission" ;
8+ import { buildData } from "./mission" ;
129import { verifyOrganization } from "./organization" ;
13- import { bulkDB } from "./utils/db" ;
14-
// parseXML: parses an XML feed string and returns the missions found under
// `source.mission`, keeping only the first mission seen for each clientId.
// Returns undefined when the parsed document has no `source` or no `source.mission`.
15- const parseXML = ( xmlString : string ) => {
// NOTE(review): the parser is constructed without options and `options` is handed
// to parse() below. In fast-xml-parser v4 the parser options belong to the
// XMLParser constructor and parse()'s second argument is the validation option;
// confirm against the installed version whether these options take effect.
16- const parser = new XMLParser ( ) ;
17-
18- const options = {
19- attributeNamePrefix : "@_" ,
20- textNodeName : "#text" ,
21- ignoreAttributes : true ,
22- ignoreNameSpace : false ,
23- allowBooleanAttributes : false ,
24- parseNodeValue : true ,
25- parseAttributeValue : false ,
26- trimValues : true ,
27- cdataPositionChar : "\\c" ,
28- parseTrueNumberOnly : false ,
29- arrayMode : false , //"strict"
30- stopNodes : [ "parse-me-as-string" ] ,
// Callback answers true only for the exact path of a mission's address element.
31- isArray : ( name : string , jpath : string , isLeafNode : boolean , isAttribute : boolean ) => {
32- if ( jpath === "source.mission.addresses.address" ) {
33- return true ;
34- }
35- return false ;
36- } ,
37- } ;
38-
39- const res = parser . parse ( xmlString , options ) ;
40-
// Nothing to import when the document has no missions: the caller receives undefined.
41- if ( ! res . source || ! res . source . mission ) {
42- return ;
43- }
// A lone mission is wrapped so that the code below always iterates an array.
44- if ( res . source . mission && ! Array . isArray ( res . source . mission ) ) {
45- res . source . mission = [ res . source . mission ] ;
46- }
47-
48- // Remove duplicates clientId
49- const clientId = new Set ( ) ;
50- const unique = [ ] as MissionXML [ ] ;
51- const data = res . source . mission as MissionXML [ ] ;
// NOTE(review): the next two import lines and the bare line number come from the
// added side of the diff; they do not appear to belong to parseXML's body.
10+ import { bulkDB , cleanDB } from "./utils/db" ;
11+ import { parseXML } from "./utils/xml" ;
5212
53- data . forEach ( ( mission ) => {
// Only the first occurrence of a clientId is kept; later ones are dropped.
54- if ( ! clientId . has ( mission . clientId ) ) {
// Replace the parsed `addresses.address` wrapper with a plain array on
// mission.addresses (an existing array is used as is, a single value is wrapped).
55- const addresses = mission . addresses as any ;
56- if ( addresses ?. address && Array . isArray ( addresses . address ) ) {
57- mission . addresses = addresses . address ;
58- } else if ( addresses ?. address ) {
59- mission . addresses = [ addresses . address ] ;
60- }
61- clientId . add ( mission . clientId ) ;
62- unique . push ( mission ) ;
63- }
64- } ) ;
65-
66- return unique ;
67- } ;
68-
/**
 * Builds the mission document to persist for one mission parsed from a publisher feed.
 *
 * Looks up a stored mission with the same publisher and clientId, lets
 * `buildMission` compute the mission fields, then stamps identity, publisher
 * and synchronisation metadata onto the result.
 *
 * @param startTime - Start time of the import run; written to `lastSyncAt` and `updatedAt`.
 * @param publisher - Publisher whose feed is being imported.
 * @param missionXML - Mission as parsed from the XML feed.
 * @returns The mission to write, or `undefined` when building it failed
 *          (the error is logged and reported through `captureException`).
 */
const buildData = async (startTime: Date, publisher: Publisher, missionXML: MissionXML) => {
  try {
    const missionDB = await MissionModel.findOne({
      publisherId: publisher._id,
      clientId: missionXML.clientId,
    });

    const mission = buildMission(publisher, missionXML, missionDB?.toObject());
    if (missionDB) {
      // Keep the identity and creation date of the stored document.
      mission._id = missionDB._id as Schema.Types.ObjectId;
      mission.createdAt = missionDB.createdAt;
    }

    // A mission present in the feed is live again, whatever its previous state.
    mission.deleted = false;
    mission.deletedAt = null;
    mission.lastSyncAt = startTime;
    mission.updatedAt = startTime;

    // Denormalised publisher information.
    mission.publisherId = publisher._id.toString();
    mission.publisherName = publisher.name;
    mission.publisherLogo = publisher.logo;
    mission.publisherUrl = publisher.url;

    // Carry over the stored verification status (undefined when there is no stored mission).
    mission.organizationVerificationStatus = missionDB?.organizationVerificationStatus;

    if (missionDB && missionDB.statusCommentHistoric && Array.isArray(missionDB.statusCommentHistoric)) {
      // A history already exists: append an entry only when the status code changed.
      if (missionDB.statusCode !== mission.statusCode) {
        mission.statusCommentHistoric = [
          ...missionDB.statusCommentHistoric,
          { status: mission.statusCode, comment: mission.statusComment, date: mission.updatedAt },
        ];
      }
    } else {
      // No usable history yet: start one with the current status.
      mission.statusCommentHistoric = [{ status: mission.statusCode, comment: mission.statusComment, date: mission.updatedAt }];
    }

    return mission;
  } catch (error) {
    // Best effort: one failing mission must not abort the import, so the error is
    // logged with its context, reported, and the mission is skipped.
    console.error(`[${publisher.name}] Error while building mission ${missionXML.clientId}`, error);
    captureException(error, `Error while parsing mission ${missionXML.clientId}`);
    return undefined;
  }
};
// Number of parsed missions handled per batch: importPublisher slices missionsXML
// into chunks of this size and processes one chunk per loop iteration.
13+ const CHUNK_SIZE = 2000 ;
10514
10615const importPublisher = async ( publisher : Publisher , start : Date ) => {
10716 if ( ! publisher ) {
@@ -134,9 +43,7 @@ const importPublisher = async (publisher: Publisher, start: Date) => {
13443 console . log ( `[${ publisher . name } ] Parse xml from ${ publisher . feed } ` ) ;
13544 const missionsXML = parseXML ( xml ) ;
13645 if ( ! missionsXML || ! missionsXML . length ) {
137- console . log ( `[${ publisher . name } ] Empty xml` ) ;
138-
139- console . log ( `[${ publisher . name } ] Mongo cleaning...` ) ;
46+ console . log ( `[${ publisher . name } ] Empty xml, mongo cleaning...` ) ;
14047 const mongoRes = await MissionModel . updateMany ( { publisherId : publisher . _id , deletedAt : null , updatedAt : { $lt : start } } , { deleted : true , deletedAt : new Date ( ) } ) ;
14148 console . log ( `[${ publisher . name } ] Mongo cleaning deleted ${ mongoRes . modifiedCount } ` ) ;
14249 obj . endedAt = new Date ( ) ;
@@ -147,75 +54,59 @@ const importPublisher = async (publisher: Publisher, start: Date) => {
14754 // GET COUNT MISSIONS IN DB
14855 const missionsDB = await MissionModel . countDocuments ( {
14956 publisherId : publisher . _id ,
150- deleted : false ,
57+ deletedAt : null ,
15158 } ) ;
15259 console . log ( `[${ publisher . name } ] Found ${ missionsDB } missions in DB` ) ;
15360
154- // BUILD NEW MISSIONS
155- const missions = [ ] as Mission [ ] ;
156- const promises = [ ] as Promise < Mission | undefined > [ ] ;
157- for ( let j = 0 ; j < missionsXML . length ; j ++ ) {
158- const missionXML = missionsXML [ j ] ;
159- promises . push ( buildData ( obj . startedAt , publisher , missionXML ) ) ;
160-
161- if ( j % 50 === 0 ) {
61+ for ( let i = 0 ; i < missionsXML . length ; i += CHUNK_SIZE ) {
62+ console . log ( `[${ publisher . name } ] Processing chunk ${ i / CHUNK_SIZE + 1 } of ${ Math . ceil ( missionsXML . length / CHUNK_SIZE ) } ` ) ;
63+ const chunk = missionsXML . slice ( i , i + CHUNK_SIZE ) ;
64+ // BUILD NEW MISSIONS
65+ const missions = [ ] as Mission [ ] ;
66+ const promises = [ ] as Promise < Mission | undefined > [ ] ;
67+ for ( let j = 0 ; j < chunk . length ; j ++ ) {
68+ const missionXML = chunk [ j ] ;
69+ promises . push ( buildData ( obj . startedAt , publisher , missionXML ) ) ;
70+
71+ if ( j % 50 === 0 ) {
72+ const res = await Promise . all ( promises ) ;
73+ res . filter ( ( e ) => e !== undefined ) . forEach ( ( e : Mission ) => missions . push ( e ) ) ;
74+ promises . length = 0 ;
75+ }
76+ }
77+ if ( promises . length > 0 ) {
16278 const res = await Promise . all ( promises ) ;
16379 res . filter ( ( e ) => e !== undefined ) . forEach ( ( e : Mission ) => missions . push ( e ) ) ;
164- promises . length = 0 ;
16580 }
166- }
167- if ( promises . length > 0 ) {
168- const res = await Promise . all ( promises ) ;
169- res . filter ( ( e ) => e !== undefined ) . forEach ( ( e : Mission ) => missions . push ( e ) ) ;
170- }
17181
172- // GEOLOC
173- const resultGeoloc = await enrichWithGeoloc ( publisher , missions ) ;
174- resultGeoloc . forEach ( ( r ) => {
175- const mission = missions . find ( ( m ) => m . clientId . toString ( ) === r . clientId . toString ( ) ) ;
176- if ( mission && r . addressIndex < mission . addresses . length ) {
177- const address = mission . addresses [ r . addressIndex ] ;
178- address . street = r . street ;
179- address . city = r . city ;
180- address . postalCode = r . postalCode ;
181- address . departmentCode = r . departmentCode ;
182- address . departmentName = r . departmentName ;
183- address . region = r . region ;
184- if ( r . location ?. lat && r . location ?. lon ) {
185- address . location = { lat : r . location . lat , lon : r . location . lon } ;
186- address . geoPoint = r . geoPoint ;
82+ // GEOLOC
83+ const resultGeoloc = await enrichWithGeoloc ( publisher , missions ) ;
84+ resultGeoloc . forEach ( ( r ) => {
85+ const mission = missions . find ( ( m ) => m . clientId . toString ( ) === r . clientId . toString ( ) ) ;
86+ if ( mission && r . addressIndex < mission . addresses . length ) {
87+ const address = mission . addresses [ r . addressIndex ] ;
88+ address . street = r . street ;
89+ address . city = r . city ;
90+ address . postalCode = r . postalCode ;
91+ address . departmentCode = r . departmentCode ;
92+ address . departmentName = r . departmentName ;
93+ address . region = r . region ;
94+ if ( r . location ?. lat && r . location ?. lon ) {
95+ address . location = { lat : r . location . lat , lon : r . location . lon } ;
96+ address . geoPoint = r . geoPoint ;
97+ }
98+ address . geolocStatus = r . geolocStatus ;
18799 }
188- address . geolocStatus = r . geolocStatus ;
189- }
190- } ) ;
100+ } ) ;
191101
192- // RNA
193- console . log ( `[Organization] Starting organization verification for ${ missions . length } missions` ) ;
194- const resultRNA = await verifyOrganization ( missions ) ;
195- console . log ( `[Organization] Received ${ resultRNA . length } verification results` ) ;
196-
197- resultRNA . forEach ( ( r ) => {
198- const mission = missions . find ( ( m ) => m . clientId . toString ( ) === r . clientId . toString ( ) ) ;
199- if ( mission ) {
200- mission . organizationId = r . organizationId ;
201- mission . organizationNameVerified = r . organizationNameVerified ;
202- mission . organizationRNAVerified = r . organizationRNAVerified ;
203- mission . organizationSirenVerified = r . organizationSirenVerified ;
204- mission . organizationSiretVerified = r . organizationSiretVerified ;
205- mission . organizationAddressVerified = r . organizationAddressVerified ;
206- mission . organizationCityVerified = r . organizationCityVerified ;
207- mission . organizationPostalCodeVerified = r . organizationPostalCodeVerified ;
208- mission . organizationDepartmentCodeVerified = r . organizationDepartmentCodeVerified ;
209- mission . organizationDepartmentNameVerified = r . organizationDepartmentNameVerified ;
210- mission . organizationRegionVerified = r . organizationRegionVerified ;
211- mission . organizationVerificationStatus = r . organizationVerificationStatus ;
212- } else {
213- console . log ( `[Organization Warning] Could not find mission for clientId: ${ r . clientId } ` ) ;
214- }
215- } ) ;
102+ // RNA
103+ await verifyOrganization ( missions ) ;
104+ // BULK WRITE
105+ await bulkDB ( missions , publisher , obj ) ;
106+ }
216107
217- // BULK WRITE
218- await bulkDB ( missions , publisher , obj ) ;
108+ // CLEAN DB
109+ await cleanDB ( publisher , obj ) ;
219110
220111 // STATS
221112 obj . missionCount = await MissionModel . countDocuments ( {
0 commit comments