@@ -2,7 +2,7 @@ import fs from 'fs';
22import { pipeline } from 'stream/promises' ;
33import { Readable } from 'stream' ;
44import crypto from 'crypto' ;
5- import { Client } from '@elastic/elasticsearch' ;
5+ import elasticsearch from '@elastic/elasticsearch' ;
66
77// eslint-import-resolve does not support `exports` in package.json.
88// eslint-disable-next-line import/no-unresolved
@@ -12,7 +12,7 @@ import JSZip from 'jszip';
1212const ELASTICSEARCH_URL = 'http://localhost:62223' ;
1313const OUTPUT_DIR = './data' ;
1414
15- const client = new Client ( {
15+ const client = new elasticsearch . Client ( {
1616 node : ELASTICSEARCH_URL ,
1717} ) ;
1818
@@ -29,32 +29,24 @@ function sha256(input) {
2929async function * scanIndex ( index ) {
3030 let processedCount = 0 ;
3131
32- let initialResult = await client . search ( {
32+ const { body : initialResult } = await client . search ( {
3333 index,
3434 size : 200 ,
3535 scroll : '5m' ,
36- track_total_hits : true ,
3736 } ) ;
3837
39- const totalCount = initialResult . hits . total . value ;
40- let scrollId = initialResult . _scroll_id ;
38+ const totalCount = initialResult . hits . total ;
4139
4240 for ( const hit of initialResult . hits . hits ) {
4341 processedCount += 1 ;
4442 yield hit ;
4543 }
4644
4745 while ( processedCount < totalCount ) {
48- const scrollResult = await client . scroll ( {
49- scrollId,
46+ const { body : scrollResult } = await client . scroll ( {
47+ scrollId : initialResult . _scroll_id ,
5048 scroll : '5m' ,
5149 } ) ;
52- scrollId = scrollResult . _scroll_id ;
53-
54- if ( scrollResult . hits . hits . length === 0 ) {
55- break ;
56- }
57-
5850 for ( const hit of scrollResult . hits . hits ) {
5951 processedCount += 1 ;
6052 yield hit ;
@@ -427,61 +419,43 @@ function writeFile(fileName) {
427419/**
428420 * Main process
429421 */
430- async function main ( ) {
431- try {
432- await Promise . all ( [
433- pipeline ( scanIndex ( 'articles' ) , dumpArticles , writeFile ( 'articles.csv' ) ) ,
434- pipeline (
435- scanIndex ( 'articles' ) ,
436- dumpArticleReplies ,
437- writeFile ( 'article_replies.csv' )
438- ) ,
439- pipeline (
440- scanIndex ( 'articles' ) ,
441- dumpArticleHyperlinks ,
442- writeFile ( 'article_hyperlinks.csv' )
443- ) ,
444- pipeline (
445- scanIndex ( 'articles' ) ,
446- dumpArticleCategories ,
447- writeFile ( 'article_categories.csv' )
448- ) ,
449- pipeline ( scanIndex ( 'replies' ) , dumpReplies , writeFile ( 'replies.csv' ) ) ,
450- pipeline (
451- scanIndex ( 'replies' ) ,
452- dumpReplyHyperlinks ,
453- writeFile ( 'reply_hyperlinks.csv' )
454- ) ,
455- pipeline (
456- scanIndex ( 'replyrequests' ) ,
457- dumpReplyRequests ,
458- writeFile ( 'reply_requests.csv' )
459- ) ,
460- pipeline (
461- scanIndex ( 'categories' ) ,
462- dumpCategories ,
463- writeFile ( 'categories.csv' )
464- ) ,
465- pipeline (
466- scanIndex ( 'articlereplyfeedbacks' ) ,
467- dumpArticleReplyFeedbacks ,
468- writeFile ( 'article_reply_feedbacks.csv' )
469- ) ,
470- pipeline (
471- scanIndex ( 'analytics' ) ,
472- dumpAnalytics ,
473- writeFile ( 'analytics.csv' )
474- ) ,
475- pipeline (
476- scanIndex ( 'users' ) ,
477- dumpUsers ,
478- writeFile ( 'anonymized_users.csv' )
479- ) ,
480- ] ) ;
481- } catch ( e ) {
482- console . error ( e ) ;
483- process . exit ( 1 ) ;
484- }
485- }
486-
487- main ( ) ;
422+ pipeline ( scanIndex ( 'articles' ) , dumpArticles , writeFile ( 'articles.csv' ) ) ;
423+ pipeline (
424+ scanIndex ( 'articles' ) ,
425+ dumpArticleReplies ,
426+ writeFile ( 'article_replies.csv' )
427+ ) ;
428+ pipeline (
429+ scanIndex ( 'articles' ) ,
430+ dumpArticleHyperlinks ,
431+ writeFile ( 'article_hyperlinks.csv' )
432+ ) ;
433+ pipeline (
434+ scanIndex ( 'articles' ) ,
435+ dumpArticleCategories ,
436+ writeFile ( 'article_categories.csv' )
437+ ) ;
438+
439+ pipeline ( scanIndex ( 'replies' ) , dumpReplies , writeFile ( 'replies.csv' ) ) ;
440+ pipeline (
441+ scanIndex ( 'replies' ) ,
442+ dumpReplyHyperlinks ,
443+ writeFile ( 'reply_hyperlinks.csv' )
444+ ) ;
445+
446+ pipeline (
447+ scanIndex ( 'replyrequests' ) ,
448+ dumpReplyRequests ,
449+ writeFile ( 'reply_requests.csv' )
450+ ) ;
451+
452+ pipeline ( scanIndex ( 'categories' ) , dumpCategories , writeFile ( 'categories.csv' ) ) ;
453+
454+ pipeline (
455+ scanIndex ( 'articlereplyfeedbacks' ) ,
456+ dumpArticleReplyFeedbacks ,
457+ writeFile ( 'article_reply_feedbacks.csv' )
458+ ) ;
459+
460+ pipeline ( scanIndex ( 'analytics' ) , dumpAnalytics , writeFile ( 'analytics.csv' ) ) ;
461+ pipeline ( scanIndex ( 'users' ) , dumpUsers , writeFile ( 'anonymized_users.csv' ) ) ;
0 commit comments