Skip to content
This repository was archived by the owner on Aug 15, 2023. It is now read-only.

Commit 71f2501

Browse files
authored
fix(shared): replace 'jsdom' with 'linkedom' to prevent the parser from crashing due to a memory allocation failure (#859)
1 parent 027d3af commit 71f2501

File tree

64 files changed

+249
-138
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+249
-138
lines changed

.dockerignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@ __mocks__
66
node_modules
77
docker
88
!docker/guardoni/guardoni.config.json
9-
#.yarn/cache
9+
#.yarn/cache
10+
**/__tests__
11+
**/fixtures

.vscode/launch.json

+27
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,33 @@
2929
},
3030
"args": ["."],
3131
"outputCapture": "std"
32+
},
33+
{
34+
"type": "node",
35+
"request": "attach",
36+
"name": "Attach to yt:server:watch (yt:watch)",
37+
"protocol": "inspector",
38+
"port": 4320,
39+
"restart": true,
40+
"cwd": "${workspaceRoot}/platforms/yttrex/backend"
41+
},
42+
{
43+
"type": "node",
44+
"request": "attach",
45+
"name": "Attach to yt:parserv:watch",
46+
"protocol": "inspector",
47+
"port": 4321,
48+
"restart": true,
49+
"cwd": "${workspaceRoot}/platforms/yttrex/backend"
50+
},
51+
{
52+
"type": "node",
53+
"request": "attach",
54+
"name": "Attach to yt:leaveserv:watch",
55+
"protocol": "inspector",
56+
"port": 4322,
57+
"restart": true,
58+
"cwd": "${workspaceRoot}/platforms/yttrex/backend"
3259
}
3360
]
3461
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

Dockerfile.ytbackend

-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,5 @@ WORKDIR /app/platforms/yttrex/backend
4343
RUN yarn workspaces focus --production
4444

4545
ENV key=fuffa
46-
ENV DEBUG=@trex*
4746

4847
CMD ["yarn", "start"]

docker-compose.yml

+31-2
Original file line numberDiff line numberDiff line change
@@ -65,18 +65,47 @@ services:
6565
tty: true # docker run -t
6666

6767
yt-backend:
68+
image: trex:yt-backend
6869
build:
6970
dockerfile: Dockerfile.ytbackend
7071
context: .
71-
container_name: yt-backend
72+
73+
yt-server:
74+
image: trex:yt-backend
75+
container_name: yt-server
76+
command: yarn start
7277
ports:
7378
- '9000:9000'
79+
env_file:
80+
- ./platforms/yttrex/backend/.env
7481
environment:
75-
- DEBUG=*
82+
- BACKEND_DEBUG=yttrex*,@trex*
83+
- mongoHost=mongodb
84+
depends_on:
85+
- yt-backend
86+
- mongodb
87+
- mongo-yt-indexes
88+
deploy:
89+
resources:
90+
limits:
91+
memory: 1000M
92+
93+
yt-parser:
94+
image: trex:yt-backend
95+
container_name: yt-parser
96+
command: yarn parserv
97+
env_file:
98+
- ./platforms/yttrex/backend/.env
99+
environment:
100+
- PARSER_DEBUG=@trex:htmls:*,-@trex:htmls:debug
76101
- mongoHost=mongodb
77102
depends_on:
78103
- mongodb
79104
- mongo-yt-indexes
105+
deploy:
106+
resources:
107+
limits:
108+
memory: 2048M
80109

81110
tk-backend:
82111
build:

packages/shared/src/models/MetadataBase.ts

+1-3
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,9 @@ export const MetadataBase = t.type(
1111
href: t.string,
1212
/**
1313
* The supporter publicKey
14-
*
15-
* TODO: it may be replaced by the supporter id
1614
*/
17-
blang: t.union([t.string, t.null, t.undefined]),
1815
supporter: t.string,
16+
blang: t.union([t.string, t.null, t.undefined]),
1917
researchTag: t.union([t.string, t.undefined]),
2018
experimentId: t.union([t.string, t.undefined]),
2119
/**

packages/shared/src/providers/__tests__/parser.provider.spec.ts

+3-4
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import {
99
GetParserProvider,
1010
ParserProviderContext,
1111
wrapDissector,
12-
} from '../parser.provider';
12+
} from '../parser/parser.provider';
1313

1414
const logger = GetLogger('parser-spec');
1515

@@ -112,7 +112,7 @@ describe('Parser Provider', () => {
112112

113113
const output = getSuccessfulOutput<
114114
Contribution,
115-
Metadata,
115+
any,
116116
any,
117117
{ [key: string]: any }
118118
>(
@@ -134,9 +134,8 @@ describe('Parser Provider', () => {
134134

135135
expect(output).toMatchObject({
136136
[source.id.substring(0, 6)]: {
137-
...metadata,
138137
failures: '[]',
139-
nature: { type: metadata.type },
138+
nature: metadata.type,
140139
count: JSON.stringify({ metadata: 1, source: 1 }),
141140
},
142141
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export * from './parser.provider';

packages/shared/src/providers/parser.provider.ts renamed to packages/shared/src/providers/parser/parser.provider.ts

+20-8
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ import subMinutes from 'date-fns/subMinutes';
44
import * as t from 'io-ts';
55
import _ from 'lodash';
66
import { MongoClient } from 'mongodb';
7-
import { Logger, trexLogger } from '../logger';
8-
import { sleep } from '../utils/promise.utils';
9-
import type * as mongo3 from './mongo.provider';
7+
import { Logger, trexLogger } from '../../logger';
8+
import { sleep } from '../../utils/promise.utils';
9+
import type * as mongo3 from '../mongo.provider';
1010

1111
/**
1212
* The parser configuration
@@ -491,7 +491,8 @@ export const parseContributions =
491491
ctx.log.debug('Parsed %O', result);
492492
const oldMetadata = await ctx.getMetadata(entry);
493493
const metadata = ctx.buildMetadata(result, oldMetadata);
494-
ctx.log.debug('Metadata %O', metadata);
494+
495+
// ctx.log.debug('Metadata %O', metadata);
495496

496497
if (metadata) {
497498
const m = await ctx.saveResults(result.source, metadata);
@@ -513,6 +514,15 @@ export const parseContributions =
513514
return results;
514515
};
515516

517+
const getMemoryUsed = (): NodeJS.MemoryUsage => {
518+
const used = process.memoryUsage();
519+
const memoryLog: any = {};
520+
for (const key in used) {
521+
memoryLog[key] = Math.round(((used as any)[key] / 1024 / 1024) * 100) / 100;
522+
}
523+
return memoryLog;
524+
};
525+
516526
/* yes a better way might exist */
517527
let previousFrequency = 0;
518528

@@ -552,6 +562,8 @@ export const executionLoop =
552562
stop,
553563
processedCounter
554564
);
565+
ctx.log.info('Memory usage %O (MB)', getMemoryUsed());
566+
555567
let htmlFilter: Record<string, any> = {
556568
savingTime: {
557569
$gt: lastExecution,
@@ -663,14 +675,14 @@ export const getSuccessfulOutput = <
663675
): any => {
664676
return output.reduce((acc, { source, metadata, failures, log, count }) => {
665677
const index = getEntryId(source).substring(0, 6);
678+
const { id, nature } = (metadata as any) ?? {};
679+
const n: any = nature;
666680
return {
667681
...acc,
668682
[index]: {
669683
...log,
670-
// log: JSON.stringify(log),
671-
// findings: markOutputField(findings),
672-
// metadata: (metadata as any)?.id ?? null,
673-
...metadata,
684+
id,
685+
nature: n?.nature?.type ?? n?.type,
674686
failures: JSON.stringify(
675687
Object.entries(failures).map(([key, value]) => ({
676688
[key]: value.message,

packages/shared/src/test/utils/parser.utils.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
ParserFn,
1111
ParserProviderContext,
1212
printResultOutput,
13-
} from '../../providers/parser.provider';
13+
} from '../../providers/parser/parser.provider';
1414

1515
/**
1616
* Read fixtures file from path

platforms/tktrex/backend/bin/parser.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ nconf.argv().env().file({ file: 'config/settings.json' });
99

1010
import { FixtureReporter } from '@shared/parser/reporters/FixtureReporter';
1111
import * as mongo3 from '@shared/providers/mongo.provider';
12-
import { GetParserProvider } from '@shared/providers/parser.provider';
12+
import { GetParserProvider } from '@shared/providers/parser';
1313
import { TKMetadata } from '@tktrex/shared/models/metadata';
1414
import { parsers } from '@tktrex/shared/parser/parsers';
1515
import { HTMLSource } from '@tktrex/shared/parser/source';

platforms/tktrex/backend/lib/parser.ts

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ import {
44
GetMetadataFn,
55
ParserProviderContextDB,
66
SaveResults,
7-
} from '@shared/providers/parser.provider';
7+
} from '@shared/providers/parser';
88
import { sanitizeHTML } from '@shared/utils/html.utils';
99
import { TKMetadata } from '@tktrex/shared/models/metadata';
1010
import { TKParserConfig } from '@tktrex/shared/parser/config';
1111
import { HTMLSource } from '@tktrex/shared/parser/source';
1212
import D from 'debug';
13-
import { JSDOM } from 'jsdom';
1413
import _ from 'lodash';
1514
import nconf from 'nconf';
15+
import { parseHTML } from 'linkedom';
1616

1717
const debug = D('lib:parserchain');
1818

@@ -35,7 +35,7 @@ export const parserConfig: TKParserConfig = {
3535

3636
export const addDom: ContributionAndDOMFn<HTMLSource> = (e) => ({
3737
...e,
38-
jsdom: new JSDOM(sanitizeHTML(e.html.html)).window.document,
38+
jsdom: parseHTML(sanitizeHTML(e.html.html)).window.document,
3939
});
4040

4141
export const getLastHTMLs =

platforms/tktrex/backend/package.json

+1-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
"geoip-lite": "^1.4.6",
3535
"io-ts": "^2.2.16",
3636
"io-ts-types": "^0.5.19",
37-
"jsdom": "^16.7.0",
37+
"linkedom": "^0.14.21",
3838
"lodash": "^4.17.21",
3939
"module-alias": "^2.2.2",
4040
"moment": "^2.29.4",
@@ -50,7 +50,6 @@
5050
"@types/eslint": "^8",
5151
"@types/express": "^4.17.13",
5252
"@types/geoip-lite": "^1.4.1",
53-
"@types/jsdom": "^16",
5453
"@types/lodash": "^4.14.186",
5554
"@types/module-alias": "^2",
5655
"@types/node": "^16.11.68",

platforms/tktrex/backend/routes/__tests__/personal.e2e.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import bs58 from '@shared/providers/bs58.provider';
66
import {
77
GetParserProvider,
88
ParserProvider,
9-
} from '@shared/providers/parser.provider';
9+
} from '@shared/providers/parser';
1010
import { fc } from '@shared/test';
1111
import {
1212
readFixtureJSON,

platforms/tktrex/shared/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
"dependencies": {
3232
"fp-ts": "^2.11.9",
3333
"io-ts": "^2.2.16",
34-
"linkedom": "^0.14.4",
34+
"linkedom": "^0.14.21",
3535
"mongodb": "^4.12.1",
3636
"ts-endpoint": "^2.0.0"
3737
},

platforms/tktrex/shared/src/parser/config.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserConfiguration } from '@shared/providers/parser.provider';
1+
import { ParserConfiguration } from '@shared/providers/parser';
22

33
/**
44
* The TK Parser configuration interface

platforms/tktrex/shared/src/parser/metadata.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { BuildMetadataFn } from '@shared/providers/parser.provider';
1+
import { BuildMetadataFn } from '@shared/providers/parser';
22
import _ from 'lodash';
33
import { TKMetadata } from '../models/metadata';
44
import { TKParsers } from './parsers';

platforms/tktrex/shared/src/parser/parsers/author.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserFn } from '@shared/providers/parser.provider';
1+
import { ParserFn } from '@shared/providers/parser';
22
import { TKParserConfig } from '../config';
33
import { HTMLSource } from '../source';
44

platforms/tktrex/shared/src/parser/parsers/description.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import {
22
ContributionWithDOM,
33
ParserFn,
4-
} from '@shared/providers/parser.provider';
4+
} from '@shared/providers/parser';
55
import { TKParserConfig } from '../config';
66
import { HTMLSource } from '../source';
77
import _ from 'lodash';

platforms/tktrex/shared/src/parser/parsers/downloader.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserFn } from '@shared/providers/parser.provider';
1+
import { ParserFn } from '@shared/providers/parser';
22
import _ from 'lodash';
33
import D from 'debug';
44
import { HTMLSource } from '../source';

platforms/tktrex/shared/src/parser/parsers/foryou.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserFn } from '@shared/providers/parser.provider';
1+
import { ParserFn } from '@shared/providers/parser';
22
import D from 'debug';
33
import { MediaFile } from '../../models/metadata/MediaFile';
44
import { TKParserConfig } from '../config';

platforms/tktrex/shared/src/parser/parsers/hashtags.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { map } from 'lodash';
2-
import { ParserFn } from '@shared/providers/parser.provider';
2+
import { ParserFn } from '@shared/providers/parser';
33
import type { HTMLSource } from '../source';
44
import type { TKParserConfig } from '../config';
55
import D from 'debug';

platforms/tktrex/shared/src/parser/parsers/metrics.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserFn } from '@shared/providers/parser.provider';
1+
import { ParserFn } from '@shared/providers/parser';
22
import { HTMLSource } from '../source';
33
import { Metrics } from '../../models/metadata/Metrics';
44
import { TKParserConfig } from '../config';

platforms/tktrex/shared/src/parser/parsers/music.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserFn } from '@shared/providers/parser.provider';
1+
import { ParserFn } from '@shared/providers/parser';
22
import { TKParserConfig } from '../config';
33
import { HTMLSource } from '../source';
44
import D from 'debug';

platforms/tktrex/shared/src/parser/parsers/native.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import {
22
ContributionWithDOM,
33
ParserFn,
4-
} from '@shared/providers/parser.provider';
4+
} from '@shared/providers/parser';
55
import D from 'debug';
66
import { HTMLSource } from '../source';
77
import { TKParserConfig } from '../config';

platforms/tktrex/shared/src/parser/parsers/nature.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserFn } from '@shared/providers/parser.provider';
1+
import { ParserFn } from '@shared/providers/parser';
22
import { throwEitherError } from '@shared/utils/fp.utils';
33
import { Nature } from '../../models/Nature';
44
import { TKParserConfig } from '../config';

platforms/tktrex/shared/src/parser/parsers/profile.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { trexLogger } from '@shared/logger';
2-
import { ParserFn } from '@shared/providers/parser.provider';
2+
import { ParserFn } from '@shared/providers/parser';
33
import { throwEitherError } from '@shared/utils/fp.utils';
44
import _ from 'lodash';
55
import { NativeVideoN } from '../../models/Nature';

platforms/tktrex/shared/src/parser/parsers/search.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ParserFn } from '@shared/providers/parser.provider';
1+
import { ParserFn } from '@shared/providers/parser';
22
import { throwEitherError } from '@shared/utils/fp.utils';
33
import { TKParserConfig } from '../config';
44
import { HTMLSource } from '../source';

platforms/yttrex/backend/__tests__/parser/html/parseHome.e2e.ts

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ describe('Parser: home', () => {
156156
thumbnailHref,
157157
recommendedRelativeSeconds,
158158
publicationTime,
159+
elems,
159160
...s
160161
}) => ({
161162
...s,

platforms/yttrex/backend/__tests__/parser/html/parseVideo.e2e.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { GetTest, Test } from '../../../tests/Test';
22

3-
import { ParserProviderContextDB } from '@shared/providers/parser.provider';
3+
import { ParserProviderContextDB } from '@shared/providers/parser';
44
import {
55
readFixtureJSON,
66
readFixtureJSONPaths,

0 commit comments

Comments
 (0)