Skip to content

Commit 41473d3

Browse files
committed
Use @streamparser/json if the input is too large to fit in a V8 string.
1 parent f0248da commit 41473d3

3 files changed

Lines changed: 71 additions & 8 deletions

File tree

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
"@fluent/langneg": "^0.7.0",
7777
"@fluent/react": "^0.15.2",
7878
"@lezer/highlight": "^1.2.3",
79+
"@streamparser/json": "^0.0.22",
7980
"@tgwf/co2": "^0.18.0",
8081
"array-move": "^3.0.1",
8182
"array-range": "^1.0.1",

src/profile-logic/process-profile.ts

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1984,6 +1984,41 @@ function attemptToFixProcessedProfileThroughMutation(
19841984
return profile;
19851985
}
19861986

1987+
/**
 * Decode a byte buffer as UTF-8 text, in strict ("fatal") mode.
 *
 * If decoding fails, the underlying exception is logged to the console and a
 * new Error with a more descriptive message is thrown in its place.
 *
 * @param bytes - The raw bytes to decode.
 * @returns The decoded string.
 * @throws Error if the bytes could not be decoded as UTF-8.
 */
function decodeUtf8WithNiceError(bytes: Uint8Array): string {
  let decodedText: string;
  try {
    // The decoder is constructed inside the try block so that a failure to
    // create it is reported in the same way as a failure to decode.
    decodedText = new TextDecoder(undefined, { fatal: true }).decode(bytes);
  } catch (decodeError) {
    console.error('Source exception:', decodeError);
    throw new Error(
      'The profile array buffer could not be parsed as a UTF-8 string.'
    );
  }
  return decodedText;
}
1998+
1999+
/**
 * Parse a buffer of UTF-8 encoded JSON into a JavaScript value.
 *
 * Buffers below the size threshold are decoded to a string and handed to
 * JSON.parse. Larger buffers are fed to the streaming parser from
 * `@streamparser/json`, which is loaded on demand.
 *
 * @param bytes - The JSON payload.
 * @returns The parsed root value.
 * @throws Error if the streaming parser has not reached its ended state after
 *   all bytes were written. Errors from UTF-8 decoding, JSON.parse and the
 *   streaming parser itself propagate unchanged.
 */
async function parseJSONFromBytes(bytes: Uint8Array): Promise<any> {
  // 512 MiB - 24
  const V8_STRING_MAX_SIZE = 512 * 1024 * 1024 - 24;

  const fitsInSingleString = bytes.byteLength < V8_STRING_MAX_SIZE;
  if (fitsInSingleString) {
    return JSON.parse(decodeUtf8WithNiceError(bytes));
  }

  // A payload this big cannot be held in a single string (in V8), which rules
  // out decoding it and calling JSON.parse. Fall back to a streaming JSON
  // parser. That is much slower than native JSON.parse, which is why it is
  // reserved for the oversized case.
  const { JSONParser } = await import('@streamparser/json');
  const streamingParser = new JSONParser({ paths: ['$'] });

  // Remember the most recent value reported for the '$' path.
  let rootValue: any;
  streamingParser.onValue = ({ value }) => {
    rootValue = value;
  };

  streamingParser.write(bytes);

  if (streamingParser.isEnded) {
    return rootValue;
  }
  throw new Error('Input terminated before end of JSON');
}
2021+
19872022
/**
19882023
* Take some arbitrary profile file from some data source, and turn it into
19892024
* the processed profile format.
@@ -2034,20 +2069,42 @@ export async function unserializeProfileOfArbitraryFormat(
20342069
await import('./import/simpleperf');
20352070
arbitraryFormat = convertSimpleperfTraceProfile(profileBytes);
20362071
} else {
2037-
try {
2038-
const textDecoder = new TextDecoder(undefined, { fatal: true });
2039-
arbitraryFormat = await textDecoder.decode(profileBytes);
2040-
} catch (e) {
2041-
console.error('Source exception:', e);
2042-
throw new Error(
2043-
'The profile array buffer could not be parsed as a UTF-8 string.'
2072+
// Probably a string-based format.
2073+
// We don't want to materialize a string for the entire profileBytes
2074+
// here, in case we want to use the streaming JSON parser later. But
2075+
// to detect perf script + flamegraph, we need to look at some text,
2076+
// so let's decode the first 4096 bytes and detect the format based
2077+
// on the first one or two lines.
2078+
const CHARCODE_LINE_BREAK = 10; // '\n'.charCodeAt(0)
2079+
const firstPage = profileBytes.subarray(0, 4096);
2080+
const firstLineBreakPos = firstPage.indexOf(CHARCODE_LINE_BREAK);
2081+
const secondLineBreakPos =
2082+
firstLineBreakPos !== -1
2083+
? firstPage.indexOf(CHARCODE_LINE_BREAK, firstLineBreakPos + 1)
2084+
: -1;
2085+
const sniffEnd =
2086+
secondLineBreakPos !== -1 ? secondLineBreakPos : firstPage.byteLength;
2087+
// Non-fatal: the cut may fall inside a multi-byte UTF-8 sequence;
2088+
// we only need enough text to recognize the format.
2089+
const firstTwoLinesAsText = new TextDecoder().decode(
2090+
firstPage.subarray(0, sniffEnd)
2091+
);
2092+
if (isPerfScriptFormat(firstTwoLinesAsText)) {
2093+
arbitraryFormat = convertPerfScriptProfile(
2094+
decodeUtf8WithNiceError(profileBytes)
2095+
);
2096+
} else if (isFlameGraphFormat(firstTwoLinesAsText)) {
2097+
arbitraryFormat = convertFlameGraphProfile(
2098+
decodeUtf8WithNiceError(profileBytes)
20442099
);
2100+
} else {
2101+
// Try parsing as JSON.
2102+
arbitraryFormat = await parseJSONFromBytes(profileBytes);
20452103
}
20462104
}
20472105
}
20482106

20492107
if (typeof arbitraryFormat === 'string') {
2050-
// The profile could be JSON or the output from `perf script`. Try `perf script` first.
20512108
if (isPerfScriptFormat(arbitraryFormat)) {
20522109
arbitraryFormat = convertPerfScriptProfile(arbitraryFormat);
20532110
} else if (isFlameGraphFormat(arbitraryFormat)) {

yarn.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2120,6 +2120,11 @@
21202120
dependencies:
21212121
"@sinonjs/commons" "^3.0.1"
21222122

2123+
"@streamparser/json@^0.0.22":
2124+
version "0.0.22"
2125+
resolved "https://registry.yarnpkg.com/@streamparser/json/-/json-0.0.22.tgz#8ddcbcc8c3ca77aeadf80af47f54a64c8739a037"
2126+
integrity sha512-b6gTSBjJ8G8SuO3Gbbj+zXbVx8NSs1EbpbMKpzGLWMdkR+98McH9bEjSz3+0mPJf68c5nxa3CrJHp5EQNXM6zQ==
2127+
21232128
"@surma/rollup-plugin-off-main-thread@^2.2.3":
21242129
version "2.2.3"
21252130
resolved "https://registry.yarnpkg.com/@surma/rollup-plugin-off-main-thread/-/rollup-plugin-off-main-thread-2.2.3.tgz#ee34985952ca21558ab0d952f00298ad2190c053"

0 commit comments

Comments
 (0)