|
1 | | -import { pipeline, Readable } from 'stream' |
2 | | -import esort from 'external-sorting' |
3 | | -import tmp from 'tmp' |
| 1 | +import { pipeline } from 'stream/promises' |
| 2 | +import { Readable } from 'stream' |
4 | 3 | import { sync as commandExistsSync } from 'command-exists' |
5 | | - |
6 | 4 | import split2 from 'split2' |
7 | 5 | import fs from 'fs' |
8 | 6 | import { spawn } from 'child_process' |
9 | 7 | import { TrixInputTransform } from './TrixInputTransform' |
10 | 8 | import { TrixOutputTransform } from './TrixOutputTransform' |
| 9 | +import { sortLinesExternal } from './sortLines' |
11 | 10 |
|
12 | | -// Characters that may be part of a word |
13 | | -const wordMiddleChars = [] as boolean[] |
14 | | -const wordBeginChars = [] as boolean[] |
| 11 | +const isWin = |
| 12 | + typeof process === 'undefined' ? false : process.platform === 'win32' |
15 | 13 |
|
16 | | -function isalpha(c: string) { |
17 | | - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') |
18 | | -} |
| 14 | +const useExternalSort = !isWin && commandExistsSync('sort') |
19 | 15 |
|
20 | | -function isdigit(c: string) { |
21 | | - return c >= '0' && c <= '9' |
22 | | -} |
| 16 | +async function makeIxWithExternalSort( |
| 17 | + fileStream: Readable, |
| 18 | + outIxFilename: string, |
| 19 | +) { |
| 20 | + const out = fs.createWriteStream(outIxFilename) |
| 21 | + const sort = spawn('sort', ['-k1,1'], { |
| 22 | + env: { ...process.env, LC_ALL: 'C' }, |
| 23 | + }) |
23 | 24 |
|
24 | | -function isalnum(c: string) { |
25 | | - return isalpha(c) || isdigit(c) |
26 | | -} |
| 25 | + sort.on('error', function onSortError(err) { |
| 26 | + throw err |
| 27 | + }) |
27 | 28 |
|
28 | | -function initCharTables() { |
29 | | - for (let c = 0; c < 256; ++c) { |
30 | | - if (isalnum(String.fromCharCode(c))) { |
31 | | - wordBeginChars[c] = true |
32 | | - wordMiddleChars[c] = true |
33 | | - } |
34 | | - } |
35 | | - wordBeginChars['_'.charCodeAt(0)] = wordMiddleChars['_'.charCodeAt(0)] = true |
36 | | - wordMiddleChars['.'.charCodeAt(0)] = true |
37 | | - wordMiddleChars['-'.charCodeAt(0)] = true |
38 | | -} |
| 29 | + const inputDone = pipeline( |
| 30 | + fileStream, |
| 31 | + split2(), |
| 32 | + new TrixInputTransform(), |
| 33 | + sort.stdin, |
| 34 | + ) |
39 | 35 |
|
40 | | -const isWin = |
41 | | - typeof process !== 'undefined' ? process.platform === 'win32' : false |
| 36 | + const outputDone = pipeline( |
| 37 | + sort.stdout, |
| 38 | + split2(), |
| 39 | + new TrixOutputTransform(), |
| 40 | + out, |
| 41 | + ) |
42 | 42 |
|
43 | | -export async function makeIxStream( |
44 | | - fileStream: Readable, |
45 | | - outIxFilename: string, |
46 | | -) { |
47 | | - return new Promise((resolve, reject) => { |
48 | | - initCharTables() |
| 43 | + await Promise.all([inputDone, outputDone]) |
| 44 | +} |
49 | 45 |
|
50 | | - const out = fs.createWriteStream(outIxFilename) |
| 46 | +async function makeIxWithJsSort(fileStream: Readable, outIxFilename: string) { |
| 47 | + const out = fs.createWriteStream(outIxFilename) |
51 | 48 |
|
52 | | - // see https://stackoverflow.com/questions/68835344/ for explainer of |
53 | | - // writer |
| 49 | + // Transform input |
| 50 | + const transformedInput = fileStream.pipe(split2()).pipe(new TrixInputTransform()) |
54 | 51 |
|
55 | | - // override locale to C, but keep other env vars |
56 | | - if (commandExistsSync('sort') && !isWin) { |
57 | | - const sort = spawn('sort', ['-k1,1'], { |
58 | | - env: { ...process.env, LC_ALL: 'C' }, |
59 | | - }) |
60 | | - pipeline( |
61 | | - fileStream, |
62 | | - split2(), |
63 | | - new TrixInputTransform(), |
64 | | - sort.stdin, |
65 | | - err => { |
66 | | - if (err) { |
67 | | - reject(err) |
68 | | - } |
69 | | - }, |
70 | | - ) |
| 52 | + // Sort lines using external merge sort |
| 53 | + const sortedOutput = split2() |
| 54 | + const sortDone = sortLinesExternal(transformedInput, sortedOutput) |
71 | 55 |
|
72 | | - pipeline(sort.stdout, split2(), new TrixOutputTransform(), out, err => { |
73 | | - if (err) { |
74 | | - reject(err) |
75 | | - } else { |
76 | | - resolve(true) |
77 | | - } |
78 | | - }) |
79 | | - } else { |
80 | | - const dir = tmp.dirSync({ |
81 | | - prefix: 'jbrowse-trix-sort', |
82 | | - }) |
83 | | - const tempDir = dir.name |
84 | | - const output = split2() |
| 56 | + // Transform sorted output and write to file |
| 57 | + const writeDone = pipeline(sortedOutput, new TrixOutputTransform(), out) |
85 | 58 |
|
86 | | - pipeline(output, new TrixOutputTransform(), out, err => { |
87 | | - if (err) { |
88 | | - reject(err) |
89 | | - } |
90 | | - }) |
91 | | - esort({ |
92 | | - input: pipeline(fileStream, split2(), new TrixInputTransform(), err => { |
93 | | - if (err) { |
94 | | - reject(err) |
95 | | - } |
96 | | - }), |
97 | | - output, |
98 | | - tempDir, |
99 | | - }) |
100 | | - .asc() |
101 | | - .then(resolve, reject) |
102 | | - } |
103 | | - }) |
| 59 | + await Promise.all([sortDone, writeDone]) |
| 60 | +} |
| 61 | + |
| 62 | +export async function makeIxStream( |
| 63 | + fileStream: Readable, |
| 64 | + outIxFilename: string, |
| 65 | +) { |
| 66 | + if (useExternalSort) { |
| 67 | + await makeIxWithExternalSort(fileStream, outIxFilename) |
| 68 | + } else { |
| 69 | + await makeIxWithJsSort(fileStream, outIxFilename) |
| 70 | + } |
104 | 71 | } |
105 | 72 |
|
106 | 73 | export async function makeIx(inFile: string, outIndex: string) { |
|
0 commit comments