1414 * limitations under the License.
1515 */
1616
17- import { Transform , Readable } from 'node:stream' ;
17+ import { Transform , Readable , TransformCallback } from 'node:stream' ;
1818import { createInterface } from 'node:readline' ;
1919import { pipeline } from 'node:stream/promises' ;
2020import * as fs from 'node:fs' ;
2121import { EOL } from 'node:os' ;
22- import { HttpApi } from '@jsforce/jsforce-node/lib/http-api.js ' ;
22+ import { fetch } from 'undici ' ;
2323import { HttpResponse } from '@jsforce/jsforce-node' ;
2424import {
2525 IngestJobV2Results ,
@@ -75,28 +75,91 @@ export enum ColumnDelimiter {
7575
7676export type ColumnDelimiterKeys = keyof typeof ColumnDelimiter ;
7777
78- async function bulkRequest ( conn : Connection , url : string ) : Promise < { body : string ; headers : HttpResponse [ 'headers' ] } > {
79- const httpApi = new HttpApi ( conn , {
80- responseType : 'text/plain' , // this ensures jsforce doesn't try parsing the body
81- } ) ;
78+ /**
79+ * Transform stream that skips the first line of CSV data (the header row).
80+ * Used when processing subsequent bulk result pages to avoid duplicate headers.
81+ */
82+ export class SkipFirstLineTransform extends Transform {
83+ private firstLineSkipped = false ;
84+ private buffer = '' ;
8285
83- let headers : HttpResponse [ 'headers' ] | undefined ;
86+ public constructor ( ) {
87+ super ( ) ;
88+ }
8489
85- httpApi . on ( 'response' , ( response : HttpResponse ) => {
86- headers = response . headers ;
87- } ) ;
90+ public _transform ( chunk : Buffer , _encoding : BufferEncoding , callback : TransformCallback ) : void {
91+ if ( this . firstLineSkipped ) {
92+ // After first line is skipped, pass through all subsequent data
93+ callback ( null , chunk ) ;
94+ return ;
95+ }
96+
97+ // Buffer incoming data until we find the first newline
98+ this . buffer += chunk . toString ( 'utf8' ) ;
99+
100+ const newlineIndex = this . buffer . indexOf ( '\n' ) ;
101+
102+ if ( newlineIndex === - 1 ) {
103+ // No newline yet, keep buffering
104+ callback ( ) ;
105+ return ;
106+ }
107+
108+ // Found the newline, skip everything up to and including it
109+ const remainingData = this . buffer . slice ( newlineIndex + 1 ) ;
110+ this . firstLineSkipped = true ;
111+ this . buffer = '' ; // Clear buffer to free memory
112+
113+ callback ( null , Buffer . from ( remainingData , 'utf8' ) ) ;
114+ }
88115
89- const body = await httpApi . request < string > ( {
90- url : conn . normalizeUrl ( url ) ,
116+ public _flush ( callback : TransformCallback ) : void {
117+ // If we reach the end without finding a newline, clear buffer and finish
118+ this . buffer = '' ;
119+ callback ( ) ;
120+ }
121+ }
122+
123+ async function bulkRequest (
124+ conn : Connection ,
125+ url : string
126+ ) : Promise < { stream : Readable ; headers : HttpResponse [ 'headers' ] } > {
127+ // Bypass jsforce entirely and use undici fetch to avoid any buffering.
128+ // jsforce's Transport.httpRequest() adds a 'complete' listener which triggers readAll() buffering.
129+ // Using undici fetch directly gives us the raw response stream without any intermediate buffering.
130+
131+ const normalizedUrl = conn . normalizeUrl ( url ) ;
132+
133+ // Prepare request headers with authorization
134+ const headers : { [ name : string ] : string } = {
135+ 'content-Type' : 'text/csv' ,
136+ } ;
137+
138+ if ( conn . accessToken ) {
139+ headers . Authorization = `Bearer ${ conn . accessToken } ` ;
140+ }
141+
142+ const response = await fetch ( normalizedUrl , {
91143 method : 'GET' ,
144+ headers,
92145 } ) ;
93146
94- if ( ! headers ) throw new Error ( 'failed to get HTTP headers for bulk query' ) ;
147+ if ( ! response . ok ) {
148+ throw new Error ( `HTTP ${ response . status } : ${ response . statusText } ` ) ;
149+ }
95150
96- return {
97- body,
98- headers,
99- } ;
151+ if ( ! response . body ) {
152+ throw new Error ( 'No body was returned' ) ;
153+ }
154+ const stream = Readable . fromWeb ( response . body ) ;
155+
156+ // Extract headers in the format jsforce expects
157+ const responseHeaders : HttpResponse [ 'headers' ] = { } ;
158+ response . headers . forEach ( ( value : string , key : string ) => {
159+ responseHeaders [ key ] = value ;
160+ } ) ;
161+
162+ return { stream, headers : responseHeaders } ;
100163}
101164
102165export async function exportRecords (
@@ -124,6 +187,9 @@ export async function exportRecords(
124187
125188 let recordsWritten = 0 ;
126189
190+ // refresh here because `bulkRequest` uses undici for fetching results.
191+ await conn . refreshAuth ( ) ;
192+
127193 while ( locator !== 'null' ) {
128194 // we can't parallelize this because we:
129195 // 1. need to get 1 batch to know the locator for the next one
@@ -151,7 +217,7 @@ export async function exportRecords(
151217
152218 // eslint-disable-next-line no-await-in-loop
153219 await pipeline (
154- Readable . from ( res . body ) ,
220+ res . stream ,
155221 new csvParse ( { columns : true , delimiter : ColumnDelimiter [ outputInfo . columnDelimiter ] } ) ,
156222 new Transform ( {
157223 objectMode : true ,
@@ -173,18 +239,15 @@ export async function exportRecords(
173239 await pipeline (
174240 locator
175241 ? [
176- // Skip the 1st row (CSV header) by finding the index of the first `LF`
177- // occurence and move the position 1 char ahead.
178- //
179- // CSVs using `CRLF` are still handled correctly because `CR` and `LF` are different chars in the string.
180- Readable . from ( res . body . slice ( res . body . indexOf ( '\n' ) + 1 ) ) ,
242+ res . stream ,
243+ new SkipFirstLineTransform ( ) ,
181244 fs . createWriteStream ( outputInfo . filePath , {
182245 // Open file for appending. The file is created if it does not exist.
183246 // https://nodejs.org/api/fs.html#file-system-flags
184247 flags : 'a' , // append mode
185248 } ) ,
186249 ]
187- : [ Readable . from ( res . body ) , fs . createWriteStream ( outputInfo . filePath ) ]
250+ : [ res . stream , fs . createWriteStream ( outputInfo . filePath ) ]
188251 ) ;
189252 }
190253
0 commit comments