Skip to content

Commit d20d588

Browse files
authored
Merge branch 'main' into enh/config_wildcards
2 parents 84bf2bc + bd92283 commit d20d588

File tree

19 files changed

+397
-86
lines changed

19 files changed

+397
-86
lines changed

deno.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"@std/io": "jsr:@std/[email protected]",
4242
"@std/log": "jsr:@std/[email protected]",
4343
"@std/path": "jsr:@std/[email protected]",
44+
"@std/streams": "jsr:@std/[email protected]",
4445
"@std/yaml": "jsr:@std/yaml@^1.0.4"
4546
},
4647
"tasks": {

deno.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/files/deno.test.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ import { readAll, readerFromStreamReader } from '@std/io'
33
import { basename, dirname, fromFileUrl, join } from '@std/path'
44
import { EOL } from '@std/fs'
55
import type { FileTree } from '../types/filetree.ts'
6-
import { BIDSFileDeno, readFileTree, UnicodeDecodeError } from './deno.ts'
6+
import { BIDSFileDeno, readFileTree } from './deno.ts'
7+
import { UnicodeDecodeError } from './streams.ts'
78
import { requestReadPermission } from '../setup/requestPermissions.ts'
89
import { FileIgnoreRules } from './ignore.ts'
910

src/files/deno.ts

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,9 @@ import { type BIDSFile, FileTree } from '../types/filetree.ts'
77
import { requestReadPermission } from '../setup/requestPermissions.ts'
88
import { FileIgnoreRules, readBidsIgnore } from './ignore.ts'
99
import { logger } from '../utils/logger.ts'
10+
import { createUTF8Stream } from './streams.ts'
1011
export { type BIDSFile, FileTree }
1112

12-
/**
13-
* Thrown when a text file is decoded as UTF-8 but contains UTF-16 characters
14-
*/
15-
export class UnicodeDecodeError extends Error {
16-
constructor(message: string) {
17-
super(message)
18-
this.name = 'UnicodeDecode'
19-
}
20-
}
21-
2213
/**
2314
* Deno implementation of BIDSFile
2415
*/
@@ -67,27 +58,17 @@ export class BIDSFileDeno implements BIDSFile {
6758
* Read the entire file and decode as utf-8 text
6859
*/
6960
async text(): Promise<string> {
70-
const streamReader = this.stream
71-
.pipeThrough(new TextDecoderStream('utf-8'))
72-
.getReader()
73-
let data = ''
61+
const reader = this.stream.pipeThrough(createUTF8Stream()).getReader()
62+
const chunks: string[] = []
7463
try {
75-
// Read once to check for unicode issues
76-
const { done, value } = await streamReader.read()
77-
// Check for UTF-16 BOM
78-
if (value && value.startsWith('\uFFFD')) {
79-
throw new UnicodeDecodeError('This file appears to be UTF-16')
80-
}
81-
if (done) return data
82-
data += value
83-
// Continue reading the rest of the file if no unicode issues were found
8464
while (true) {
85-
const { done, value } = await streamReader.read()
86-
if (done) return data
87-
data += value
65+
const { done, value } = await reader.read()
66+
if (done) break
67+
chunks.push(value)
8868
}
69+
return chunks.join('')
8970
} finally {
90-
streamReader.releaseLock()
71+
reader.releaseLock()
9172
}
9273
}
9374

src/files/filetree.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@ import { FileIgnoreRules } from './ignore.ts'
55

66
const nullFile = {
77
size: 0,
8-
stream: new ReadableStream(),
8+
stream: new ReadableStream({
9+
start(controller) {
10+
controller.close()
11+
}
12+
}),
913
text: () => Promise.resolve(''),
1014
readBytes: async (size: number, offset?: number) => new Uint8Array(),
1115
parent: new FileTree('', '/'),

src/files/json.test.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import { type assert, assertObjectMatch } from '@std/assert'
2-
import type { BIDSFileDeno, UnicodeDecodeError } from './deno.ts'
32
import type { BIDSFile } from '../types/filetree.ts'
43
import type { FileIgnoreRules } from './ignore.ts'
54

src/files/streams.test.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { assert, assertEquals } from '@std/assert'
2+
import { createUTF8Stream, UnicodeDecodeError } from './streams.ts'
3+
import { streamFromUint8Array, streamFromString } from '../tests/utils.ts'
4+
5+
/**
 * Tests for createUTF8Stream: construction, decoding of valid UTF-8, and
 * rejection of input that begins with a UTF-16 byte-order mark.
 */
Deno.test('createUTF8Stream', async (t) => {
  await t.step('should return a TransformStream with UTF8StreamTransformer', () => {
    const stream = createUTF8Stream()
    assertEquals(stream instanceof TransformStream, true)
  })

  await t.step('should correctly transform UTF-8 input', async () => {
    const rawstream = streamFromString('Hello, world!')
    const reader = rawstream.pipeThrough(createUTF8Stream()).getReader()
    try {
      const { value } = await reader.read()
      assertEquals(value, 'Hello, world!')
    } finally {
      // Cancel even when the assertion fails so the stream is not leaked.
      await reader.cancel()
    }
  })

  await t.step('should throw UnicodeDecodeError for UTF-16 input', async () => {
    // 0xFF 0xFE is the little-endian UTF-16 byte-order mark
    const rawStream = streamFromUint8Array(new Uint8Array([0xFF, 0xFE, 0x00, 0x00]))

    let reader: ReadableStreamDefaultReader<string> | undefined
    let received: string | undefined
    let caught: unknown
    try {
      // The exception can't be localized to either of the following lines
      // but is raised before the second returns
      reader = rawStream.pipeThrough(createUTF8Stream()).getReader()
      const { value } = await reader.read()
      received = value
    } catch (e) {
      caught = e
    } finally {
      // cancel() on an already-errored stream rejects with the stored error;
      // that rejection is the expected outcome here, so it is ignored.
      await reader?.cancel().catch(() => {})
    }

    // Assertions live outside the try block so that a failure is reported
    // directly instead of being swallowed by the catch clause above.
    assert(caught !== undefined, 'Expected UnicodeDecodeError, got ' + received)
    assert(caught instanceof UnicodeDecodeError, 'Expected UnicodeDecodeError, got ' + caught)
    assertEquals(caught.message, 'This file appears to be UTF-16')
  })
})

src/files/streams.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
 * Thrown when a text file is decoded as UTF-8 but contains UTF-16 characters
 */
export class UnicodeDecodeError extends Error {
  constructor(message: string) {
    super(message)
    // Kept as 'UnicodeDecode' (not the class name) so existing checks on
    // `error.name` keep working.
    this.name = 'UnicodeDecode'
  }
}

/**
 * A transformer that decodes a byte stream as UTF-8 text.
 *
 * The first decoded text is inspected: if it begins with U+FFFD (the
 * replacement character, which is what a UTF-16 byte-order mark decodes to
 * under UTF-8), a UnicodeDecodeError is thrown and the stream errors.
 */
export class UTF8StreamTransformer implements Transformer<Uint8Array, string> {
  private readonly decoder: TextDecoder
  /** True until the first non-empty decoded text has been inspected. */
  private firstChunk: boolean

  constructor() {
    this.decoder = new TextDecoder('utf-8')
    this.firstChunk = true
  }

  /**
   * Decode one chunk of bytes and enqueue the resulting text.
   *
   * @param chunk Raw bytes read from the source stream
   * @param controller Controller of the enclosing TransformStream
   * @throws UnicodeDecodeError if the first decoded text starts with U+FFFD
   */
  transform(chunk: Uint8Array, controller: TransformStreamDefaultController<string>): void {
    const decoded = this.decoder.decode(chunk, { stream: true })

    // A chunk may yield no text at all (zero-length chunk, a lone UTF-8 BOM,
    // or an incomplete multi-byte sequence still buffered in the decoder).
    // There is nothing to inspect or emit yet, and the BOM check must stay
    // armed so detection does not depend on how the source was chunked.
    if (decoded.length === 0) return

    if (this.firstChunk) {
      if (decoded.startsWith('\uFFFD')) {
        throw new UnicodeDecodeError('This file appears to be UTF-16')
      }
      this.firstChunk = false
    }
    controller.enqueue(decoded)
  }

  /**
   * Emit any text still buffered in the decoder once the source is exhausted.
   *
   * @param controller Controller of the enclosing TransformStream
   */
  flush(controller: TransformStreamDefaultController<string>): void {
    const final = this.decoder.decode()
    if (final) {
      controller.enqueue(final)
    }
  }
}

/**
 * Creates a TransformStream that validates and decodes UTF-8 text
 *
 * @returns A stream accepting Uint8Array chunks and producing string chunks
 */
export function createUTF8Stream(): TransformStream<Uint8Array, string> {
  return new TransformStream(new UTF8StreamTransformer())
}

0 commit comments

Comments
 (0)