Skip to content

Commit 224d450

Browse files
committed
fmt
1 parent c8a16f4 commit 224d450

3 files changed

Lines changed: 24 additions & 7 deletions

File tree

ingesters/src/utils/RecursiveMarkdownSplitter.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,9 @@ export class RecursiveMarkdownSplitter {
219219
* Active source becomes the first URL and applies from the end of the block
220220
* until the start of the next Sources block (or end of document).
221221
*/
222-
private parseSourceRanges(markdown: string): Array<{ start: number; end: number; url: string }> {
222+
private parseSourceRanges(
223+
markdown: string,
224+
): Array<{ start: number; end: number; url: string }> {
223225
const lines = markdown.split('\n');
224226
const ranges: Array<{ start: number; end: number; url: string }> = [];
225227

@@ -247,7 +249,11 @@ export class RecursiveMarkdownSplitter {
247249
};
248250

249251
// Locate all source blocks (start/end + first URL)
250-
const blocks: Array<{ blockStartLine: number; blockEndLine: number; firstUrl?: string }> = [];
252+
const blocks: Array<{
253+
blockStartLine: number;
254+
blockEndLine: number;
255+
firstUrl?: string;
256+
}> = [];
251257
for (let i = 0; i < lines.length; i++) {
252258
if (!isDashLine(lines[i]!)) continue;
253259
// Scan ahead for Sources: header within the dashed block
@@ -789,7 +795,8 @@ export class RecursiveMarkdownSplitter {
789795
if (i > 0 && this.options.overlap > 0) {
790796
const prevSegment = segments[i - 1]!;
791797
const desired = Math.max(
792-
prevSegment.end - Math.min(this.options.overlap, prevSegment.end - prevSegment.start),
798+
prevSegment.end -
799+
Math.min(this.options.overlap, prevSegment.end - prevSegment.start),
793800
prevSegment.start,
794801
);
795802
chunkStartAbs = desired;

ingesters/src/utils/__tests__/RecursiveMarkdownSplitter.noStartInsideCodeBlock.test.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import { RecursiveMarkdownSplitter } from '../RecursiveMarkdownSplitter';
22

3-
function getCodeBlockRanges(text: string): Array<{ start: number; end: number }> {
3+
function getCodeBlockRanges(
4+
text: string,
5+
): Array<{ start: number; end: number }> {
46
const ranges: Array<{ start: number; end: number }> = [];
57
const re = /```[\s\S]*?```/g;
68
let m: RegExpExecArray | null;
@@ -10,13 +12,19 @@ function getCodeBlockRanges(text: string): Array<{ start: number; end: number }>
1012
return ranges;
1113
}
1214

13-
function isInside(pos: number, ranges: Array<{ start: number; end: number }>): boolean {
15+
function isInside(
16+
pos: number,
17+
ranges: Array<{ start: number; end: number }>,
18+
): boolean {
1419
return ranges.some((r) => pos > r.start && pos < r.end);
1520
}
1621

1722
describe('RecursiveMarkdownSplitter - No chunk starts inside code block', () => {
1823
it('ensures chunk starts are never within fenced code blocks even with overlap', () => {
19-
const longCode = Array.from({ length: 60 }, (_, i) => `line ${i} of code`).join('\n');
24+
const longCode = Array.from(
25+
{ length: 60 },
26+
(_, i) => `line ${i} of code`,
27+
).join('\n');
2028
const md = `# Section One\n\nIntro paragraph text that will be part of the first section.\n\n\n## Subsection\n\nSome text before a large code block.\n\n
2129
\`\`\`cairo
2230
fn initializer(ref self: ContractState, owner: ContractAddress) {

ingesters/src/utils/__tests__/RecursiveMarkdownSplitter.sources.test.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ More details here.`;
3939

4040
// Find chunk under the first H1
4141
const introChunk = chunks.find((c) =>
42-
c.content.includes('# The Cairo Book: Introduction and Learning Resources'),
42+
c.content.includes(
43+
'# The Cairo Book: Introduction and Learning Resources',
44+
),
4345
);
4446
expect(introChunk).toBeDefined();
4547
expect(introChunk!.meta.sourceLink).toBe(

0 commit comments

Comments
 (0)