|
1 | 1 | import type { DiffFileMap, DiffHunk, DiffInfo } from '../models/diff-types.ts' |
2 | 2 |
|
/**
 * Parses the header portion of a raw hunk string.
 *
 * The input is expected to be a piece produced by splitting a file section on
 * `'\n@@'`, so the leading `@@` is already gone and the string starts with the
 * separator character followed by the ranges, e.g. ` -1,3 +1,4 @@ context`.
 *
 * In the unified diff format the line count of a range may be omitted, in
 * which case it is 1 (`@@ -1 +1 @@`). Both ranges therefore accept an
 * optional `,count` part.
 *
 * @param hunk Raw hunk string (header remainder followed by the hunk body)
 * @returns The rebuilt header text plus the start line and line count of the
 *          new-file (`+`) range, or null if the string is not a hunk header
 */
function parseHunkHeader(
  hunk: string
): { header: string; startLine: number; lineCount: number } | null {
  // NOTE(review): `[ -+]` is a character range (U+0020 through U+002B), not
  // the three literal characters. Git emits a space in this position; the
  // class is kept unchanged so nothing that matched before is rejected now.
  //
  // Groups: 1 = both ranges, 2 = new-file start, 3 = new-file count
  // (undefined when omitted), 4 = text after the closing `@@`.
  const match = hunk.match(/^[ -+](-\d+(?:,\d+)? \+(\d+)(?:,(\d+))?) @@(.*)/)
  if (!match) return null

  // An omitted count means the range covers exactly one line.
  const [, ranges = '', start = '', count = '1', trailing = ''] = match

  return {
    // The header is rebuilt without a space after the opening `@@`; this
    // format is preserved because callers may rely on the exact string.
    header: `@@${ranges} @@${trailing}`,
    startLine: parseInt(start, 10),
    lineCount: parseInt(count, 10)
  }
}
24 | 25 |
|
25 | | - // Extract hunks |
26 | | - const hunkSections = section.split('\n@@').slice(1) |
/**
 * Walks the lines of a hunk and collects the new-file line numbers of every
 * added (`+`) line.
 *
 * `lines[0]` is treated as the hunk header remainder and is never inspected.
 * For the remaining lines:
 * - `-` lines are skipped: they do not exist in the new file.
 * - `\` lines (the `\ No newline at end of file` marker) are skipped: they
 *   annotate the previous line and are not file content, so counting them
 *   would shift every following line number by one.
 * - `+` lines are recorded and advance the counter.
 * - anything else is treated as a context line and advances the counter.
 *
 * @param lines Hunk split on newlines, header remainder first
 * @param startLine New-file line number of the first line in the hunk body
 * @returns Set of new-file line numbers that were added by this hunk
 */
function processHunkLines(
  lines: readonly string[],
  startLine: number
): Set<number> {
  const changedLines = new Set<number>()
  let currentLineNumber = startLine

  // Skip index 0: it holds the rest of the header line, not hunk content.
  for (const line of lines.slice(1)) {
    // Neither removed lines nor the no-newline marker occupy a line in the
    // new file, so they must not advance the counter.
    if (line.startsWith('-') || line.startsWith('\\')) continue

    if (line.startsWith('+')) {
      changedLines.add(currentLineNumber)
    }

    // Added and context lines both occupy one line in the new file.
    currentLineNumber++
  }

  return changedLines
}
46 | 56 |
|
47 | | - // Process each line in the hunk to track changed lines |
48 | | - for (let j = 1; j < lines.length; j++) { |
49 | | - const line = lines[j] |
/**
 * Turns one raw hunk string into its hunk descriptor and the set of line
 * numbers it adds in the new file.
 *
 * The end line is derived from the header as `startLine + lineCount - 1`.
 *
 * @param hunk Raw hunk string
 * @returns The hunk descriptor and its changed lines, or null when the hunk
 *          header cannot be parsed
 */
function processHunk(
  hunk: string
): { hunkInfo: DiffHunk; changedLines: Set<number> } | null {
  const parsedHeader = parseHunkHeader(hunk)
  if (parsedHeader === null) return null

  const firstLine = parsedHeader.startLine
  const lastLine = firstLine + parsedHeader.lineCount - 1

  return {
    hunkInfo: {
      startLine: firstLine,
      endLine: lastLine,
      header: parsedHeader.header
    },
    // The whole hunk (header line included) is handed over line by line.
    changedLines: processHunkLines(hunk.split('\n'), firstLine)
  }
}
53 | 83 |
|
54 | | - // For added lines, track the line number |
55 | | - if (line.startsWith('+')) { |
56 | | - // This is an added/modified line |
57 | | - changedLines.add(currentLineNumber) |
58 | | - } |
/**
 * Parses one file section of a git diff (the text following a `diff --git `
 * marker) into the file path and the combined diff information of its hunks.
 *
 * The path is the text captured between `a/` and ` b/` by the first match in
 * the section. Hunks whose header cannot be parsed are skipped.
 *
 * NOTE(review): the captured path is the `a/` side, while hunk line numbers
 * refer to the new file — confirm this is the intended key for renamed files.
 *
 * @param section Raw file section string
 * @returns File path and diff info, or null when no file path is found
 */
function parseFileSection(
  section: string
): { filePath: string; diffInfo: DiffInfo } | null {
  const pathMatch = /a\/(.+?) b\//.exec(section)
  if (pathMatch === null) return null

  const mergedChangedLines = new Set<number>()
  const collectedHunks: DiffHunk[] = []

  // Everything before the first '\n@@' is the file header, not a hunk.
  const [, ...rawHunks] = section.split('\n@@')

  for (const rawHunk of rawHunks) {
    const parsed = processHunk(rawHunk)
    if (parsed === null) continue

    // Fold this hunk's changed lines into the per-file set.
    parsed.changedLines.forEach((lineNumber) => {
      mergedChangedLines.add(lineNumber)
    })
    collectedHunks.push(parsed.hunkInfo)
  }

  return {
    filePath: pathMatch[1],
    diffInfo: { changedLines: mergedChangedLines, hunks: collectedHunks }
  }
}
| 119 | + |
| 120 | +/** |
| 121 | + * Parses a git diff to extract changed lines and their hunks |
| 122 | + * @param diff Git diff string |
| 123 | + * @returns Map of file paths to their diff information |
| 124 | + */ |
| 125 | +export function parseDiff(diff: string): DiffFileMap { |
| 126 | + const fileMap = new Map<string, DiffInfo>() |
| 127 | + const fileSections = diff.split('diff --git ').slice(1) |
| 128 | + |
| 129 | + for (const section of fileSections) { |
| 130 | + const result = parseFileSection(section) |
| 131 | + if (result) { |
| 132 | + fileMap.set(result.filePath, result.diffInfo) |
| 133 | + } |
75 | 134 | } |
76 | 135 |
|
77 | 136 | return fileMap |
|
0 commit comments