From 7c083a0649e5de263cbbadaaeef9da8e35fc4a98 Mon Sep 17 00:00:00 2001 From: Saga4 Date: Sun, 23 Mar 2025 02:17:32 +0530 Subject: [PATCH 1/3] Improve Accuracy of diff hunk parsing in parseAllHunks --- src/utils/diff-utils.ts | 140 ++++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 48 deletions(-) diff --git a/src/utils/diff-utils.ts b/src/utils/diff-utils.ts index 0ff2a406..fab7ae55 100644 --- a/src/utils/diff-utils.ts +++ b/src/utils/diff-utils.ts @@ -39,66 +39,100 @@ export function parsePatch(patch: string): Hunk[] { * from the original content that are changed. * @param diff Diff expressed in GNU diff format. * @returns Map - */ -export function parseAllHunks(diff: string): Map { + */export function parseAllHunks(diff: string): Map { const hunksByFile: Map = new Map(); + parseDiff(diff).forEach(file => { const filename = file.to ? file.to : file.from!; - const chunks = file.chunks.map(chunk => { - let oldStart = chunk.oldStart; - let newStart = chunk.newStart; - let normalLines = 0; - let changeSeen = false; - const newLines: string[] = []; - let previousLine: string | null = null; - let nextLine: string | null = null; - + + file.chunks.forEach(chunk => { + // Find the first and last modified lines + let firstModifiedLine = -1; + let lastModifiedLine = -1; + + // Track normal lines by their line numbers + const normalLinesByNumber = new Map(); + + // First pass: identify modified range and catalog normal lines chunk.changes.forEach(change => { - // strip off leading '+', '-', or ' ' and trailing carriage return - const content = change.content.substring(1).replace(/[\n\r]+$/g, ''); - if (change.type === 'normal') { - normalLines++; - if (changeSeen) { - if (nextLine === null) { - nextLine = content; - } - } else { - previousLine = content; - } - } else { - if (change.type === 'add') { - // strip off leading '+' and trailing carriage return - newLines.push(content); + if (change.content.includes('No newline at end of file')) { + return; 
+ } + + if (change.type === 'add' || change.type === 'del') { + const lineNum = (change as any).ln || 0; + if (firstModifiedLine === -1 || lineNum < firstModifiedLine) { + firstModifiedLine = lineNum; } - if (!changeSeen) { - oldStart += normalLines; - newStart += normalLines; - changeSeen = true; + if (lineNum > lastModifiedLine) { + lastModifiedLine = lineNum; } + } else if (change.type === 'normal') { + // Store normal lines by their line number + const lineNum = (change as any).ln1 || (change as any).ln || 0; + normalLinesByNumber.set(lineNum, change.content.substring(1)); } }); - const newEnd = newStart + chunk.newLines - normalLines - 1; - const oldEnd = oldStart + chunk.oldLines - normalLines - 1; - let hunk: Hunk = { - oldStart: oldStart, - oldEnd: oldEnd, - newStart: newStart, - newEnd: newEnd, - newContent: newLines, - }; - if (previousLine) { - hunk = {...hunk, previousLine: previousLine}; + + // If no modifications, skip + if (firstModifiedLine === -1) return; + + // Collect all added lines + const addedLines: {ln: number, content: string}[] = []; + chunk.changes.forEach(change => { + if (change.type === 'add') { + addedLines.push({ + ln: (change as any).ln || 0, + content: change.content.substring(1) + }); + } + }); + + // Sort added lines by line number + addedLines.sort((a, b) => a.ln - b.ln); + + // Now build the new content, including both added lines and necessary normal lines + const newContent: string[] = []; + let currentLine = firstModifiedLine; + + while (currentLine <= lastModifiedLine) { + // Check if this is an added line + const addedLine = addedLines.find(line => line.ln === currentLine); + + if (addedLine) { + // Include the added line + newContent.push(addedLine.content); + } else { + // This must be a normal line within the modified range + // Include it to maintain code structure + const normalLine = normalLinesByNumber.get(currentLine); + if (normalLine) { + newContent.push(normalLine); + } + } + + currentLine++; } - if (nextLine) { 
- hunk = {...hunk, nextLine: nextLine}; + + // Create the hunk with the exact modified range + const hunk: Hunk = { + oldStart: firstModifiedLine, + oldEnd: lastModifiedLine, + newStart: firstModifiedLine, + newEnd: firstModifiedLine + newContent.length - 1, + newContent + }; + + // Add the hunk + if (!hunksByFile.has(filename)) { + hunksByFile.set(filename, []); } - return hunk; + hunksByFile.get(filename)!.push(hunk); }); - hunksByFile.set(filename, chunks); }); + return hunksByFile; } - /** * Given two texts, return the range of lines that are changed. * @param oldContent The original content. @@ -109,6 +143,16 @@ export function getSuggestedHunks( oldContent: string, newContent: string ): Hunk[] { + debugger; + console.log("===== getSuggestedHunks called ====="); + console.log("Original content:\n", oldContent); + console.log("New content:\n", newContent); + const diff = createPatch('unused', oldContent, newContent); - return parseAllHunks(diff).get('unused') || []; -} + console.log("Generated patch:\n", diff); + + const hunks = parseAllHunks(diff).get('unused') || []; + console.log("Parsed hunks:", JSON.stringify(hunks, null, 2)); + + return hunks; +} \ No newline at end of file From df6bb92396dea15be450075e9318409ab58f6414 Mon Sep 17 00:00:00 2001 From: Saga4 Date: Sun, 23 Mar 2025 02:23:15 +0530 Subject: [PATCH 2/3] remove debugging and fix --- src/utils/diff-utils.ts | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/utils/diff-utils.ts b/src/utils/diff-utils.ts index fab7ae55..136f2ad7 100644 --- a/src/utils/diff-utils.ts +++ b/src/utils/diff-utils.ts @@ -39,12 +39,11 @@ export function parsePatch(patch: string): Hunk[] { * from the original content that are changed. * @param diff Diff expressed in GNU diff format. 
 * @returns Map - */export function parseAllHunks(diff: string): Map { + */ +export function parseAllHunks(diff: string): Map { const hunksByFile: Map = new Map(); - parseDiff(diff).forEach(file => { const filename = file.to ? file.to : file.from!; - file.chunks.forEach(chunk => { // Find the first and last modified lines let firstModifiedLine = -1; @@ -143,16 +142,6 @@ export function getSuggestedHunks( oldContent: string, newContent: string ): Hunk[] { - debugger; - console.log("===== getSuggestedHunks called ====="); - console.log("Original content:\n", oldContent); - console.log("New content:\n", newContent); - const diff = createPatch('unused', oldContent, newContent); - console.log("Generated patch:\n", diff); - - const hunks = parseAllHunks(diff).get('unused') || []; - console.log("Parsed hunks:", JSON.stringify(hunks, null, 2)); - - return hunks; + return parseAllHunks(diff).get('unused') || []; } \ No newline at end of file From 9ab6e5e66102c4295275c7fb34098dae125958d5 Mon Sep 17 00:00:00 2001 From: Saga4 Date: Mon, 7 Apr 2025 04:51:12 +0530 Subject: [PATCH 3/3] fix for multiple edge cases --- src/utils/diff-utils.ts | 173 +++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 71 deletions(-) diff --git a/src/utils/diff-utils.ts b/src/utils/diff-utils.ts index 136f2ad7..da92c5a3 100644 --- a/src/utils/diff-utils.ts +++ b/src/utils/diff-utils.ts @@ -39,99 +39,130 @@ export function parsePatch(patch: string): Hunk[] { * from the original content that are changed. * @param diff Diff expressed in GNU diff format. * @returns Map + * TODO: Handle distant changes within a chunk using a proximity threshold. */ export function parseAllHunks(diff: string): Map { const hunksByFile: Map = new Map(); parseDiff(diff).forEach(file => { const filename = file.to ? 
file.to : file.from!; + const hunks: Hunk[] = []; file.chunks.forEach(chunk => { - // Find the first and last modified lines - let firstModifiedLine = -1; - let lastModifiedLine = -1; - - // Track normal lines by their line numbers - const normalLinesByNumber = new Map(); - - // First pass: identify modified range and catalog normal lines + // Track different types of lines + const allAddedLines: {ln: number; content: string}[] = []; + const allDeletedLines: {ln: number; content: string}[] = []; + const allNormalLines: {ln: number; lnNew: number; content: string}[] = []; + // First pass: collect all changes by type chunk.changes.forEach(change => { if (change.content.includes('No newline at end of file')) { return; } - - if (change.type === 'add' || change.type === 'del') { - const lineNum = (change as any).ln || 0; - if (firstModifiedLine === -1 || lineNum < firstModifiedLine) { - firstModifiedLine = lineNum; - } - if (lineNum > lastModifiedLine) { - lastModifiedLine = lineNum; - } - } else if (change.type === 'normal') { - // Store normal lines by their line number - const lineNum = (change as any).ln1 || (change as any).ln || 0; - normalLinesByNumber.set(lineNum, change.content.substring(1)); - } - }); - - // If no modifications, skip - if (firstModifiedLine === -1) return; - - // Collect all added lines - const addedLines: {ln: number, content: string}[] = []; - chunk.changes.forEach(change => { + const content = change.content.substring(1).replace(/[\n\r]+$/g, ''); + if (change.type === 'add') { - addedLines.push({ + allAddedLines.push({ + ln: (change as any).ln || 0, + content: content, + }); + } else if (change.type === 'del') { + allDeletedLines.push({ ln: (change as any).ln || 0, - content: change.content.substring(1) + content: content, + }); + } else if (change.type === 'normal') { + allNormalLines.push({ + ln: (change as any).ln1 || 0, + lnNew: (change as any).ln2 || 0, // New file line number + content: content, }); } }); - - // Sort added lines by 
line number - addedLines.sort((a, b) => a.ln - b.ln); - - // Now build the new content, including both added lines and necessary normal lines + // If no modifications, skip + if (allAddedLines.length === 0 && allDeletedLines.length === 0) return; + + // Sort lines by line number as ParseDiff does not guarantee order + allAddedLines.sort((a, b) => a.ln - b.ln); + allDeletedLines.sort((a, b) => a.ln - b.ln); + allNormalLines.sort((a, b) => a.ln - b.ln); + + // Identify the range to replace + let startLineToReplace: number; + let endLineToReplace: number; + if (allDeletedLines.length > 0) { + // If there are deletions, start with their range + const lastDelLine = allDeletedLines[allDeletedLines.length - 1].ln; + const lastAddedLine = + allAddedLines.length > 0 + ? Math.max(...allAddedLines.map(a => a.ln)) + : -1; + + // Find neutral lines between additions and deletions + // Find the full change range + const allChangeLines = [...allAddedLines, ...allDeletedLines]; + const earliestChangeLine = Math.min( + ...allChangeLines.map(line => line.ln) + ); + + // Include all normal lines that fall within this range + const relevantNormalLines = allNormalLines.filter( + normal => + normal.ln >= earliestChangeLine && + (normal.ln < lastDelLine || normal.lnNew < lastAddedLine) + ); + // Calculate the full replacement range including relevant normal lines + const allRelevantLines = [...allDeletedLines, ...relevantNormalLines]; + startLineToReplace = Math.min(...allRelevantLines.map(line => line.ln)); + endLineToReplace = Math.max(...allRelevantLines.map(line => line.ln)); + } else { + // Pure additions (no deletions) + // Use the first added line as the insertion point + startLineToReplace = allAddedLines[0].ln; + endLineToReplace = startLineToReplace; + } + // Now build the new content const newContent: string[] = []; - let currentLine = firstModifiedLine; - - while (currentLine <= lastModifiedLine) { - // Check if this is an added line - const addedLine = addedLines.find(line 
=> line.ln === currentLine); - - if (addedLine) { - // Include the added line - newContent.push(addedLine.content); - } else { - // This must be a normal line within the modified range - // Include it to maintain code structure - const normalLine = normalLinesByNumber.get(currentLine); - if (normalLine) { - newContent.push(normalLine); - } + + // Normal processing: include additions and normal lines in the right order + const linesToInclude: {ln: number; content: string}[] = []; + + // Add all the additions to our map + allAddedLines.forEach(line => { + linesToInclude.push({ln: line.ln, content: line.content}); + }); + // Add relevant normal lines that should be preserved + allNormalLines.forEach(line => { + // Only include normal lines within our replacement range if they haven't been replaced by additions + if (line.ln >= startLineToReplace && line.ln <= endLineToReplace) { + linesToInclude.push({ln: line.lnNew, content: line.content}); } - - currentLine++; - } - - // Create the hunk with the exact modified range + }); + + // Order the lines and build the final content + linesToInclude + .sort((a, b) => a.ln - b.ln) + .forEach(line => { + newContent.push(line.content); + }); + + // Create the hunk with the replacement range const hunk: Hunk = { - oldStart: firstModifiedLine, - oldEnd: lastModifiedLine, - newStart: firstModifiedLine, - newEnd: firstModifiedLine + newContent.length - 1, - newContent + oldStart: startLineToReplace, + oldEnd: endLineToReplace, + newStart: startLineToReplace, + newEnd: startLineToReplace + newContent.length - 1, + newContent, }; - - // Add the hunk - if (!hunksByFile.has(filename)) { - hunksByFile.set(filename, []); - } - hunksByFile.get(filename)!.push(hunk); + + hunks.push(hunk); }); + + if (hunks.length > 0) { + hunksByFile.set(filename, hunks); + } }); - + return hunksByFile; } + /** * Given two texts, return the range of lines that are changed. * @param oldContent The original content. 
@@ -144,4 +175,4 @@ export function getSuggestedHunks( ): Hunk[] { const diff = createPatch('unused', oldContent, newContent); return parseAllHunks(diff).get('unused') || []; -} \ No newline at end of file +}