From 31b2fb0f5e0ae433073b0512c4d84edaf24061f1 Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 13:49:22 +0100 Subject: [PATCH 1/8] fix: make --include-dir and --include-files work additively Previously, when both --include-dir and --include-files were specified, includeFiles would clear the includeDirs patterns, making them mutually exclusive. Now they combine additively - files from both filters are included in the output. --- packages/sdk/src/files.ts | 66 ++++++++++++++++++--------------- packages/sdk/test/files.test.ts | 63 +++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 29 deletions(-) diff --git a/packages/sdk/src/files.ts b/packages/sdk/src/files.ts index ff2b35c..05fe1c0 100644 --- a/packages/sdk/src/files.ts +++ b/packages/sdk/src/files.ts @@ -50,17 +50,6 @@ export async function collectFiles( // Build glob patterns const patterns: string[] = []; - // Handle include directories - if (includeDirs?.length) { - patterns.push( - ...includeDirs.map( - (dir) => `${escapeGlobPath(toRelativePattern(dir))}/**/*` - ) - ); - } else { - patterns.push("**/*"); - } - // Handle exclude directories const ignore = [ ...(excludeDirs?.map( @@ -72,29 +61,48 @@ export async function collectFiles( }) || []), ]; - // Handle file extensions - if (extensionSet) { - const exts = [...extensionSet]; - patterns.length = 0; // Clear patterns if we have specific extensions - if (includeDirs?.length) { - for (const dir of includeDirs) { - for (const ext of exts) { - patterns.push(`${escapeGlobPath(toRelativePattern(dir))}/**/*${ext}`); - } + // Determine if we have any include filters + const hasIncludeDirs = includeDirs?.length; + const hasIncludeFiles = includeFiles?.length; + const hasExtensions = extensionSet && extensionSet.size > 0; + + // If no include filters specified, include everything + if (!hasIncludeDirs && !hasIncludeFiles) { + if (hasExtensions) { + // Only specific extensions from everywhere + for (const ext of extensionSet!) 
{ + patterns.push(`**/*${ext}`); } } else { - for (const ext of exts) { - patterns.push(`**/*${ext}`); + // Everything + patterns.push("**/*"); + } + } else { + // Build patterns from includeDirs + if (hasIncludeDirs) { + if (hasExtensions) { + // Specific extensions from specific directories + for (const dir of includeDirs!) { + for (const ext of extensionSet!) { + patterns.push( + `${escapeGlobPath(toRelativePattern(dir))}/**/*${ext}` + ); + } + } + } else { + // All files from specific directories + patterns.push( + ...includeDirs!.map( + (dir) => `${escapeGlobPath(toRelativePattern(dir))}/**/*` + ) + ); } } - } - // Handle include files - if (includeFiles?.length) { - patterns.length = 0; // Clear patterns if we have specific files - // Convert absolute paths to relative paths relative to baseDir for fast-glob - // fast-glob works better with relative paths when cwd is set - patterns.push(...includeFiles.map((file) => toRelativePattern(file))); + // Add specific includeFiles patterns (additive, not replacing) + if (hasIncludeFiles) { + patterns.push(...includeFiles!.map((file) => toRelativePattern(file))); + } } logVerbose(`Scanning with patterns: ${patterns.join(", ")}`, 2); diff --git a/packages/sdk/test/files.test.ts b/packages/sdk/test/files.test.ts index 3972899..318f550 100644 --- a/packages/sdk/test/files.test.ts +++ b/packages/sdk/test/files.test.ts @@ -203,6 +203,69 @@ describe("File Collection", () => { expect(relativePaths).not.toContain("src/lib/other.ts"); }); + test("should combine includeDirs and includeFiles additively", async () => { + // Create directory structure simulating a real project + await mkdir(join(tempDir, "crates", "core", "src"), { recursive: true }); + await mkdir(join(tempDir, "crates", "engine", "src"), { recursive: true }); + await mkdir(join(tempDir, "crates", "utils", "src"), { recursive: true }); + await mkdir(join(tempDir, "src"), { recursive: true }); + + // Files in core (should be included via includeDirs) + await 
writeFile( + join(tempDir, "crates", "core", "src", "lib.rs"), + "core lib" + ); + await writeFile( + join(tempDir, "crates", "core", "src", "types.rs"), + "core types" + ); + + // Files in engine (should be included via includeFiles - single file) + await writeFile( + join(tempDir, "crates", "engine", "src", "lib.rs"), + "engine lib" + ); + await writeFile( + join(tempDir, "crates", "engine", "src", "other.rs"), + "engine other" + ); + + // Files in utils (should NOT be included) + await writeFile( + join(tempDir, "crates", "utils", "src", "helpers.rs"), + "utils helpers" + ); + + // Root src files (should NOT be included) + await writeFile(join(tempDir, "src", "main.rs"), "main"); + + const files = await collectFiles(tempDir, { + ig, + extensionSet: null, + excludeFiles: null, + includeFiles: [join(tempDir, "crates", "engine", "src", "lib.rs")], + excludeDirs: null, + includeDirs: [join(tempDir, "crates", "core", "src")], + verbose: 0, + }); + + // Should include: core/src/* (2 files) + engine/src/lib.rs (1 file) = 3 files + expect(files).toHaveLength(3); + const relativePaths = files + .map((f) => getRelativePath(f, tempDir)) + .sort(); + expect(relativePaths).toEqual([ + "crates/core/src/lib.rs", + "crates/core/src/types.rs", + "crates/engine/src/lib.rs", + ]); + + // Should NOT include files from other directories + expect(relativePaths).not.toContain("crates/engine/src/other.rs"); + expect(relativePaths).not.toContain("crates/utils/src/helpers.rs"); + expect(relativePaths).not.toContain("src/main.rs"); + }); + test("should handle exclude files pattern", async () => { await writeFile(join(tempDir, "file1.js"), "content1"); await writeFile(join(tempDir, "file2.test.js"), "content2"); From 1894d1571be7ff796b68ef4fb2246de9ebbb3c44 Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 14:17:48 +0100 Subject: [PATCH 2/8] test: add E2E tests for combined --include-dir and --include-files - Add integration tests verifying additive behavior of includeDirs + 
includeFiles - Add sample-rust-project in playground/ for manual testing - Tests verify: tree output, prompt, file inclusion, ignores working correctly --- .../test/integration/codebase-fixture.test.ts | 83 +++++++++++++++++++ packages/sdk/test/files.test.ts | 4 +- .../sample-rust-project/.codefetchignore | 5 ++ playground/sample-rust-project/.gitignore | 13 +++ playground/sample-rust-project/Cargo.toml | 9 ++ .../crates/core/src/errors.rs | 17 ++++ .../crates/core/src/lib.rs | 7 ++ .../crates/core/src/secret.rs | 2 + .../crates/core/src/types.rs | 9 ++ .../crates/engine/src/lib.rs | 10 +++ .../crates/engine/src/processor.rs | 10 +++ .../crates/utils/src/lib.rs | 4 + .../docs/arch/architecture.md | 12 +++ .../docs/arch/decisions.md | 7 ++ playground/sample-rust-project/src/main.rs | 4 + 15 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 playground/sample-rust-project/.codefetchignore create mode 100644 playground/sample-rust-project/.gitignore create mode 100644 playground/sample-rust-project/Cargo.toml create mode 100644 playground/sample-rust-project/crates/core/src/errors.rs create mode 100644 playground/sample-rust-project/crates/core/src/lib.rs create mode 100644 playground/sample-rust-project/crates/core/src/secret.rs create mode 100644 playground/sample-rust-project/crates/core/src/types.rs create mode 100644 playground/sample-rust-project/crates/engine/src/lib.rs create mode 100644 playground/sample-rust-project/crates/engine/src/processor.rs create mode 100644 playground/sample-rust-project/crates/utils/src/lib.rs create mode 100644 playground/sample-rust-project/docs/arch/architecture.md create mode 100644 playground/sample-rust-project/docs/arch/decisions.md create mode 100644 playground/sample-rust-project/src/main.rs diff --git a/packages/cli/test/integration/codebase-fixture.test.ts b/packages/cli/test/integration/codebase-fixture.test.ts index 92a8462..5fb31c1 100644 --- a/packages/cli/test/integration/codebase-fixture.test.ts +++ 
b/packages/cli/test/integration/codebase-fixture.test.ts @@ -267,4 +267,87 @@ describe("Integration: codebase-test fixture", () => { expect(content).toContain("button.js"); expect(content).toContain("utils"); }); + + it("combines --include-dir and --include-files additively", () => { + const result = spawnSync( + "node", + [ + cliPath, + "-o", + "combined-include.md", + "--include-dir", + "src/utils", + "--include-files", + "src/components/button.js", + "-t", + "3", + ], + { + cwd: FIXTURE_DIR, + encoding: "utf8", + stdio: ["inherit", "pipe", "pipe"], + } + ); + + expect(result.stderr).toBe(""); + expect(result.stdout).toContain("Output written to"); + + const outPath = join(CODEFETCH_DIR, "combined-include.md"); + expect(fs.existsSync(outPath)).toBe(true); + + const content = fs.readFileSync(outPath, "utf8"); + // Should include files from utils directory (via --include-dir) + expect(content).toContain("test1.ts"); + expect(content).toContain("test2.js"); + // Should include specific file (via --include-files) + expect(content).toContain("button.js"); + // Should NOT include other files not matching the patterns + expect(content).not.toContain("app.js"); + expect(content).not.toContain("header.js"); + expect(content).not.toContain("container.js"); + // Project tree should be present + expect(content).toMatch(/Project Structure:/); + }); + + it("combines multiple --include-dir directories with --include-files", () => { + const result = spawnSync( + "node", + [ + cliPath, + "-o", + "multi-dir-include.md", + "--include-dir", + "src/utils,src/components/base", + "--include-files", + "src/app.js", + "-t", + "3", + ], + { + cwd: FIXTURE_DIR, + encoding: "utf8", + stdio: ["inherit", "pipe", "pipe"], + } + ); + + expect(result.stderr).toBe(""); + expect(result.stdout).toContain("Output written to"); + + const outPath = join(CODEFETCH_DIR, "multi-dir-include.md"); + expect(fs.existsSync(outPath)).toBe(true); + + const content = fs.readFileSync(outPath, "utf8"); + // Should 
include files from utils directory + expect(content).toContain("test1.ts"); + expect(content).toContain("test2.js"); + // Should include files from components/base directory + expect(content).toContain("container.js"); + // Should include the specific file app.js + expect(content).toContain("app.js"); + // Should NOT include files from other directories + expect(content).not.toContain("button.js"); + expect(content).not.toContain("header.js"); + // Project tree should be present + expect(content).toMatch(/Project Structure:/); + }); }); diff --git a/packages/sdk/test/files.test.ts b/packages/sdk/test/files.test.ts index 318f550..3012696 100644 --- a/packages/sdk/test/files.test.ts +++ b/packages/sdk/test/files.test.ts @@ -206,7 +206,9 @@ describe("File Collection", () => { test("should combine includeDirs and includeFiles additively", async () => { // Create directory structure simulating a real project await mkdir(join(tempDir, "crates", "core", "src"), { recursive: true }); - await mkdir(join(tempDir, "crates", "engine", "src"), { recursive: true }); + await mkdir(join(tempDir, "crates", "engine", "src"), { + recursive: true, + }); await mkdir(join(tempDir, "crates", "utils", "src"), { recursive: true }); await mkdir(join(tempDir, "src"), { recursive: true }); diff --git a/playground/sample-rust-project/.codefetchignore b/playground/sample-rust-project/.codefetchignore new file mode 100644 index 0000000..9f226c2 --- /dev/null +++ b/playground/sample-rust-project/.codefetchignore @@ -0,0 +1,5 @@ +# Codefetch specific ignores +*.log +temp/ +cache/ +secret.rs diff --git a/playground/sample-rust-project/.gitignore b/playground/sample-rust-project/.gitignore new file mode 100644 index 0000000..c8a1e6c --- /dev/null +++ b/playground/sample-rust-project/.gitignore @@ -0,0 +1,13 @@ +# Build artifacts +target/ +dist/ + +# Node modules +node_modules/ + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store diff --git a/playground/sample-rust-project/Cargo.toml 
b/playground/sample-rust-project/Cargo.toml new file mode 100644 index 0000000..fd27205 --- /dev/null +++ b/playground/sample-rust-project/Cargo.toml @@ -0,0 +1,9 @@ +[workspace] +members = ["crates/*"] + +[package] +name = "sample-rust-project" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/playground/sample-rust-project/crates/core/src/errors.rs b/playground/sample-rust-project/crates/core/src/errors.rs new file mode 100644 index 0000000..5138a80 --- /dev/null +++ b/playground/sample-rust-project/crates/core/src/errors.rs @@ -0,0 +1,17 @@ +// Core error types +use std::fmt; + +#[derive(Debug)] +pub enum CoreError { + NotFound(String), + InvalidInput(String), +} + +impl fmt::Display for CoreError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CoreError::NotFound(msg) => write!(f, "Not found: {}", msg), + CoreError::InvalidInput(msg) => write!(f, "Invalid input: {}", msg), + } + } +} diff --git a/playground/sample-rust-project/crates/core/src/lib.rs b/playground/sample-rust-project/crates/core/src/lib.rs new file mode 100644 index 0000000..86b9632 --- /dev/null +++ b/playground/sample-rust-project/crates/core/src/lib.rs @@ -0,0 +1,7 @@ +// Core library - main entry point +pub mod types; +pub mod errors; + +pub fn init() { + println!("Core initialized"); +} diff --git a/playground/sample-rust-project/crates/core/src/secret.rs b/playground/sample-rust-project/crates/core/src/secret.rs new file mode 100644 index 0000000..abd9acb --- /dev/null +++ b/playground/sample-rust-project/crates/core/src/secret.rs @@ -0,0 +1,2 @@ +// This should be ignored by .codefetchignore +const SECRET_KEY: &str = "super-secret-key-12345"; diff --git a/playground/sample-rust-project/crates/core/src/types.rs b/playground/sample-rust-project/crates/core/src/types.rs new file mode 100644 index 0000000..72f99d4 --- /dev/null +++ b/playground/sample-rust-project/crates/core/src/types.rs @@ -0,0 +1,9 @@ +// Core types +pub struct Config { + pub 
name: String, + pub debug: bool, +} + +pub struct Context { + pub config: Config, +} diff --git a/playground/sample-rust-project/crates/engine/src/lib.rs b/playground/sample-rust-project/crates/engine/src/lib.rs new file mode 100644 index 0000000..94443fb --- /dev/null +++ b/playground/sample-rust-project/crates/engine/src/lib.rs @@ -0,0 +1,10 @@ +// Engine library - processing logic +use core::types::Context; + +pub fn process(ctx: &Context) { + println!("Processing with context: {:?}", ctx.config.name); +} + +pub fn run() { + println!("Engine running"); +} diff --git a/playground/sample-rust-project/crates/engine/src/processor.rs b/playground/sample-rust-project/crates/engine/src/processor.rs new file mode 100644 index 0000000..8c1f4e0 --- /dev/null +++ b/playground/sample-rust-project/crates/engine/src/processor.rs @@ -0,0 +1,10 @@ +// Engine processor - should NOT be included when using includeFiles for lib.rs only +pub struct Processor { + pub name: String, +} + +impl Processor { + pub fn new(name: &str) -> Self { + Processor { name: name.to_string() } + } +} diff --git a/playground/sample-rust-project/crates/utils/src/lib.rs b/playground/sample-rust-project/crates/utils/src/lib.rs new file mode 100644 index 0000000..beb5ced --- /dev/null +++ b/playground/sample-rust-project/crates/utils/src/lib.rs @@ -0,0 +1,4 @@ +// Utils library - should NOT be included +pub fn helper() -> String { + "helper".to_string() +} diff --git a/playground/sample-rust-project/docs/arch/architecture.md b/playground/sample-rust-project/docs/arch/architecture.md new file mode 100644 index 0000000..def7360 --- /dev/null +++ b/playground/sample-rust-project/docs/arch/architecture.md @@ -0,0 +1,12 @@ +# Architecture Document + +## Overview +This is the architecture document for the sample project. + +## Components +- Core: Base types and errors +- Engine: Processing logic +- Utils: Helper functions + +## Instructions +Please analyze this codebase and suggest improvements. 
diff --git a/playground/sample-rust-project/docs/arch/decisions.md b/playground/sample-rust-project/docs/arch/decisions.md new file mode 100644 index 0000000..ec8686e --- /dev/null +++ b/playground/sample-rust-project/docs/arch/decisions.md @@ -0,0 +1,7 @@ +# Decision Log + +## Decision 1: Use Rust +We chose Rust for memory safety and performance. + +## Decision 2: Modular Architecture +Split into crates for better separation of concerns. diff --git a/playground/sample-rust-project/src/main.rs b/playground/sample-rust-project/src/main.rs new file mode 100644 index 0000000..3e07cfe --- /dev/null +++ b/playground/sample-rust-project/src/main.rs @@ -0,0 +1,4 @@ +// Main application - should NOT be included +fn main() { + println!("Hello from main!"); +} From efd7ac5236429cf4c1b0bba5684deff1415ee05a Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 14:27:42 +0100 Subject: [PATCH 3/8] fix: support external prompt file paths and prepend prompts without placeholder - External paths (containing / or \) are now used directly instead of being nested under codefetch/prompts/ - Prompts without {{CURRENT_CODEBASE}} placeholder are prepended to the codebase content instead of replacing it --- packages/cli/src/commands/default.ts | 9 +++++++++ packages/cli/src/commands/open.ts | 9 +++++++++ packages/sdk/src/markdown.ts | 29 +++++++++++++++++++++++----- packages/sdk/src/template-parser.ts | 9 ++++++++- 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/commands/default.ts b/packages/cli/src/commands/default.ts index 517f2c6..62cb7f2 100644 --- a/packages/cli/src/commands/default.ts +++ b/packages/cli/src/commands/default.ts @@ -29,6 +29,15 @@ function getPromptFile( if (VALID_PROMPTS.has(config.defaultPromptFile)) { return config.defaultPromptFile; } + // Check if it's an external file path (contains path separator or is absolute) + // External paths should be used as-is, not nested under codefetch/prompts/ + if ( + 
config.defaultPromptFile.includes("/") || + config.defaultPromptFile.includes("\\") || + config.defaultPromptFile.startsWith(".") + ) { + return resolve(config.defaultPromptFile); + } return resolve(config.outputPath, "prompts", config.defaultPromptFile); } diff --git a/packages/cli/src/commands/open.ts b/packages/cli/src/commands/open.ts index 36fc19a..da5eaf8 100644 --- a/packages/cli/src/commands/open.ts +++ b/packages/cli/src/commands/open.ts @@ -35,6 +35,15 @@ function getPromptFile( if (VALID_PROMPTS.has(config.defaultPromptFile)) { return config.defaultPromptFile; } + // Check if it's an external file path (contains path separator or is absolute) + // External paths should be used as-is, not nested under codefetch/prompts/ + if ( + config.defaultPromptFile.includes("/") || + config.defaultPromptFile.includes("\\") || + config.defaultPromptFile.startsWith(".") + ) { + return resolve(config.defaultPromptFile); + } return resolve(config.outputPath, "prompts", config.defaultPromptFile); } diff --git a/packages/sdk/src/markdown.ts b/packages/sdk/src/markdown.ts index b1262b6..1991378 100644 --- a/packages/sdk/src/markdown.ts +++ b/packages/sdk/src/markdown.ts @@ -3,7 +3,11 @@ import { relative } from "pathe"; import type { TokenEncoder, TokenLimiter } from "./types"; import { generateProjectTree, generateProjectTreeFromFiles } from "./tree"; import { countTokens } from "./token-counter"; -import { processPromptTemplate, resolvePrompt } from "./template-parser"; +import { + processPromptTemplate, + resolvePrompt, + hasCodebasePlaceholder, +} from "./template-parser"; const CHUNK_SIZE = 64 * 1024; // 64KB optimal chunk size @@ -262,11 +266,26 @@ export async function generateMarkdown( onVerbose?.(`Final token count: ${tokenCounter.total}`, 2); - // Before final return, if we have a template with {{CURRENT_CODEBASE}}, replace it + // Before final return, process template with codebase content const content = markdownContent.join("\n"); - return promptTemplate === "" 
- ? content - : processPromptTemplate(promptTemplate, content, templateVars ?? {}); + + if (promptTemplate === "") { + return content; + } + + // If prompt template has {{CURRENT_CODEBASE}} placeholder, use template processing + // Otherwise, prepend the prompt to the codebase content + if (hasCodebasePlaceholder(promptTemplate)) { + return processPromptTemplate(promptTemplate, content, templateVars ?? {}); + } else { + // Process any other variables in the template, then prepend to content + const processedPrompt = await processPromptTemplate( + promptTemplate, + "", + templateVars ?? {} + ); + return processedPrompt.trim() + "\n\n" + content; + } } // Re-export for backward compatibility diff --git a/packages/sdk/src/template-parser.ts b/packages/sdk/src/template-parser.ts index 3c837e1..4a9e350 100644 --- a/packages/sdk/src/template-parser.ts +++ b/packages/sdk/src/template-parser.ts @@ -27,12 +27,19 @@ export async function processPromptTemplate( result = result.replace(new RegExp(`{{${key}}}`, "g"), value); } - // Always process CURRENT_CODEBASE first + // Replace CURRENT_CODEBASE placeholder with codebase content result = result.replace(/{{CURRENT_CODEBASE}}/g, codebase); return result; } +/** + * Check if a template contains the CURRENT_CODEBASE placeholder + */ +export function hasCodebasePlaceholder(template: string): boolean { + return template.includes("{{CURRENT_CODEBASE}}"); +} + export async function resolvePrompt( promptFile: string ): Promise { From 8c882491b3955f95da94cee502717ec2cb19d62c Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 14:53:01 +0100 Subject: [PATCH 4/8] feat: add XML tags for structured output sections - Wrap prompts in <task>...</task> tags - Wrap file tree in <filetree>...</filetree> tags - Wrap source code in <source_code>...</source_code> tags This provides better structure for AI models to understand the different sections of the codebase output.
--- packages/cli/test/unit/markdown.test.ts | 14 +++++++++++--- packages/sdk/src/markdown.ts | 21 ++++++++++++++------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/packages/cli/test/unit/markdown.test.ts b/packages/cli/test/unit/markdown.test.ts index 4c56b5e..c200701 100644 --- a/packages/cli/test/unit/markdown.test.ts +++ b/packages/cli/test/unit/markdown.test.ts @@ -8,7 +8,8 @@ const UTILS_DIR = join(FIXTURE_DIR, "src/utils"); describe("generateMarkdown with chunk-based token limit", () => { it("enforces maxTokens by chunk-based reading", async () => { - const MAX_TOKENS = 50; + // Note: XML tags (, ) add ~4 tokens overhead + const MAX_TOKENS = 55; const files = [join(UTILS_DIR, "test1.ts"), join(UTILS_DIR, "test2.js")]; const result = await generateMarkdown(files, { @@ -72,6 +73,12 @@ describe("generateMarkdown with chunk-based token limit", () => { disableLineNumbers: false, }); + // Check for XML tags + expect(markdown).toContain(""); + expect(markdown).toContain(""); + expect(markdown).toContain(""); + expect(markdown).toContain(""); + // Check content expect(markdown).toContain("Project Structure:"); expect(markdown).toMatch(/└── /); expect(markdown).toContain("test1.ts"); @@ -81,8 +88,9 @@ describe("generateMarkdown with chunk-based token limit", () => { it("respects token limits with project tree", async () => { const files = [join(UTILS_DIR, "test1.ts")]; + // Note: XML tags (, ) add overhead const markdown = await generateMarkdown(files, { - maxTokens: 20, + maxTokens: 40, verbose: 0, projectTree: 2, tokenEncoder: "simple", @@ -90,7 +98,7 @@ describe("generateMarkdown with chunk-based token limit", () => { }); const tokens = await countTokens(markdown, "simple"); - expect(tokens).toBeLessThanOrEqual(20); + expect(tokens).toBeLessThanOrEqual(40); }); }); diff --git a/packages/sdk/src/markdown.ts b/packages/sdk/src/markdown.ts index 1991378..8be25a0 100644 --- a/packages/sdk/src/markdown.ts +++ b/packages/sdk/src/markdown.ts @@ -147,8 
+147,8 @@ export async function generateMarkdown( // Handle inline prompt (direct string) or file-based prompt if (inlinePrompt) { onVerbose?.("Using inline prompt...", 2); - // For inline prompts, wrap with {{CURRENT_CODEBASE}} placeholder - promptTemplate = `${inlinePrompt}\n\n{{CURRENT_CODEBASE}}`; + // For inline prompts, wrap in <task> tags with {{CURRENT_CODEBASE}} placeholder + promptTemplate = `<task>\n${inlinePrompt}\n</task>\n\n{{CURRENT_CODEBASE}}`; const promptTokens = await countTokens(promptTemplate, tokenEncoder); if (maxTokens && promptTokens > tokenCounter.remaining) { @@ -189,19 +189,23 @@ export async function generateMarkdown( const tree = projectTreeSkipIgnoreFiles ? generateProjectTree(treeBaseDir, projectTree) : generateProjectTreeFromFiles(treeBaseDir, files, projectTree); - const treeTokens = await countTokens(tree, tokenEncoder); + const treeWithTags = `<filetree>\n${tree}\n</filetree>`; + const treeTokens = await countTokens(treeWithTags, tokenEncoder); if (maxTokens && treeTokens > tokenCounter.remaining) { onVerbose?.(`Tree exceeds token limit, skipping`, 3); return ""; } - markdownContent.push(tree, ""); + markdownContent.push(treeWithTags, ""); tokenCounter.remaining -= treeTokens; tokenCounter.total += treeTokens; onVerbose?.(`Tokens used for tree: ${treeTokens}`, 3); } + // Start source_code section + markdownContent.push("<source_code>"); + if (tokenLimiter === "truncated" && maxTokens) { // Calculate tokens per file to distribute evenly const tokensPerFile = Math.floor(tokenCounter.remaining / files.length); @@ -264,6 +268,9 @@ export async function generateMarkdown( } } + // Close source_code section + markdownContent.push("</source_code>"); + onVerbose?.(`Final token count: ${tokenCounter.total}`, 2); // Before final return, process template with codebase content @@ -274,17 +281,17 @@ export async function generateMarkdown( } // If prompt template has {{CURRENT_CODEBASE}} placeholder, use template processing - // Otherwise, prepend the prompt to the codebase content + // Otherwise, wrap the
prompt in <task> tags and prepend to content if (hasCodebasePlaceholder(promptTemplate)) { return processPromptTemplate(promptTemplate, content, templateVars ?? {}); } else { - // Process any other variables in the template, then prepend to content + // Process any other variables in the template, then wrap in <task> tags and prepend const processedPrompt = await processPromptTemplate( promptTemplate, "", templateVars ?? {} ); - return processedPrompt.trim() + "\n\n" + content; + return `<task>\n${processedPrompt.trim()}\n</task>\n\n${content}`; } } From 3203f04120487ba1c6ff2e833d7bee5c4062feda Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 17:33:59 +0100 Subject: [PATCH 5/8] docs: update README and changelogs for v2.2.0 - Document XML-structured output format with <task>, <filetree>, <source_code> tags - Document additive --include-dir and --include-files behavior - Document external prompt file path support - Update all changelogs (root, cli, sdk) for v2.2.0 --- CHANGELOG.md | 15 +++++++++++ README.md | 55 ++++++++++++++++++++++++++++++++++++--- packages/cli/CHANGELOG.md | 12 +++++++++ packages/sdk/CHANGELOG.md | 14 ++++++++++ 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c6c497..51fd818 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## 2.2.0 + +### Added +- **XML-structured output format** - Output now uses semantic XML tags for better AI parsing: + - `<task>...</task>` - Wraps the prompt/instructions + - `<filetree>...</filetree>` - Wraps the project tree structure + - `<source_code>...</source_code>` - Wraps all source code files +- **Additive `--include-dir` and `--include-files`** - These options now work together additively instead of being mutually exclusive. Use both to include specific directories PLUS specific files.
+- **External prompt file support** - Prompt files with paths (e.g., `-p docs/arch/prompt.md`) are now correctly resolved from the project root instead of requiring them to be in `codefetch/prompts/` + +### Fixed +- Fixed `--include-dir` and `--include-files` being mutually exclusive - now they combine additively +- Fixed external prompt file paths not being found when containing directory separators +- Fixed prompts without `{{CURRENT_CODEBASE}}` placeholder not including the codebase content + ## 2.1.2 ### Fixed diff --git a/README.md b/README.md index eed7c1b..f13c0e4 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,10 @@ npx codefetch --include-files "src/components/AgentPanel.tsx,src/lib/llm/**/*" - # Include src directory, exclude test files npx codefetch --include-dir src --exclude-files "*.test.ts" -o src-no-tests.md + +# Combine --include-dir and --include-files (additive!) +# This includes ALL files from crates/core/src PLUS the specific lib.rs file +npx codefetch --include-dir crates/core/src --include-files "crates/engine/src/lib.rs" -o combined.md ``` Dry run (only output to console) @@ -289,10 +293,20 @@ Inline prompts are automatically appended with the codebase content. #### Custom Prompt Files -Create custom prompts in `codefetch/prompts/` directory: +You can use custom prompt files in two ways: -1. Create a markdown file (e.g., `codefetch/prompts/my-prompt.md`) -2. Use it with `--prompt my-prompt.md` +**1. External prompt files (anywhere in your project):** +```bash +# Use a prompt file from anywhere in your project +npx codefetch -p docs/arch/review-prompt.md +npx codefetch --prompt ./prompts/security-audit.txt +``` + +**2. 
Prompt files in `codefetch/prompts/` directory:** +```bash +# Create codefetch/prompts/my-prompt.md, then use: +npx codefetch --prompt my-prompt.md +``` You can also set a default prompt in your `codefetch.config.mjs`: @@ -344,6 +358,41 @@ Codefetch uses a set of default ignore patterns to exclude common files and dire You can view the complete list of default patterns in [default-ignore.ts](packages/sdk/src/default-ignore.ts). +## Output Format + +Codefetch generates structured output using semantic XML tags to help AI models better understand the different sections: + +```xml +<task> +Your prompt or instructions here... +</task> + +<filetree> +Project Structure: +└── src + ├── index.ts + └── utils + └── helpers.ts +</filetree> + +<source_code> +src/index.ts +```typescript +// Your code here +``` + +src/utils/helpers.ts +```typescript +// More code here +``` +</source_code> +``` + +The XML structure provides: +- `<task>` - Contains your prompt/instructions (from `-p` flag) +- `<filetree>` - Contains the project tree visualization (from `-t` flag) +- `<source_code>` - Contains all the source code files with their paths + ## Token Counting Codefetch supports different token counting methods to match various AI models: diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md index 53c88d8..74b4295 100644 --- a/packages/cli/CHANGELOG.md +++ b/packages/cli/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 2.2.0 + +### Added +- **XML-structured output format** - Output now uses semantic XML tags for better AI parsing: + - `<task>...</task>` - Wraps the prompt/instructions + - `<filetree>...</filetree>` - Wraps the project tree structure + - `<source_code>...</source_code>` - Wraps all source code files +- **External prompt file support** - Prompt files with paths (e.g., `-p docs/arch/prompt.md`) are now correctly resolved from the project root + +### Fixed +- Fixed `getPromptFile` not resolving external file paths correctly when they contain directory separators + ## 2.1.2 ### Fixed diff --git a/packages/sdk/CHANGELOG.md b/packages/sdk/CHANGELOG.md index 617ea14..a08916b 100644 --- a/packages/sdk/CHANGELOG.md
+++ b/packages/sdk/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## 2.2.0 + +### Added +- **XML-structured output format** - Output now uses semantic XML tags for better AI parsing: + - `<task>...</task>` - Wraps the prompt/instructions + - `<filetree>...</filetree>` - Wraps the project tree structure + - `<source_code>...</source_code>` - Wraps all source code files +- **Additive `includeDirs` and `includeFiles`** - These options now work together additively instead of being mutually exclusive +- Added `hasCodebasePlaceholder` helper function to `template-parser.ts` + +### Fixed +- Fixed `collectFiles` treating `includeDirs` and `includeFiles` as mutually exclusive - now they combine additively +- Fixed `generateMarkdown` to prepend prompts without `{{CURRENT_CODEBASE}}` placeholder to the codebase content instead of replacing it + ## 2.0.4 ### Added From 5d89fe827d0f58ce3d7a395673452df93b468712 Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 17:45:05 +0100 Subject: [PATCH 6/8] chore: update pnpm-lock.yaml --- pnpm-lock.yaml | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 67583fb..5734e1c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -49,8 +49,8 @@ importers: specifier: ^2.0.1 version: 2.0.1(magicast@0.3.5) codefetch-sdk: - specifier: workspace:* - version: link:../sdk + specifier: ^2.1.0 + version: 2.1.2(magicast@0.3.5) consola: specifier: ^3.3.3 version: 3.3.3 @@ -101,8 +101,8 @@ importers: specifier: ^1.13.2 version: 1.13.2 codefetch-sdk: - specifier: workspace:* - version: link:../sdk + specifier: ^2.1.0 + version: 2.1.2(magicast@0.3.5) packages/sdk: dependencies: @@ -1705,6 +1705,9 @@ packages: resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} engines: {node: '>=12'} + codefetch-sdk@2.1.2: + resolution: {integrity: sha512-h3y8BJhwkfV++8rdi1XVddTkzoE6aWInTuPNAS2up4ZnWzTKOiZl7uso+BvJ1ohf3pcYlUDD5I48F1uUjHysiw==} + color-convert@2.0.1: resolution:
{integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} engines: {node: '>=7.0.0'} @@ -5076,6 +5079,19 @@ snapshots: strip-ansi: 6.0.1 wrap-ansi: 7.0.0 + codefetch-sdk@2.1.2(magicast@0.3.5): + dependencies: + adm-zip: 0.5.16 + c12: 2.0.1(magicast@0.3.5) + consola: 3.4.2 + defu: 6.1.4 + fast-glob: 3.3.3 + ignore: 7.0.0 + js-tiktoken: 1.0.16 + pathe: 2.0.3 + transitivePeerDependencies: + - magicast + color-convert@2.0.1: dependencies: color-name: 1.1.4 From 797a669e767c12165dfaa72f9c3ee8d599c67cd3 Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 17:47:00 +0100 Subject: [PATCH 7/8] docs: add pnpm lockfile troubleshooting to HOW-TO-RELEASE --- HOW-TO-RELEASE.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/HOW-TO-RELEASE.md b/HOW-TO-RELEASE.md index 2e7aa60..3baf625 100644 --- a/HOW-TO-RELEASE.md +++ b/HOW-TO-RELEASE.md @@ -160,6 +160,13 @@ Delete the GitHub Release if needed: `gh release delete vX.Y.Z` - `npm ERR! code E403` or auth failures: run `npm login` and retry - `gh` failures: `gh auth status`; ensure `repo` scope exists - Tag push rejected: pull/rebase or fast-forward `main`, then rerun +- **CI fails with `ERR_PNPM_OUTDATED_LOCKFILE`**: The lockfile is out of sync with `package.json`. This happens when dependencies change (e.g., `workspace:*` → `^2.1.0`). 
Fix it locally: + ```bash + pnpm install --no-frozen-lockfile + git add pnpm-lock.yaml + git commit -m "chore: update pnpm-lock.yaml" + git push + ``` ## Release Frequency Suggestions From c3c1eadc0928fe65b7e5b47ee7796144b31b37ab Mon Sep 17 00:00:00 2001 From: Kevin Kern Date: Thu, 27 Nov 2025 17:47:42 +0100 Subject: [PATCH 8/8] fix: use workspace:* for codefetch-sdk dependency --- packages/cli/package.json | 2 +- pnpm-lock.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/cli/package.json b/packages/cli/package.json index 2f5a2ce..15bacd8 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -41,7 +41,7 @@ "dependencies": { "@clack/prompts": "^0.11.0", "c12": "^2.0.1", - "codefetch-sdk": "^2.1.0", + "codefetch-sdk": "workspace:*", "consola": "^3.3.3", "ignore": "^7.0.0", "mri": "^1.2.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5734e1c..78114d0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -49,8 +49,8 @@ importers: specifier: ^2.0.1 version: 2.0.1(magicast@0.3.5) codefetch-sdk: - specifier: ^2.1.0 - version: 2.1.2(magicast@0.3.5) + specifier: workspace:* + version: link:../sdk consola: specifier: ^3.3.3 version: 3.3.3