From 03ff1740cbb9aee4ba70465e501c033752fa51fc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 21 Dec 2025 00:14:45 +0000 Subject: [PATCH 1/6] Initial plan From 2e080d430e4f9ba1f5667e822ffaeaf3fbf91b95 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 21 Dec 2025 00:20:11 +0000 Subject: [PATCH 2/6] Add support for converting relative image links to data URIs Co-authored-by: mapitman <4205286+mapitman@users.noreply.github.com> --- main.go | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/main.go b/main.go index 33edd2f..495e144 100644 --- a/main.go +++ b/main.go @@ -2,6 +2,7 @@ package main import ( "crypto/rand" + "encoding/base64" _ "embed" "encoding/hex" "errors" @@ -58,6 +59,11 @@ func main() { markdown.Typographer(true)) markdownTokens := md.Parse(dat) + + // Convert relative image links to data URIs + baseDir := filepath.Dir(inputFilename) + processImageTokens(markdownTokens, baseDir) + html := md.RenderTokensToString(markdownTokens) title := getTitle(markdownTokens) @@ -172,3 +178,85 @@ func getText(token markdown.Token) string { func isSnap() bool { return os.Getenv("SNAP_USER_COMMON") != "" } + +// processImageTokens walks through markdown tokens and converts relative image paths to data URIs +func processImageTokens(tokens []markdown.Token, baseDir string) { + for _, token := range tokens { + switch t := token.(type) { + case *markdown.Image: + if isRelativePath(t.Src) { + if dataURI := imageToDataURI(t.Src, baseDir); dataURI != "" { + t.Src = dataURI + } + } + case *markdown.Inline: + // Recursively process child tokens + if t.Children != nil { + processImageTokens(t.Children, baseDir) + } + } + } +} + +// isRelativePath checks if a path is relative (not http://, https://, //, or absolute path) +func isRelativePath(path string) bool { + if strings.HasPrefix(path, "http://") 
|| strings.HasPrefix(path, "https://") { + return false + } + if strings.HasPrefix(path, "//") { + return false + } + if strings.HasPrefix(path, "data:") { + return false + } + if filepath.IsAbs(path) { + return false + } + return true +} + +// imageToDataURI reads an image file and converts it to a base64 data URI +func imageToDataURI(imagePath string, baseDir string) string { + // Resolve the full path relative to the markdown file + fullPath := filepath.Join(baseDir, imagePath) + + // Read the image file + data, err := os.ReadFile(fullPath) + if err != nil { + // If the image can't be read, log a warning and return empty string to keep original path + log.Printf("Warning: Unable to read image file %s: %v", fullPath, err) + return "" + } + + // Determine MIME type based on file extension + mimeType := getMimeType(fullPath) + + // Encode to base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // Return data URI + return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded) +} + +// getMimeType returns the MIME type based on file extension +func getMimeType(path string) string { + ext := strings.ToLower(filepath.Ext(path)) + switch ext { + case ".jpg", ".jpeg": + return "image/jpeg" + case ".png": + return "image/png" + case ".gif": + return "image/gif" + case ".svg": + return "image/svg+xml" + case ".webp": + return "image/webp" + case ".bmp": + return "image/bmp" + case ".ico": + return "image/x-icon" + default: + return "application/octet-stream" + } +} From 5fea495c2b602187fb7504268b4b53ecafdae900 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 21 Dec 2025 00:23:58 +0000 Subject: [PATCH 3/6] Add security improvements: file size limits and path validation Co-authored-by: mapitman <4205286+mapitman@users.noreply.github.com> --- main.go | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index 495e144..e59d3a0 
100644 --- a/main.go +++ b/main.go @@ -220,16 +220,56 @@ func imageToDataURI(imagePath string, baseDir string) string { // Resolve the full path relative to the markdown file fullPath := filepath.Join(baseDir, imagePath) + // Clean and validate the path to prevent path traversal attacks + cleanedPath, err := filepath.Abs(fullPath) + if err != nil { + log.Printf("Warning: Invalid image path %s: %v", fullPath, err) + return "" + } + + // Ensure the resolved path is within or relative to the base directory + cleanedBase, err := filepath.Abs(baseDir) + if err != nil { + log.Printf("Warning: Invalid base directory %s: %v", baseDir, err) + return "" + } + + // Check if the cleaned path starts with the base directory or is a reasonable relative reference + // We allow accessing parent directories for flexibility with markdown repos + if !strings.HasPrefix(cleanedPath, cleanedBase) { + relPath, err := filepath.Rel(cleanedBase, cleanedPath) + if err != nil || strings.HasPrefix(relPath, "..") { + // Allow up to 3 levels of parent directory traversal for flexibility + parentLevels := strings.Count(relPath, "..") + if parentLevels > 3 { + log.Printf("Warning: Image path %s goes too many levels above base directory", imagePath) + return "" + } + } + } + + // Check file size before reading (limit to 10MB to prevent memory issues) + fileInfo, err := os.Stat(cleanedPath) + if err != nil { + log.Printf("Warning: Unable to stat image file %s: %v", cleanedPath, err) + return "" + } + + const maxSize = 10 * 1024 * 1024 // 10MB + if fileInfo.Size() > maxSize { + log.Printf("Warning: Image file %s is too large (%d bytes, max %d bytes)", cleanedPath, fileInfo.Size(), maxSize) + return "" + } + // Read the image file - data, err := os.ReadFile(fullPath) + data, err := os.ReadFile(cleanedPath) if err != nil { - // If the image can't be read, log a warning and return empty string to keep original path - log.Printf("Warning: Unable to read image file %s: %v", fullPath, err) + 
log.Printf("Warning: Unable to read image file %s: %v", cleanedPath, err) return "" } // Determine MIME type based on file extension - mimeType := getMimeType(fullPath) + mimeType := getMimeType(cleanedPath) // Encode to base64 encoded := base64.StdEncoding.EncodeToString(data) From 0f4c9a475b02404ddfffc4601e66541a1d104ea0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 21 Dec 2025 00:25:48 +0000 Subject: [PATCH 4/6] Fix parent directory counting and improve MIME type handling Co-authored-by: mapitman <4205286+mapitman@users.noreply.github.com> --- main.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index e59d3a0..2b67ba0 100644 --- a/main.go +++ b/main.go @@ -240,7 +240,14 @@ func imageToDataURI(imagePath string, baseDir string) string { relPath, err := filepath.Rel(cleanedBase, cleanedPath) if err != nil || strings.HasPrefix(relPath, "..") { // Allow up to 3 levels of parent directory traversal for flexibility - parentLevels := strings.Count(relPath, "..") + // Count the number of ".." path components + components := strings.Split(filepath.ToSlash(relPath), "/") + parentLevels := 0 + for _, component := range components { + if component == ".." 
{ + parentLevels++ + } + } if parentLevels > 3 { log.Printf("Warning: Image path %s goes too many levels above base directory", imagePath) return "" @@ -297,6 +304,8 @@ func getMimeType(path string) string { case ".ico": return "image/x-icon" default: - return "application/octet-stream" + // For unknown extensions, log a warning but try with generic image type + log.Printf("Warning: Unknown image extension %s for file %s, using image/* MIME type", ext, path) + return "image/*" } } From 5d19cae65571b9072014d668fbdd189adb7f41e6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 21 Dec 2025 00:50:06 +0000 Subject: [PATCH 5/6] Add support for HTML inline images in addition to markdown images Co-authored-by: mapitman <4205286+mapitman@users.noreply.github.com> --- main.go | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/main.go b/main.go index 2b67ba0..3f2e5e8 100644 --- a/main.go +++ b/main.go @@ -12,6 +12,7 @@ import ( "log" "os" "path/filepath" + "regexp" "strings" "github.com/pkg/browser" @@ -189,6 +190,12 @@ func processImageTokens(tokens []markdown.Token, baseDir string) { t.Src = dataURI } } + case *markdown.HTMLInline: + // Process inline HTML that may contain <img> tags + t.Content = processHTMLImages(t.Content, baseDir) + case *markdown.HTMLBlock: + // Process block HTML that may contain <img> tags + t.Content = processHTMLImages(t.Content, baseDir) case *markdown.Inline: // Recursively process child tokens if t.Children != nil { @@ -198,6 +205,37 @@ func processImageTokens(tokens []markdown.Token, baseDir string) { } } +// processHTMLImages processes HTML content and converts relative image src attributes to data URIs +func processHTMLImages(html string, baseDir string) string { + // Regular expression to match <img> tags with src attributes + // This handles various formats: src="path", src='path', src=path + imgRegex := 
regexp.MustCompile(`(]*\ssrc=)(['"]?)([^'"\s>]+)(['"]?)`) + + result := imgRegex.ReplaceAllStringFunc(html, func(match string) string { + // Extract the parts using the regex + parts := imgRegex.FindStringSubmatch(match) + if len(parts) != 5 { + return match + } + + prefix := parts[1] // " Date: Sun, 21 Dec 2025 00:53:24 +0000 Subject: [PATCH 6/6] Optimize regex compilation and improve HTML parsing logic Co-authored-by: mapitman <4205286+mapitman@users.noreply.github.com> --- main.go | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/main.go b/main.go index 3f2e5e8..cdbf268 100644 --- a/main.go +++ b/main.go @@ -21,6 +21,10 @@ import ( var appVersion string +// imgSrcRegex matches tags with src attributes +// Captures: 1=prefix, 2=opening quote, 3=src path, 4=closing quote +var imgSrcRegex = regexp.MustCompile(`(]*\ssrc=)(["']?)([^"'\s>]+)(["']?)`) + //go:embed github-markdown.css var style string @@ -207,13 +211,10 @@ func processImageTokens(tokens []markdown.Token, baseDir string) { // processHTMLImages processes HTML content and converts relative image src attributes to data URIs func processHTMLImages(html string, baseDir string) string { - // Regular expression to match tags with src attributes - // This handles various formats: src="path", src='path', src=path - imgRegex := regexp.MustCompile(`(]*\ssrc=)(['"]?)([^'"\s>]+)(['"]?)`) - - result := imgRegex.ReplaceAllStringFunc(html, func(match string) string { + // Use the package-level regex to match tags with src attributes + result := imgSrcRegex.ReplaceAllStringFunc(html, func(match string) string { // Extract the parts using the regex - parts := imgRegex.FindStringSubmatch(match) + parts := imgSrcRegex.FindStringSubmatch(match) if len(parts) != 5 { return match } @@ -223,6 +224,11 @@ func processHTMLImages(html string, baseDir string) string { srcPath := parts[3] // the actual path closeQuote := parts[4] // " or ' or empty + // If quotes don't match, return 
original (malformed HTML) + if openQuote != closeQuote { + return match + } + // Check if the path is relative if isRelativePath(srcPath) { if dataURI := imageToDataURI(srcPath, baseDir); dataURI != "" { @@ -276,7 +282,13 @@ func imageToDataURI(imagePath string, baseDir string) string { // We allow accessing parent directories for flexibility with markdown repos if !strings.HasPrefix(cleanedPath, cleanedBase) { relPath, err := filepath.Rel(cleanedBase, cleanedPath) - if err != nil || strings.HasPrefix(relPath, "..") { + if err != nil { + log.Printf("Warning: Unable to determine relative path for %s: %v", imagePath, err) + return "" + } + + // If the path goes outside the base directory, check parent traversal limits + if strings.HasPrefix(relPath, "..") { // Allow up to 3 levels of parent directory traversal for flexibility // Count the number of ".." path components components := strings.Split(filepath.ToSlash(relPath), "/")