Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 187 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"crypto/rand"
"encoding/base64"
_ "embed"
"encoding/hex"
"errors"
Expand All @@ -11,6 +12,7 @@ import (
"log"
"os"
"path/filepath"
"regexp"
"strings"

"github.com/pkg/browser"
Expand All @@ -19,6 +21,10 @@ import (

var appVersion string

// imgSrcRegex matches the src attribute of an <img> tag.
//
// Matching is case-insensitive ((?i)) because HTML tag and attribute names are
// case-insensitive, and optional whitespace is accepted around the "=" sign.
//
// Captures:
//
//	1 = everything from "<img" up to and including "src=" (plus any
//	    whitespace around "="), reproduced verbatim when rewriting
//	2 = opening quote (", ' or empty for an unquoted value)
//	3 = src path (stops at the first quote, whitespace or ">")
//	4 = closing quote (", ' or empty)
//
// Callers must compare groups 2 and 4 themselves: RE2 has no backreferences,
// so the pattern cannot require the quotes to match.
var imgSrcRegex = regexp.MustCompile(`(?i)(<img[^>]*\ssrc\s*=\s*)(["']?)([^"'\s>]+)(["']?)`)
Comment on lines +24 to +26
Copy link

Copilot AI Dec 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex pattern for src paths uses [^"'\s>]+ which excludes spaces. This means unquoted src attributes containing spaces (e.g., <img src=my image.png>) would only capture the first part before the space ("my"), potentially causing incorrect path processing. While unquoted attributes with spaces are invalid HTML, consider adding validation to ensure src attributes are properly quoted, or update the regex to only match properly quoted or space-free unquoted values.

Suggested change
// imgSrcRegex matches <img> tags with src attributes
// Captures: 1=prefix, 2=opening quote, 3=src path, 4=closing quote
var imgSrcRegex = regexp.MustCompile(`(<img[^>]*\ssrc=)(["']?)([^"'\s>]+)(["']?)`)
// imgSrcRegex matches <img> tags with quoted src attributes
// Captures: 1=prefix, 2=opening quote, 3=src path, 4=closing quote
var imgSrcRegex = regexp.MustCompile(`(<img[^>]*\ssrc=)(["'])([^"']+)(["'])`)

Copilot uses AI. Check for mistakes.

//go:embed github-markdown.css
var style string

Expand Down Expand Up @@ -58,6 +64,11 @@ func main() {
markdown.Typographer(true))

markdownTokens := md.Parse(dat)

// Convert relative image links to data URIs
baseDir := filepath.Dir(inputFilename)
processImageTokens(markdownTokens, baseDir)

html := md.RenderTokensToString(markdownTokens)
title := getTitle(markdownTokens)

Expand Down Expand Up @@ -172,3 +183,179 @@ func getText(token markdown.Token) string {
// isSnap reports whether the SNAP_USER_COMMON environment variable is set to
// a non-empty value (presumably the marker for running inside a snap; the
// variable's meaning is not established in this file).
func isSnap() bool {
	snapCommon := os.Getenv("SNAP_USER_COMMON")
	return len(snapCommon) > 0
}

// processImageTokens rewrites image references found in tokens in place so
// that relative image paths are replaced by data URIs.
//
// Markdown image tokens have their Src replaced directly; inline and block
// HTML tokens have their Content passed through processHTMLImages; Inline
// tokens are descended into recursively. Any image that cannot be converted
// keeps its original source. baseDir is the directory relative image paths
// are resolved against.
func processImageTokens(tokens []markdown.Token, baseDir string) {
	for i := range tokens {
		switch node := tokens[i].(type) {
		case *markdown.Inline:
			// Images live among the children of inline containers.
			if node.Children != nil {
				processImageTokens(node.Children, baseDir)
			}
		case *markdown.HTMLBlock:
			node.Content = processHTMLImages(node.Content, baseDir)
		case *markdown.HTMLInline:
			node.Content = processHTMLImages(node.Content, baseDir)
		case *markdown.Image:
			if !isRelativePath(node.Src) {
				continue
			}
			embedded := imageToDataURI(node.Src, baseDir)
			if embedded == "" {
				// Conversion failed; leave the original path untouched.
				continue
			}
			node.Src = embedded
		}
	}
}

// processHTMLImages returns html with the src attribute of every <img> tag
// matched by imgSrcRegex replaced by a data URI, provided the src is a
// relative path that imageToDataURI can convert. Tags with mismatched quotes,
// non-relative sources, or images that fail to convert are left exactly as
// they were. baseDir is the directory relative paths are resolved against.
func processHTMLImages(html string, baseDir string) string {
	rewriteTag := func(tag string) string {
		// ReplaceAllStringFunc only hands over the matched text, so the
		// capture groups have to be recovered with a second match.
		groups := imgSrcRegex.FindStringSubmatch(tag)
		if len(groups) != 5 {
			return tag
		}
		head, leftQuote, src, rightQuote := groups[1], groups[2], groups[3], groups[4]

		// Differing quotes indicate malformed HTML; do not touch it.
		if leftQuote != rightQuote || !isRelativePath(src) {
			return tag
		}

		uri := imageToDataURI(src, baseDir)
		if uri == "" {
			return tag
		}
		return head + leftQuote + uri + rightQuote
	}

	return imgSrcRegex.ReplaceAllStringFunc(html, rewriteTag)
}

// isRelativePath reports whether path is a relative filesystem reference that
// should be resolved against the markdown file's directory.
//
// It returns false for:
//   - the empty string (there is no file to resolve),
//   - protocol-relative URLs ("//host/..."),
//   - anything that starts with a URL scheme ("http:", "HTTPS:", "data:",
//     "ftp:", "file:", "mailto:", ...); schemes are matched case-insensitively
//     because URL schemes are case-insensitive,
//   - absolute filesystem paths as defined by filepath.IsAbs for the host OS.
//
// Everything else is considered relative.
func isRelativePath(path string) bool {
	switch {
	case path == "":
		return false
	case strings.HasPrefix(path, "//"):
		return false
	case hasURLScheme(path):
		return false
	case filepath.IsAbs(path):
		return false
	}
	return true
}

// hasURLScheme reports whether s begins with a URL scheme followed by ':',
// using the RFC 3986 grammar: a letter followed by letters, digits, '+', '-'
// or '.'.
func hasURLScheme(s string) bool {
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'):
			// Letters are valid at every position of a scheme.
		case (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.':
			// These may appear in a scheme, but not as its first character
			// (this also keeps "./img.png" and "../img.png" relative).
			if i == 0 {
				return false
			}
		case c == ':':
			return i > 0
		default:
			// Any other byte (e.g. '/', space) ends the candidate scheme.
			return false
		}
	}
	return false
}

// imageToDataURI reads an image file and converts it to a base64 data URI
Copy link

Copilot AI Dec 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function comment should specify what happens when image conversion fails. Consider adding that the function returns an empty string on failure, which causes the original path to be preserved (graceful degradation).

Suggested change
// imageToDataURI reads an image file and converts it to a base64 data URI
// imageToDataURI reads an image file and converts it to a base64 data URI.
// On any failure it logs a warning and returns an empty string so callers can
// gracefully fall back to using the original image path.

Copilot uses AI. Check for mistakes.
func imageToDataURI(imagePath string, baseDir string) string {
// Resolve the full path relative to the markdown file
fullPath := filepath.Join(baseDir, imagePath)

// Clean and validate the path to prevent path traversal attacks
cleanedPath, err := filepath.Abs(fullPath)
if err != nil {
log.Printf("Warning: Invalid image path %s: %v", fullPath, err)
Copy link

Copilot AI Dec 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message could be more helpful by suggesting potential causes. Consider rephrasing to something like "Warning: Invalid image path %s (failed to resolve absolute path): %v" to clarify what operation failed.

Suggested change
log.Printf("Warning: Invalid image path %s: %v", fullPath, err)
log.Printf("Warning: Invalid image path %s (failed to resolve absolute path): %v", fullPath, err)

Copilot uses AI. Check for mistakes.
return ""
}

// Ensure the resolved path is within or relative to the base directory
cleanedBase, err := filepath.Abs(baseDir)
if err != nil {
log.Printf("Warning: Invalid base directory %s: %v", baseDir, err)
Copy link

Copilot AI Dec 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message could be more helpful by suggesting potential causes. Consider rephrasing to something like "Warning: Invalid base directory %s (failed to resolve absolute path): %v" to clarify what operation failed.

Suggested change
log.Printf("Warning: Invalid base directory %s: %v", baseDir, err)
log.Printf("Warning: Invalid base directory %s (failed to resolve absolute path): %v", baseDir, err)

Copilot uses AI. Check for mistakes.
return ""
}

// Check if the cleaned path starts with the base directory or is a reasonable relative reference
// We allow accessing parent directories for flexibility with markdown repos
if !strings.HasPrefix(cleanedPath, cleanedBase) {
relPath, err := filepath.Rel(cleanedBase, cleanedPath)
if err != nil {
log.Printf("Warning: Unable to determine relative path for %s: %v", imagePath, err)
return ""
}

// If the path goes outside the base directory, check parent traversal limits
if strings.HasPrefix(relPath, "..") {
// Allow up to 3 levels of parent directory traversal for flexibility
// Count the number of ".." path components
components := strings.Split(filepath.ToSlash(relPath), "/")
parentLevels := 0
for _, component := range components {
if component == ".." {
parentLevels++
}
}
if parentLevels > 3 {
log.Printf("Warning: Image path %s goes too many levels above base directory", imagePath)
return ""
}
}
}

// Check file size before reading (limit to 10MB to prevent memory issues)
fileInfo, err := os.Stat(cleanedPath)
if err != nil {
log.Printf("Warning: Unable to stat image file %s: %v", cleanedPath, err)
return ""
}

const maxSize = 10 * 1024 * 1024 // 10MB
if fileInfo.Size() > maxSize {
log.Printf("Warning: Image file %s is too large (%d bytes, max %d bytes)", cleanedPath, fileInfo.Size(), maxSize)
return ""
}

// Read the image file
data, err := os.ReadFile(cleanedPath)
if err != nil {
log.Printf("Warning: Unable to read image file %s: %v", cleanedPath, err)
return ""
}

// Determine MIME type based on file extension
mimeType := getMimeType(cleanedPath)

// Encode to base64
encoded := base64.StdEncoding.EncodeToString(data)

// Return data URI
return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded)
}

// getMimeType returns the MIME type based on file extension
Copy link

Copilot AI Dec 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function comment should document the return value more clearly. Consider adding that it returns the appropriate MIME type string for the image format, or "image/*" for unknown formats.

Suggested change
// getMimeType returns the MIME type based on file extension
// getMimeType returns the appropriate image MIME type string based on the file
// extension, or "image/*" for unknown or unsupported image formats.

Copilot uses AI. Check for mistakes.
// getMimeType returns the MIME type for path, chosen from its file extension
// (compared case-insensitively).
//
// Unknown or missing extensions log a warning and yield
// "application/octet-stream". A wildcard such as "image/*" is deliberately
// not used: wildcards are media ranges meant for HTTP Accept headers, not
// concrete media types, and are not reliable inside a data URI.
func getMimeType(path string) string {
	ext := strings.ToLower(filepath.Ext(path))
	switch ext {
	case ".jpg", ".jpeg":
		return "image/jpeg"
	case ".png":
		return "image/png"
	case ".apng":
		return "image/apng"
	case ".gif":
		return "image/gif"
	case ".svg":
		return "image/svg+xml"
	case ".webp":
		return "image/webp"
	case ".avif":
		return "image/avif"
	case ".bmp":
		return "image/bmp"
	case ".tif", ".tiff":
		return "image/tiff"
	case ".ico":
		return "image/x-icon"
	default:
		// For unknown extensions, log a warning and fall back to a generic
		// binary MIME type.
		log.Printf("Warning: Unknown image extension %s for file %s, using application/octet-stream MIME type", ext, path)
		return "application/octet-stream"
	}
}