-
Notifications
You must be signed in to change notification settings - Fork 9
Convert relative image links to base64 data URIs (markdown and HTML) #47
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
03ff174
2e080d4
5fea495
0f4c9a4
5d19cae
77a1720
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,6 +2,7 @@ package main | |||||||||||||
|
|
||||||||||||||
| import ( | ||||||||||||||
| "crypto/rand" | ||||||||||||||
| "encoding/base64" | ||||||||||||||
| _ "embed" | ||||||||||||||
| "encoding/hex" | ||||||||||||||
| "errors" | ||||||||||||||
|
|
@@ -11,6 +12,7 @@ import ( | |||||||||||||
| "log" | ||||||||||||||
| "os" | ||||||||||||||
| "path/filepath" | ||||||||||||||
| "regexp" | ||||||||||||||
| "strings" | ||||||||||||||
|
|
||||||||||||||
| "github.com/pkg/browser" | ||||||||||||||
|
|
@@ -19,6 +21,10 @@ import ( | |||||||||||||
|
|
||||||||||||||
| var appVersion string | ||||||||||||||
|
|
||||||||||||||
| // imgSrcRegex matches <img> tags with src attributes | ||||||||||||||
| // Captures: 1=prefix, 2=opening quote, 3=src path, 4=closing quote | ||||||||||||||
| var imgSrcRegex = regexp.MustCompile(`(<img[^>]*\ssrc=)(["']?)([^"'\s>]+)(["']?)`) | ||||||||||||||
|
|
||||||||||||||
| //go:embed github-markdown.css | ||||||||||||||
| var style string | ||||||||||||||
|
|
||||||||||||||
|
|
@@ -58,6 +64,11 @@ func main() { | |||||||||||||
| markdown.Typographer(true)) | ||||||||||||||
|
|
||||||||||||||
| markdownTokens := md.Parse(dat) | ||||||||||||||
|
|
||||||||||||||
| // Convert relative image links to data URIs | ||||||||||||||
| baseDir := filepath.Dir(inputFilename) | ||||||||||||||
| processImageTokens(markdownTokens, baseDir) | ||||||||||||||
|
|
||||||||||||||
| html := md.RenderTokensToString(markdownTokens) | ||||||||||||||
| title := getTitle(markdownTokens) | ||||||||||||||
|
|
||||||||||||||
|
|
@@ -172,3 +183,179 @@ func getText(token markdown.Token) string { | |||||||||||||
| func isSnap() bool { | ||||||||||||||
| return os.Getenv("SNAP_USER_COMMON") != "" | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // processImageTokens walks through markdown tokens and converts relative image paths to data URIs | ||||||||||||||
| func processImageTokens(tokens []markdown.Token, baseDir string) { | ||||||||||||||
| for _, token := range tokens { | ||||||||||||||
| switch t := token.(type) { | ||||||||||||||
| case *markdown.Image: | ||||||||||||||
| if isRelativePath(t.Src) { | ||||||||||||||
| if dataURI := imageToDataURI(t.Src, baseDir); dataURI != "" { | ||||||||||||||
| t.Src = dataURI | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| case *markdown.HTMLInline: | ||||||||||||||
| // Process inline HTML that may contain <img> tags | ||||||||||||||
| t.Content = processHTMLImages(t.Content, baseDir) | ||||||||||||||
| case *markdown.HTMLBlock: | ||||||||||||||
| // Process block HTML that may contain <img> tags | ||||||||||||||
| t.Content = processHTMLImages(t.Content, baseDir) | ||||||||||||||
| case *markdown.Inline: | ||||||||||||||
| // Recursively process child tokens | ||||||||||||||
| if t.Children != nil { | ||||||||||||||
| processImageTokens(t.Children, baseDir) | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // processHTMLImages processes HTML content and converts relative image src attributes to data URIs | ||||||||||||||
| func processHTMLImages(html string, baseDir string) string { | ||||||||||||||
| // Use the package-level regex to match <img> tags with src attributes | ||||||||||||||
| result := imgSrcRegex.ReplaceAllStringFunc(html, func(match string) string { | ||||||||||||||
| // Extract the parts using the regex | ||||||||||||||
| parts := imgSrcRegex.FindStringSubmatch(match) | ||||||||||||||
| if len(parts) != 5 { | ||||||||||||||
| return match | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| prefix := parts[1] // "<img...src=" | ||||||||||||||
| openQuote := parts[2] // " or ' or empty | ||||||||||||||
| srcPath := parts[3] // the actual path | ||||||||||||||
| closeQuote := parts[4] // " or ' or empty | ||||||||||||||
|
|
||||||||||||||
| // If quotes don't match, return original (malformed HTML) | ||||||||||||||
| if openQuote != closeQuote { | ||||||||||||||
| return match | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Check if the path is relative | ||||||||||||||
| if isRelativePath(srcPath) { | ||||||||||||||
| if dataURI := imageToDataURI(srcPath, baseDir); dataURI != "" { | ||||||||||||||
| return prefix + openQuote + dataURI + closeQuote | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return match | ||||||||||||||
| }) | ||||||||||||||
|
|
||||||||||||||
| return result | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // isRelativePath checks if a path is relative (not http://, https://, //, or absolute path) | ||||||||||||||
| func isRelativePath(path string) bool { | ||||||||||||||
| if strings.HasPrefix(path, "http://") || strings.HasPrefix(path, "https://") { | ||||||||||||||
| return false | ||||||||||||||
| } | ||||||||||||||
| if strings.HasPrefix(path, "//") { | ||||||||||||||
| return false | ||||||||||||||
| } | ||||||||||||||
| if strings.HasPrefix(path, "data:") { | ||||||||||||||
| return false | ||||||||||||||
| } | ||||||||||||||
| if filepath.IsAbs(path) { | ||||||||||||||
| return false | ||||||||||||||
| } | ||||||||||||||
| return true | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // imageToDataURI reads an image file and converts it to a base64 data URI | ||||||||||||||
|
||||||||||||||
| // imageToDataURI reads an image file and converts it to a base64 data URI | |
| // imageToDataURI reads an image file and converts it to a base64 data URI. | |
| // On any failure it logs a warning and returns an empty string so callers can | |
| // gracefully fall back to using the original image path. |
Copilot
AI
Dec 21, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The error message could be more helpful by stating which operation failed. Consider rephrasing to something like "Warning: Invalid image path %s (failed to resolve absolute path): %v".
| log.Printf("Warning: Invalid image path %s: %v", fullPath, err) | |
| log.Printf("Warning: Invalid image path %s (failed to resolve absolute path): %v", fullPath, err) |
Copilot
AI
Dec 21, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The error message could be more helpful by stating which operation failed. Consider rephrasing to something like "Warning: Invalid base directory %s (failed to resolve absolute path): %v".
| log.Printf("Warning: Invalid base directory %s: %v", baseDir, err) | |
| log.Printf("Warning: Invalid base directory %s (failed to resolve absolute path): %v", baseDir, err) |
Copilot
AI
Dec 21, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The function comment should document the return value more clearly. Consider adding that it returns the appropriate MIME type string for the image format, or "image/*" for unknown formats.
| // getMimeType returns the MIME type based on file extension | |
| // getMimeType returns the appropriate image MIME type string based on the file | |
| // extension, or "image/*" for unknown or unsupported image formats. |
Copilot
AI
Dec 21, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using "image/*" as a fallback MIME type for unknown extensions is not standard and may not work correctly in all browsers. The asterisk is typically used in HTTP Accept headers, not in data URIs. Consider using a more specific default like "application/octet-stream" or "image/png", or simply skip conversion for unknown formats by returning an empty string.
| // For unknown extensions, log a warning but try with generic image type | |
| log.Printf("Warning: Unknown image extension %s for file %s, using image/* MIME type", ext, path) | |
| return "image/*" | |
| // For unknown extensions, log a warning but fall back to a generic binary MIME type | |
| log.Printf("Warning: Unknown image extension %s for file %s, using application/octet-stream MIME type", ext, path) | |
| return "application/octet-stream" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The regex pattern for src paths uses `[^"'\s>]+`, which excludes spaces. This means unquoted src attributes containing spaces (e.g., `<img src=my image.png>`) would only capture the first part before the space (`my`), potentially causing incorrect path processing. While unquoted attributes with spaces are invalid HTML, consider adding validation to ensure src attributes are properly quoted, or update the regex to only match properly quoted or space-free unquoted values.