Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 95 additions & 1 deletion internal/pkg/config/config.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package config

import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"text/template"
"time"

"github.com/itchyny/gojq"
"gopkg.in/yaml.v3"
Expand All @@ -17,6 +21,11 @@ const (
placeholderRegex = "([a-zA-Z0-9_]+?)"
)

var (
// Jinja-style include directive: {% include 'path/to/file' %}
includeRegex = regexp.MustCompile(`\{\%\s*include\s+['"]([^'"]+)['"]\s*\%\}`)
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex pattern \{\%\s*include\s+['"]([^'"]+)['"]\s*\%\} doesn't properly match Jinja-style include directives that may have mixed quotes. The pattern [^'"] will match until it finds either a single or double quote, which could lead to incorrect parsing if quotes are mixed or nested.

For example, the pattern would incorrectly parse:

  • {% include "file's_name.yaml" %} (stops at the apostrophe in "file's")
  • {% include 'file"name.yaml' %} (stops at the internal quote)

Consider using separate patterns for single and double quotes:

includeRegex = regexp.MustCompile(`\{\%\s*include\s+(?:'([^']+)'|"([^"]+)")\s*\%\}`)

Then handle both capture groups in the matching logic.

Suggested change
includeRegex = regexp.MustCompile(`\{\%\s*include\s+['"]([^'"]+)['"]\s*\%\}`)
includeRegex = regexp.MustCompile(`\{\%\s*include\s+(?:'([^']+)'|"([^"]+)")\s*\%\}`)

Copilot uses AI. Check for mistakes.
)

// Generic String type that can evaluate both macro and context templates
type String string

Expand Down Expand Up @@ -58,15 +67,23 @@ func Load(configFile string, obj interface{}) error {
return err
}

// Preprocess Jinja-style includes
configDir := filepath.Dir(configFile)
processedContent, err := processIncludes(string(content), configDir, make(map[string]bool))
if err != nil {
return fmt.Errorf("failed to process includes: %w", err)
}

// inject secrets
configTemplate := template.New(templateName).Funcs(template.FuncMap{
"env": getEnvironmentVariable, // returns environment variable
"macro": setMacroPlaceholder, // set placeholder string for macro replacement
"secret": getSecret, // we use this template function to inject secrets from parameter store
"context": setContextPlaceholder, // set placeholder string for context replacement
"ds": getDateString, // returns date string (YYYY-MM-DD) from env var or current date
})

parsedTemplate, err := configTemplate.Parse(string(content))
parsedTemplate, err := configTemplate.Parse(processedContent)
if err != nil {
return err
}
Expand All @@ -84,3 +101,80 @@ func Load(configFile string, obj interface{}) error {
return nil

}

// processIncludes processes Jinja-style {% include 'path' %} directives
// It recursively includes files and prevents circular includes
func processIncludes(content string, baseDir string, visited map[string]bool) (string, error) {
// Find all include directives
matches := includeRegex.FindAllStringSubmatch(content, -1)
if len(matches) == 0 {
return content, nil
}

result := content
for _, match := range matches {
if len(match) != 2 {
continue
}

includePath := match[0] // Full match: {% include 'path' %}
filePath := match[1] // Captured group: path

// Resolve the file path relative to baseDir
// If the path starts with /, it's absolute, otherwise relative to baseDir
var resolvedPath string
if filepath.IsAbs(filePath) {
resolvedPath = filePath
} else {
resolvedPath = filepath.Join(baseDir, filePath)
}

// Normalize the path to handle .. and . correctly
resolvedPath = filepath.Clean(resolvedPath)

// Check for circular includes
absPath, err := filepath.Abs(resolvedPath)
if err != nil {
return "", fmt.Errorf("failed to resolve absolute path for %s: %w", filePath, err)
}

if visited[absPath] {
return "", fmt.Errorf("circular include detected: %s", absPath)
}

// Read the included file
includedContent, err := os.ReadFile(resolvedPath)
if err != nil {
return "", fmt.Errorf("failed to read included file %s: %w", filePath, err)
Comment on lines +136 to +148
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The processIncludes function is missing validation to prevent directory traversal attacks. While filepath.Clean normalizes paths, it doesn't prevent accessing files outside the intended directory structure. An attacker could use {% include '../../../etc/passwd' %} to read sensitive files.

Consider adding a check after resolving the absolute path to ensure it stays within allowed directories:

// After line 139, add validation
if !strings.HasPrefix(absPath, filepath.Clean(baseDir)) {
    return "", fmt.Errorf("include path %s is outside allowed directory", filePath)
}

Alternatively, maintain a whitelist of allowed base directories for includes.

Copilot uses AI. Check for mistakes.
}

// Recursively process includes in the included file
visited[absPath] = true
processedContent, err := processIncludes(string(includedContent), filepath.Dir(resolvedPath), visited)
if err != nil {
return "", err
}
delete(visited, absPath)

// Replace the include directive with the file content
result = strings.ReplaceAll(result, includePath, processedContent)
}
Comment on lines +159 to +161
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using strings.ReplaceAll to replace include directives could cause incorrect replacements if the same include directive appears multiple times or if the included content itself contains text that matches other include directives in the original file.

For example, if file A has {% include 'common.yaml' %} twice, both will be replaced with the content, which is correct. However, if the included content also contains the text {% include 'common.yaml' %} as a literal string (not meant to be processed), it will be incorrectly replaced in subsequent iterations.

Consider using a different approach:

  1. Replace includes one at a time in order
  2. Build the result string progressively by replacing only the first occurrence at each iteration
  3. Or process matches in reverse order to avoid position shifts
Suggested change
// Replace the include directive with the file content
result = strings.ReplaceAll(result, includePath, processedContent)
}
// Replace only the first occurrence of the include directive with the file content
if idx := strings.Index(result, includePath); idx != -1 {
result = result[:idx] + processedContent + result[idx+len(includePath):]
}

Copilot uses AI. Check for mistakes.

return result, nil
}

// getDateString returns a date string in YYYY-MM-DD format
// It first checks for the DS or EXECUTION_DATE environment variable
// If not set, it defaults to the current date
func getDateString() (string, error) {
// Check for DS environment variable first (common in data pipelines)
if ds := os.Getenv("DS"); ds != "" {
return ds, nil
}
// Check for EXECUTION_DATE (alternative common name)
if execDate := os.Getenv("EXECUTION_DATE"); execDate != "" {
return execDate, nil
}
// Default to current date
return time.Now().Format("2006-01-02"), nil
Comment on lines +169 to +179
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The getDateString function always returns nil error but is declared to return an error. Since this function never fails (both environment variable reads and time.Now().Format() cannot fail), the error return type is unnecessary and inconsistent with similar functions like getEnvironmentVariable which also returns (string, error) but could be simplified.

Consider either:

  1. Removing the error return type: func getDateString() string
  2. Validating the date format from environment variables and returning an error if invalid

If you keep the error return, you should validate that DS/EXECUTION_DATE values are in the expected YYYY-MM-DD format.

Suggested change
func getDateString() (string, error) {
// Check for DS environment variable first (common in data pipelines)
if ds := os.Getenv("DS"); ds != "" {
return ds, nil
}
// Check for EXECUTION_DATE (alternative common name)
if execDate := os.Getenv("EXECUTION_DATE"); execDate != "" {
return execDate, nil
}
// Default to current date
return time.Now().Format("2006-01-02"), nil
func getDateString() string {
// Check for DS environment variable first (common in data pipelines)
if ds := os.Getenv("DS"); ds != "" {
return ds
}
// Check for EXECUTION_DATE (alternative common name)
if execDate := os.Getenv("EXECUTION_DATE"); execDate != "" {
return execDate
}
// Default to current date
return time.Now().Format("2006-01-02")

Copilot uses AI. Check for mistakes.
}