Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 120 additions & 74 deletions patchdiff/diff.py
Original file line number Diff line number Diff line change
@@ -1,91 +1,135 @@
from functools import partial, reduce
from __future__ import annotations

from typing import Dict, List, Set, Tuple

from .pointer import Pointer
from .types import Diffable


def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:
memory = {(0, 0): {"ops": [], "rops": [], "cost": 0}}
m, n = len(input), len(output)

# Build DP table bottom-up (iterative approach)
# dp[i][j] = cost of transforming input[0:i] to output[0:j]
dp = [[0] * (n + 1) for _ in range(m + 1)]

def dist(i, j):
if (i, j) not in memory:
if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
step = dist(i - 1, j - 1)
# Initialize base cases
for i in range(1, m + 1):
dp[i][0] = i # Cost of deleting all elements
for j in range(1, n + 1):
dp[0][j] = j # Cost of adding all elements

# Fill DP table
for i in range(1, m + 1):
for j in range(1, n + 1):
if input[i - 1] == output[j - 1]:
# Elements match, no operation needed
dp[i][j] = dp[i - 1][j - 1]
else:
paths = []
if i > 0:
base = dist(i - 1, j)
op = {"op": "remove", "idx": i - 1}
rop = {"op": "add", "idx": j - 1, "value": input[i - 1]}
paths.append(
{
"ops": base["ops"] + [op],
"rops": base["rops"] + [rop],
"cost": base["cost"] + 1,
}
)
if j > 0:
base = dist(i, j - 1)
op = {"op": "add", "idx": i - 1, "value": output[j - 1]}
rop = {"op": "remove", "idx": j - 1}
paths.append(
{
"ops": base["ops"] + [op],
"rops": base["rops"] + [rop],
"cost": base["cost"] + 1,
}
)
if i > 0 and j > 0:
base = dist(i - 1, j - 1)
op = {
"op": "replace",
"idx": i - 1,
"original": input[i - 1],
"value": output[j - 1],
}
rop = {
"op": "replace",
"idx": j - 1,
"original": output[j - 1],
"value": input[i - 1],
}
paths.append(
{
"ops": base["ops"] + [op],
"rops": base["rops"] + [rop],
"cost": base["cost"] + 1,
}
)
step = min(paths, key=lambda a: a["cost"])
memory[(i, j)] = step
return memory[(i, j)]

def pad(state, op, target=None):
ops, padding = state
# Take minimum of three operations
dp[i][j] = min(
dp[i - 1][j] + 1, # Remove from input
dp[i][j - 1] + 1, # Add from output
dp[i - 1][j - 1] + 1, # Replace
)

# Traceback to extract operations
ops = []
rops = []
i, j = m, n

while i > 0 or j > 0:
if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
# Elements match, no operation
i -= 1
j -= 1
elif i > 0 and (j == 0 or dp[i][j] == dp[i - 1][j] + 1):
# Remove from input
ops.append({"op": "remove", "idx": i - 1})
rops.append({"op": "add", "idx": j - 1, "value": input[i - 1]})
i -= 1
elif j > 0 and (i == 0 or dp[i][j] == dp[i][j - 1] + 1):
# Add from output
ops.append({"op": "add", "idx": i - 1, "value": output[j - 1]})
rops.append({"op": "remove", "idx": j - 1})
j -= 1
else:
# Replace
ops.append(
{
"op": "replace",
"idx": i - 1,
"original": input[i - 1],
"value": output[j - 1],
}
)
rops.append(
{
"op": "replace",
"idx": j - 1,
"original": output[j - 1],
"value": input[i - 1],
}
)
i -= 1
j -= 1

# Apply padding to operations (using explicit loops instead of reduce)
padded_ops = []
padding = 0
# Iterate in reverse to get correct order (traceback extracts operations backwards)
for op in reversed(ops):
if op["op"] == "add":
padded_idx = op["idx"] + 1 + padding
idx_token = padded_idx if padded_idx < len(target) + padding else "-"
full_op = {
"op": "add",
"path": ptr.append(idx_token),
"value": op["value"],
}
return [[*ops, full_op], padding + 1]
idx_token = padded_idx if padded_idx < len(input) + padding else "-"
padded_ops.append(
{
"op": "add",
"path": ptr.append(idx_token),
"value": op["value"],
}
)
padding += 1
elif op["op"] == "remove":
full_op = {
"op": "remove",
"path": ptr.append(op["idx"] + padding),
}
return [[*ops, full_op], padding - 1]
else:
padded_ops.append(
{
"op": "remove",
"path": ptr.append(op["idx"] + padding),
}
)
padding -= 1
else: # replace
replace_ptr = ptr.append(op["idx"] + padding)
replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
return [ops + replace_ops, padding]
padded_ops.extend(replace_ops)

solution = dist(len(input), len(output))
padded_ops, _ = reduce(partial(pad, target=input), solution["ops"], [[], 0])
padded_rops, _ = reduce(partial(pad, target=output), solution["rops"], [[], 0])
padded_rops = []
padding = 0
# Iterate in reverse to get correct order (traceback extracts operations backwards)
for op in reversed(rops):
if op["op"] == "add":
padded_idx = op["idx"] + 1 + padding
idx_token = padded_idx if padded_idx < len(output) + padding else "-"
padded_rops.append(
{
"op": "add",
"path": ptr.append(idx_token),
"value": op["value"],
}
)
padding += 1
elif op["op"] == "remove":
padded_rops.append(
{
"op": "remove",
"path": ptr.append(op["idx"] + padding),
}
)
padding -= 1
else: # replace
replace_ptr = ptr.append(op["idx"] + padding)
replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
padded_rops.extend(replace_ops)

return padded_ops, padded_rops

Expand Down Expand Up @@ -125,7 +169,9 @@ def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
return ops, rops


def diff(input: Diffable, output: Diffable, ptr: Pointer = None) -> Tuple[List, List]:
def diff(
input: Diffable, output: Diffable, ptr: Pointer | None = None
) -> Tuple[List, List]:
if input == output:
return [], []
if ptr is None:
Expand Down
4 changes: 2 additions & 2 deletions patchdiff/pointer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import re
from typing import Any, Hashable, List, Tuple
from typing import Any, Hashable, Iterable, Tuple

from .types import Diffable

Expand All @@ -20,7 +20,7 @@ def escape(token: str) -> str:


class Pointer:
def __init__(self, tokens: List[Hashable] | None = None) -> None:
def __init__(self, tokens: Iterable[Hashable] | None = None) -> None:
if tokens is None:
tokens = []
self.tokens = tuple(tokens)
Expand Down