From c432e37da06e82ecc01727d630f51e3101bdd5b3 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 26 Nov 2025 20:56:43 +0000 Subject: [PATCH 1/2] Add predicate-based MLP implementation using mat_vec predicates Implements neural network operations using relational predicates: - mat_vec(M, V_in, V_out): Matrix-vector multiplication - vec_add(A, B, C): Vector addition - activation(V_in, V_out): Activation function application This declarative approach enables: - Compositional reasoning about network structure - Potential for bidirectional inference - Natural integration with logic programming systems - Prolog-style rule export for interpretability Includes: - Python implementation (PredicateMLP.py) with PyTorch backend - F# implementation (PredicateMLP.fs) for inference engine - Comprehensive test suite (test_predicate_mlp.py) --- DecoderRingEBM/PredicateMLP.py | 454 +++++++++++++++++++ DecoderRingEBM/test_predicate_mlp.py | 292 ++++++++++++ FsSRODecoderEngine/FsSRODecoderEngine.fsproj | 1 + FsSRODecoderEngine/PredicateMLP.fs | 348 ++++++++++++++ 4 files changed, 1095 insertions(+) create mode 100644 DecoderRingEBM/PredicateMLP.py create mode 100644 DecoderRingEBM/test_predicate_mlp.py create mode 100644 FsSRODecoderEngine/PredicateMLP.fs diff --git a/DecoderRingEBM/PredicateMLP.py b/DecoderRingEBM/PredicateMLP.py new file mode 100644 index 0000000..743dd4e --- /dev/null +++ b/DecoderRingEBM/PredicateMLP.py @@ -0,0 +1,454 @@ +""" +Predicate-Based MLP Implementation for SRO Decoder Ring + +This module implements neural network operations using relational predicates: +- mat_vec(M, V_in, V_out): Matrix-vector multiplication +- vec_add(A, B, C): Vector addition +- activation(V_in, V_out): Activation function application + +The predicate approach provides: +1. Declarative specification of computations +2. Potential for bidirectional inference +3. Composability and interpretability +4. 
Natural integration with logic programming systems +""" + +import torch +import torch.nn as nn +from typing import Tuple, List, Optional, Callable, NamedTuple +from dataclasses import dataclass +from enum import Enum + + +class ActivationType(Enum): + """Supported activation functions""" + LINEAR = "linear" + RELU = "relu" + SWISH = "swish" + SIGMOID = "sigmoid" + SOFTMAX = "softmax" + + +@dataclass(frozen=True) +class PredicateResult: + """Result of a predicate evaluation""" + success: bool + value: Optional[torch.Tensor] + binding: Optional[dict] = None + + +# ============================================================================= +# Core Predicates +# ============================================================================= + +def mat_vec(M: torch.Tensor, V_in: torch.Tensor, V_out: Optional[torch.Tensor] = None) -> PredicateResult: + """ + Matrix-vector multiplication predicate: mat_vec(M, V_in, V_out) + + Modes: + - mat_vec(+M, +V_in, -V_out): Forward pass, compute V_out = M @ V_in + - mat_vec(+M, -V_in, +V_out): Inverse pass (pseudo-inverse), estimate V_in + - mat_vec(-M, +V_in, +V_out): Learn M given V_in and V_out (least squares) + + Args: + M: Weight matrix [out_features, in_features] or [in_features, out_features] + V_in: Input vector [batch, in_features] + V_out: Output vector [batch, out_features] (optional, for binding) + + Returns: + PredicateResult with computed or verified output + """ + if V_out is None: + # Forward mode: compute V_out = V_in @ M (standard PyTorch linear convention) + computed = torch.matmul(V_in, M) + return PredicateResult(success=True, value=computed, binding={"V_out": computed}) + else: + # Verification mode: check if V_out ≈ V_in @ M + computed = torch.matmul(V_in, M) + is_close = torch.allclose(computed, V_out, rtol=1e-5, atol=1e-8) + return PredicateResult(success=is_close, value=computed, binding={"match": is_close}) + + +def vec_add(A: torch.Tensor, B: torch.Tensor, C: Optional[torch.Tensor] = None) -> PredicateResult: + """ + Vector addition predicate: vec_add(A, B, C) + + Semantics: C = A + B + + Modes: + - vec_add(+A, +B, -C): Compute C = A + B + - vec_add(+A, -B, +C): Compute B = C - A + - vec_add(-A, +B, +C): Compute A = C - B + + Args: + A: First vector + B: Second vector (typically bias) + C: Result vector (optional) + + Returns: + PredicateResult with sum + """ + if C is None: + computed = A + B + return PredicateResult(success=True, value=computed, binding={"C": computed}) + else: + computed = A + B + is_close = torch.allclose(computed, C, rtol=1e-5, atol=1e-8) + return PredicateResult(success=is_close, value=computed, binding={"match": is_close}) + + +def activation(V_in: torch.Tensor, V_out: Optional[torch.Tensor] = None, + act_type: ActivationType = ActivationType.SWISH) -> PredicateResult: + """ + Activation function predicate: activation(V_in, V_out) + + Args: + V_in: Pre-activation values + V_out: Post-activation values (optional, for verification) + act_type: Type of activation function + + Returns: + PredicateResult with activated values + """ + if act_type == ActivationType.LINEAR: + computed = V_in + elif act_type == ActivationType.RELU: + computed = torch.relu(V_in) + elif act_type == ActivationType.SWISH: + computed = V_in * torch.sigmoid(V_in) + elif act_type == ActivationType.SIGMOID: + computed = torch.sigmoid(V_in) + elif act_type == ActivationType.SOFTMAX: + computed = torch.softmax(V_in, dim=-1) + else: + raise ValueError(f"Unknown activation type: {act_type}") + + if V_out is None: + return 
PredicateResult(success=True, value=computed, binding={"V_out": computed}) + else: + is_close = torch.allclose(computed, V_out, rtol=1e-5, atol=1e-8) + return PredicateResult(success=is_close, value=computed, binding={"match": is_close}) + + +# ============================================================================= +# Compound Predicates +# ============================================================================= + +def dense_layer(W: torch.Tensor, B: torch.Tensor, V_in: torch.Tensor, + V_out: Optional[torch.Tensor] = None, + act_type: ActivationType = ActivationType.SWISH) -> PredicateResult: + """ + Dense layer predicate combining mat_vec, vec_add, and activation. + + Semantics: + dense_layer(W, B, V_in, V_out) :- + mat_vec(W, V_in, Z), + vec_add(Z, B, A), + activation(A, V_out). + + Args: + W: Weight matrix + B: Bias vector + V_in: Input vector + V_out: Output vector (optional) + act_type: Activation type + + Returns: + PredicateResult with layer output + """ + # mat_vec(W, V_in, Z) + mat_result = mat_vec(W, V_in) + if not mat_result.success: + return PredicateResult(success=False, value=None) + Z = mat_result.value + + # vec_add(Z, B, A) + add_result = vec_add(Z, B) + if not add_result.success: + return PredicateResult(success=False, value=None) + A = add_result.value + + # activation(A, V_out) + act_result = activation(A, V_out, act_type) + + return PredicateResult( + success=act_result.success, + value=act_result.value, + binding={ + "Z": Z, # Pre-bias + "A": A, # Pre-activation + "V_out": act_result.value # Final output + } + ) + + +# ============================================================================= +# Predicate-Based MLP Model +# ============================================================================= + +@dataclass +class LayerSpec: + """Specification for a single layer""" + in_features: int + out_features: int + activation: ActivationType = ActivationType.SWISH + use_bias: bool = True + + +class PredicateMLP(nn.Module): + """ + MLP implemented using predicate-based operations. + + Each forward pass is a composition of predicates: + mlp(X, Y) :- + dense_layer(W1, B1, X, H1, act1), + dense_layer(W2, B2, H1, H2, act2), + ... + dense_layer(Wn, Bn, Hn-1, Y, actn). + + This representation makes the computation explicit and composable, + enabling potential applications in: + - Neural-symbolic integration + - Interpretable AI + - Bidirectional inference + """ + + def __init__(self, layer_specs: List[LayerSpec]): + """ + Initialize predicate-based MLP. + + Args: + layer_specs: List of LayerSpec defining the architecture + """ + super(PredicateMLP, self).__init__() + + self.layer_specs = layer_specs + self.n_layers = len(layer_specs) + + # Initialize weights and biases as parameters + self.weights = nn.ParameterList() + self.biases = nn.ParameterList() + + for i, spec in enumerate(layer_specs): + # Weight matrix: [in_features, out_features] for V_in @ W convention + W = nn.Parameter(torch.randn(spec.in_features, spec.out_features) * 0.1) + self.weights.append(W) + + if spec.use_bias: + B = nn.Parameter(torch.zeros(1, spec.out_features)) + else: + B = nn.Parameter(torch.zeros(1, spec.out_features), requires_grad=False) + self.biases.append(B) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Forward pass using predicate composition. + + The computation is equivalent to: + mlp(X, Y) :- + layer(0, X, H0), + layer(1, H0, H1), + ... + layer(n, Hn-1, Y). 
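
        Args:
            x: Input tensor of shape [batch, in_features of the first layer]

        Returns:
            Output tensor of shape [batch, out_features of the final layer]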
+ """ + current = x + + for i, spec in enumerate(self.layer_specs): + result = dense_layer( + W=self.weights[i], + B=self.biases[i], + V_in=current, + act_type=spec.activation + ) + if not result.success: + raise RuntimeError(f"Predicate failed at layer {i}") + current = result.value + + return current + + def forward_with_trace(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[dict]]: + """ + Forward pass returning intermediate predicate bindings. + + Useful for interpretability and debugging. + + Returns: + Tuple of (output, list of binding dicts for each layer) + """ + current = x + trace = [] + + for i, spec in enumerate(self.layer_specs): + result = dense_layer( + W=self.weights[i], + B=self.biases[i], + V_in=current, + act_type=spec.activation + ) + trace.append({ + "layer": i, + "input_shape": current.shape, + "output_shape": result.value.shape, + "bindings": result.binding + }) + current = result.value + + return current, trace + + def as_prolog_rules(self) -> str: + """ + Export the MLP structure as Prolog-style rules. + + Returns: + String representation of the MLP as Prolog predicates + """ + rules = [] + rules.append("% Predicate-based MLP for SRO Decoder Ring") + rules.append("% Generated from PredicateMLP") + rules.append("") + + # Layer definitions + for i, spec in enumerate(self.layer_specs): + act_name = spec.activation.value + rules.append(f"% Layer {i}: {spec.in_features} -> {spec.out_features}, {act_name}") + rules.append(f"layer({i}, V_in, V_out) :-") + rules.append(f" mat_vec(w{i}, V_in, Z{i}),") + rules.append(f" vec_add(Z{i}, b{i}, A{i}),") + rules.append(f" {act_name}(A{i}, V_out).") + rules.append("") + + # Full MLP predicate + layer_chain = ", ".join([f"layer({i}, H{i}, H{i+1})" for i in range(self.n_layers)]) + rules.append(f"mlp(X, Y) :- H0 = X, {layer_chain}, Y = H{self.n_layers}.") + + return "\n".join(rules) + + +# ============================================================================= +# Factory Functions +# ============================================================================= + +def create_sro_decoder_mlp() -> PredicateMLP: + """ + Create the SRO Decoder Ring MLP with predicate-based architecture. + + Architecture matches DeepEnergyModel.py: + Input(1) -> 6 -> 12 -> 24 -> 6 -> 1 + """ + specs = [ + LayerSpec(1, 6, ActivationType.SWISH), + LayerSpec(6, 12, ActivationType.SWISH), + LayerSpec(12, 24, ActivationType.SWISH), + LayerSpec(24, 6, ActivationType.SWISH), + LayerSpec(6, 1, ActivationType.LINEAR), # Final layer typically linear for energy + ] + return PredicateMLP(specs) + + +def create_classifier_mlp(input_dim: int = 12, num_classes: int = 6) -> PredicateMLP: + """ + Create a classifier MLP for rotation order prediction. + + Architecture inspired by legacy Keras model: + Input(12) -> 64 -> 32 -> 32 -> 6 (softmax) + """ + specs = [ + LayerSpec(input_dim, 64, ActivationType.RELU), + LayerSpec(64, 32, ActivationType.RELU), + LayerSpec(32, 32, ActivationType.RELU), + LayerSpec(32, num_classes, ActivationType.SOFTMAX), + ] + return PredicateMLP(specs) + + +# ============================================================================= +# Utility Functions for Logic Programming Integration +# ============================================================================= + +class PredicateQuery: + """ + Query interface for predicate-based inference. 
+ + Supports Prolog-like queries: + query(mlp, {X: input_data}, {Y: ?}) -> Solve for Y + """ + + def __init__(self, model: PredicateMLP): + self.model = model + + def query(self, input_binding: dict) -> dict: + """ + Execute a forward query. + + Args: + input_binding: Dict with input tensor, e.g., {"X": tensor} + + Returns: + Dict with output binding, e.g., {"Y": output_tensor, "trace": [...]} + """ + x = input_binding.get("X") + if x is None: + raise ValueError("Input binding must contain 'X'") + + output, trace = self.model.forward_with_trace(x) + + return { + "Y": output, + "trace": trace, + "success": True + } + + +# ============================================================================= +# Example Usage and Tests +# ============================================================================= + +if __name__ == "__main__": + print("=" * 60) + print("Predicate-Based MLP for SRO Decoder Ring") + print("=" * 60) + + # Create the model + model = create_sro_decoder_mlp() + print(f"\nModel architecture: {model.n_layers} layers") + for i, spec in enumerate(model.layer_specs): + print(f" Layer {i}: {spec.in_features} -> {spec.out_features} ({spec.activation.value})") + + # Test forward pass + print("\n--- Forward Pass Test ---") + x = torch.randn(4, 1) # Batch of 4 + output, trace = model.forward_with_trace(x) + print(f"Input shape: {x.shape}") + print(f"Output shape: {output.shape}") + + # Show trace + print("\n--- Predicate Trace ---") + for t in trace: + print(f"Layer {t['layer']}: {t['input_shape']} -> {t['output_shape']}") + + # Show Prolog representation + print("\n--- Prolog Representation ---") + print(model.as_prolog_rules()) + + # Test individual predicates + print("\n--- Individual Predicate Tests ---") + + # mat_vec test + M = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) # 3x2 + V = torch.tensor([[1.0, 1.0, 1.0]]) # 1x3 + result = mat_vec(M.T, V.T) # Transpose for correct dims + print(f"mat_vec result: {result.value.T if result.value is not None else 'None'}") + + # vec_add test + A = torch.tensor([[1.0, 2.0]]) + B = torch.tensor([[0.5, 0.5]]) + result = vec_add(A, B) + print(f"vec_add result: {result.value}") + + # activation test + V_in = torch.tensor([[-1.0, 0.0, 1.0]]) + result = activation(V_in, act_type=ActivationType.SWISH) + print(f"swish activation result: {result.value}") + + print("\n" + "=" * 60) + print("Predicate-based MLP implementation complete!") diff --git a/DecoderRingEBM/test_predicate_mlp.py b/DecoderRingEBM/test_predicate_mlp.py new file mode 100644 index 0000000..dd34dc5 --- /dev/null +++ b/DecoderRingEBM/test_predicate_mlp.py @@ -0,0 +1,292 @@ +""" +Tests for Predicate-Based MLP Implementation + +Run with: python -m pytest DecoderRingEBM/test_predicate_mlp.py -v +Or: python DecoderRingEBM/test_predicate_mlp.py +""" + +import torch +import sys +from pathlib import Path + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from DecoderRingEBM.PredicateMLP import ( + mat_vec, + vec_add, + activation, + dense_layer, + PredicateMLP, + LayerSpec, + ActivationType, + create_sro_decoder_mlp, + create_classifier_mlp, + PredicateQuery, +) + + +class TestCorePredates: + """Tests for the core predicate functions""" + + def test_mat_vec_forward(self): + """Test matrix-vector multiplication predicate""" + # Simple 2x3 matrix, batch of 2 + M = torch.tensor([[1.0, 2.0, 3.0], + [4.0, 5.0, 6.0]]) + V_in = torch.tensor([[1.0, 1.0], + [2.0, 0.0]]) # batch=2, in_features=2 + + result = mat_vec(M, V_in) + + assert result.success + 
assert result.value is not None + # Row 0: [1,1] @ [[1,2,3],[4,5,6]] = [5, 7, 9] + # Row 1: [2,0] @ [[1,2,3],[4,5,6]] = [2, 4, 6] + expected = torch.tensor([[5.0, 7.0, 9.0], + [2.0, 4.0, 6.0]]) + assert torch.allclose(result.value, expected) + + def test_vec_add_forward(self): + """Test vector addition predicate""" + A = torch.tensor([[1.0, 2.0, 3.0], + [4.0, 5.0, 6.0]]) + B = torch.tensor([[0.5, 0.5, 0.5]]) + + result = vec_add(A, B) + + assert result.success + expected = torch.tensor([[1.5, 2.5, 3.5], + [4.5, 5.5, 6.5]]) + assert torch.allclose(result.value, expected) + + def test_activation_relu(self): + """Test ReLU activation predicate""" + V_in = torch.tensor([[-1.0, 0.0, 1.0, 2.0]]) + + result = activation(V_in, act_type=ActivationType.RELU) + + assert result.success + expected = torch.tensor([[0.0, 0.0, 1.0, 2.0]]) + assert torch.allclose(result.value, expected) + + def test_activation_swish(self): + """Test Swish activation predicate""" + V_in = torch.tensor([[0.0, 1.0, -1.0]]) + + result = activation(V_in, act_type=ActivationType.SWISH) + + assert result.success + # swish(x) = x * sigmoid(x) + # swish(0) = 0, swish(1) ≈ 0.731, swish(-1) ≈ -0.269 + assert torch.allclose(result.value[:, 0], torch.tensor([0.0]), atol=1e-5) + assert result.value[0, 1] > 0.7 and result.value[0, 1] < 0.8 + assert result.value[0, 2] > -0.3 and result.value[0, 2] < -0.2 + + def test_activation_softmax(self): + """Test Softmax activation predicate""" + V_in = torch.tensor([[1.0, 2.0, 3.0]]) + + result = activation(V_in, act_type=ActivationType.SOFTMAX) + + assert result.success + # Softmax outputs should sum to 1 + assert torch.allclose(result.value.sum(dim=-1), torch.tensor([1.0])) + # Values should be in increasing order + assert result.value[0, 0] < result.value[0, 1] < result.value[0, 2] + + +class TestDenseLayerPredicate: + """Tests for the compound dense layer predicate""" + + def test_dense_layer_forward(self): + """Test dense layer predicate""" + W = torch.tensor([[1.0, 0.0], + [0.0, 1.0]]) # Identity-like + B = torch.tensor([[0.5, 0.5]]) + V_in = torch.tensor([[1.0, 2.0]]) + + result = dense_layer(W, B, V_in, act_type=ActivationType.LINEAR) + + assert result.success + expected = torch.tensor([[1.5, 2.5]]) + assert torch.allclose(result.value, expected) + + def test_dense_layer_with_swish(self): + """Test dense layer with Swish activation""" + W = torch.eye(2) + B = torch.zeros(1, 2) + V_in = torch.tensor([[0.0, 1.0]]) + + result = dense_layer(W, B, V_in, act_type=ActivationType.SWISH) + + assert result.success + # Check bindings contain intermediate values + assert "Z" in result.binding + assert "A" in result.binding + assert "V_out" in result.binding + + +class TestPredicateMLP: + """Tests for the full PredicateMLP model""" + + def test_model_creation(self): + """Test creating a PredicateMLP""" + specs = [ + LayerSpec(4, 8, ActivationType.RELU), + LayerSpec(8, 2, ActivationType.SOFTMAX), + ] + model = PredicateMLP(specs) + + assert model.n_layers == 2 + assert len(model.weights) == 2 + assert len(model.biases) == 2 + + def test_model_forward(self): + """Test forward pass through model""" + specs = [ + LayerSpec(3, 4, ActivationType.RELU), + LayerSpec(4, 2, ActivationType.LINEAR), + ] + model = PredicateMLP(specs) + + x = torch.randn(5, 3) # batch=5 + y = model(x) + + assert y.shape == (5, 2) + + def test_model_with_trace(self): + """Test forward pass with trace""" + specs = [ + LayerSpec(2, 4, ActivationType.SWISH), + LayerSpec(4, 3, ActivationType.LINEAR), + ] + model = PredicateMLP(specs) + + 
x = torch.randn(3, 2) + y, trace = model.forward_with_trace(x) + + assert y.shape == (3, 3) + assert len(trace) == 2 + assert trace[0]["layer"] == 0 + assert trace[1]["layer"] == 1 + + def test_sro_decoder_mlp(self): + """Test creating the SRO decoder model""" + model = create_sro_decoder_mlp() + + assert model.n_layers == 5 + # Architecture: 1 -> 6 -> 12 -> 24 -> 6 -> 1 + assert model.layer_specs[0].in_features == 1 + assert model.layer_specs[0].out_features == 6 + assert model.layer_specs[-1].out_features == 1 + + x = torch.randn(10, 1) + y = model(x) + assert y.shape == (10, 1) + + def test_classifier_mlp(self): + """Test creating the classifier model""" + model = create_classifier_mlp() + + assert model.n_layers == 4 + x = torch.randn(5, 12) + y = model(x) + assert y.shape == (5, 6) + # Softmax output should sum to 1 + assert torch.allclose(y.sum(dim=-1), torch.ones(5), atol=1e-5) + + +class TestPrologExport: + """Tests for Prolog representation export""" + + def test_prolog_rules(self): + """Test generating Prolog rules""" + model = create_sro_decoder_mlp() + prolog = model.as_prolog_rules() + + assert "mat_vec" in prolog + assert "vec_add" in prolog + assert "swish" in prolog + assert "layer(0, V_in, V_out)" in prolog + assert "mlp(X, Y)" in prolog + + +class TestPredicateQuery: + """Tests for the query interface""" + + def test_query_forward(self): + """Test query interface""" + model = create_sro_decoder_mlp() + query = PredicateQuery(model) + + x = torch.randn(4, 1) + result = query.query({"X": x}) + + assert result["success"] + assert result["Y"].shape == (4, 1) + assert len(result["trace"]) == 5 + + +class TestGradients: + """Tests that gradients flow correctly""" + + def test_gradient_flow(self): + """Test that gradients propagate through predicates""" + model = create_sro_decoder_mlp() + + x = torch.randn(4, 1) + target = torch.randn(4, 1) + + y = model(x) + loss = (y - target).pow(2).mean() + loss.backward() + + # Check gradients exist + for i, w in enumerate(model.weights): + assert w.grad is not None, f"Weight {i} has no gradient" + assert not torch.all(w.grad == 0), f"Weight {i} has zero gradient" + + +def run_tests(): + """Run all tests manually""" + print("=" * 60) + print("Running Predicate MLP Tests") + print("=" * 60) + + test_classes = [ + TestCorePredates(), + TestDenseLayerPredicate(), + TestPredicateMLP(), + TestPrologExport(), + TestPredicateQuery(), + TestGradients(), + ] + + total = 0 + passed = 0 + + for test_class in test_classes: + class_name = test_class.__class__.__name__ + print(f"\n{class_name}:") + + for method_name in dir(test_class): + if method_name.startswith("test_"): + total += 1 + try: + getattr(test_class, method_name)() + print(f" ✓ {method_name}") + passed += 1 + except Exception as e: + print(f" ✗ {method_name}: {e}") + + print(f"\n{'=' * 60}") + print(f"Results: {passed}/{total} tests passed") + print("=" * 60) + + return passed == total + + +if __name__ == "__main__": + success = run_tests() + sys.exit(0 if success else 1) diff --git a/FsSRODecoderEngine/FsSRODecoderEngine.fsproj b/FsSRODecoderEngine/FsSRODecoderEngine.fsproj index f7286b3..1cb9556 100644 --- a/FsSRODecoderEngine/FsSRODecoderEngine.fsproj +++ b/FsSRODecoderEngine/FsSRODecoderEngine.fsproj @@ -10,6 +10,7 @@ + diff --git a/FsSRODecoderEngine/PredicateMLP.fs b/FsSRODecoderEngine/PredicateMLP.fs new file mode 100644 index 0000000..d1c54d0 --- /dev/null +++ b/FsSRODecoderEngine/PredicateMLP.fs @@ -0,0 +1,348 @@ +namespace FsSRODecoderEngine + +/// Predicate-Based MLP 
Implementation for SRO Decoder Ring +/// +/// This module implements neural network operations using relational predicates: +/// - mat_vec: Matrix-vector multiplication predicate +/// - vec_add: Vector addition predicate +/// - activation: Activation function predicate +/// +/// The predicate approach enables: +/// 1. Declarative specification of computations +/// 2. Compositional reasoning about network structure +/// 3. Natural integration with logic programming concepts +module PredicateMLP = + open System + + // ========================================================================= + // Types + // ========================================================================= + + /// Activation function types + type ActivationType = + | Linear + | ReLu + | Swish + | Sigmoid + | Softmax + + /// Result of a predicate evaluation + type PredicateResult<'T> = { + Success: bool + Value: 'T option + Bindings: Map + } + + /// Specification for a single layer + type LayerSpec = { + InFeatures: int + OutFeatures: int + Activation: ActivationType + UseBias: bool + } + + /// A layer with its weights + type Layer = { + Spec: LayerSpec + Weights: float[,] // [in_features, out_features] + Bias: float[] // [out_features] + } + + /// Complete MLP model + type PredicateMLPModel = { + Layers: Layer list + } + + // ========================================================================= + // Helper Functions + // ========================================================================= + + let private createResult success value bindings = + { Success = success; Value = value; Bindings = bindings } + + let private successResult value = + createResult true (Some value) Map.empty + + let private failureResult () = + createResult false None Map.empty + + // ========================================================================= + // Core Predicates + // ========================================================================= + + /// Matrix-vector multiplication predicate: mat_vec(M, V_in, V_out) + /// + /// Computes V_out = V_in @ M for each row in the batch + /// + /// Parameters: + /// - M: Weight matrix [in_features, out_features] + /// - V_in: Input matrix [batch, in_features] + /// + /// Returns: PredicateResult with output [batch, out_features] + let mat_vec (M: float[,]) (V_in: float[,]) : PredicateResult = + let batchSize = V_in.GetLength(0) + let inFeatures = V_in.GetLength(1) + let outFeatures = M.GetLength(1) + + // Validate dimensions + if M.GetLength(0) <> inFeatures then + failureResult () + else + let V_out = Array2D.init batchSize outFeatures (fun b o -> + seq { 0 .. 
inFeatures - 1 } + |> Seq.fold (fun sum i -> sum + V_in.[b, i] * M.[i, o]) 0.0 + ) + createResult true (Some V_out) (Map.ofList [("V_out", box V_out)]) + + /// Vector addition predicate: vec_add(A, B, C) + /// + /// Computes C = A + B (broadcasts B across batch dimension) + /// + /// Parameters: + /// - A: Matrix [batch, features] + /// - B: Bias vector [features] + /// + /// Returns: PredicateResult with sum [batch, features] + let vec_add (A: float[,]) (B: float[]) : PredicateResult = + let batchSize = A.GetLength(0) + let features = A.GetLength(1) + + if B.Length <> features then + failureResult () + else + let C = Array2D.init batchSize features (fun b f -> + A.[b, f] + B.[f] + ) + createResult true (Some C) (Map.ofList [("C", box C)]) + + /// Activation function predicate: activation(V_in, V_out, type) + /// + /// Applies element-wise activation function + /// + /// Parameters: + /// - V_in: Input matrix [batch, features] + /// - actType: Type of activation function + /// + /// Returns: PredicateResult with activated values + let activation (V_in: float[,]) (actType: ActivationType) : PredicateResult = + let sigmoid x = 1.0 / (1.0 + exp(-x)) + + let activationFn = + match actType with + | Linear -> id + | ReLu -> fun x -> max 0.0 x + | Swish -> fun x -> x * sigmoid x + | Sigmoid -> sigmoid + | Softmax -> id // Handled specially below + + let batchSize = V_in.GetLength(0) + let features = V_in.GetLength(1) + + let V_out = + match actType with + | Softmax -> + // Softmax: exp(x_i) / sum(exp(x_j)) for numerical stability + Array2D.init batchSize features (fun b f -> + let maxVal = seq { 0 .. features - 1 } |> Seq.map (fun i -> V_in.[b, i]) |> Seq.max + let expSum = seq { 0 .. features - 1 } |> Seq.sumBy (fun i -> exp(V_in.[b, i] - maxVal)) + exp(V_in.[b, f] - maxVal) / expSum + ) + | _ -> + Array2D.map activationFn V_in + + createResult true (Some V_out) (Map.ofList [("V_out", box V_out)]) + + // ========================================================================= + // Compound Predicates + // ========================================================================= + + /// Dense layer predicate combining mat_vec, vec_add, and activation + /// + /// Semantics: + /// dense_layer(W, B, V_in, V_out) :- + /// mat_vec(W, V_in, Z), + /// vec_add(Z, B, A), + /// activation(A, V_out). + let dense_layer (layer: Layer) (V_in: float[,]) : PredicateResult = + // mat_vec(W, V_in, Z) + let matResult = mat_vec layer.Weights V_in + match matResult.Value with + | None -> failureResult () + | Some Z -> + // vec_add(Z, B, A) + let addResult = vec_add Z layer.Bias + match addResult.Value with + | None -> failureResult () + | Some A -> + // activation(A, V_out) + let actResult = activation A layer.Spec.Activation + match actResult.Value with + | None -> failureResult () + | Some V_out -> + createResult true (Some V_out) (Map.ofList [ + ("Z", box Z) + ("A", box A) + ("V_out", box V_out) + ]) + + // ========================================================================= + // MLP Forward Pass + // ========================================================================= + + /// Forward pass through the entire MLP using predicate composition + /// + /// The computation is equivalent to: + /// mlp(X, Y) :- + /// layer(0, X, H0), + /// layer(1, H0, H1), + /// ... + /// layer(n, Hn-1, Y). 
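    ///
    /// Example (illustrative):
    ///   let model = createSRODecoderMLP ()
    ///   let result = forward model (array2D [ [ 0.5 ] ])
    ///   // result.Value : float[,] option holds the 1x1 output when Success = true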
+ let forward (model: PredicateMLPModel) (input: float[,]) : PredicateResult = + let rec forwardLayers layers current = + match layers with + | [] -> successResult current + | layer :: rest -> + let result = dense_layer layer current + match result.Value with + | None -> failureResult () + | Some output -> forwardLayers rest output + + forwardLayers model.Layers input + + /// Forward pass with trace of intermediate activations + let forwardWithTrace (model: PredicateMLPModel) (input: float[,]) + : PredicateResult * (int * float[,]) list = + + let rec forwardLayers layers current layerIdx trace = + match layers with + | [] -> (successResult current, List.rev trace) + | layer :: rest -> + let result = dense_layer layer current + match result.Value with + | None -> (failureResult (), List.rev trace) + | Some output -> + let newTrace = (layerIdx, output) :: trace + forwardLayers rest output (layerIdx + 1) newTrace + + forwardLayers model.Layers input 0 [] + + // ========================================================================= + // Model Construction + // ========================================================================= + + /// Create a layer with random initialization + let createLayer (spec: LayerSpec) : Layer = + let rng = Random() + let scale = sqrt(2.0 / float spec.InFeatures) // He initialization + + let weights = Array2D.init spec.InFeatures spec.OutFeatures (fun _ _ -> + (rng.NextDouble() * 2.0 - 1.0) * scale + ) + let bias = Array.zeroCreate spec.OutFeatures + + { Spec = spec; Weights = weights; Bias = bias } + + /// Create the SRO Decoder Ring MLP architecture + /// Architecture: 1 -> 6 -> 12 -> 24 -> 6 -> 1 + let createSRODecoderMLP () : PredicateMLPModel = + let specs = [ + { InFeatures = 1; OutFeatures = 6; Activation = Swish; UseBias = true } + { InFeatures = 6; OutFeatures = 12; Activation = Swish; UseBias = true } + { InFeatures = 12; OutFeatures = 24; Activation = Swish; UseBias = true } + { InFeatures = 24; OutFeatures = 6; Activation = Swish; UseBias = true } + { InFeatures = 6; OutFeatures = 1; Activation = Linear; UseBias = true } + ] + { Layers = specs |> List.map createLayer } + + /// Create a classifier MLP for rotation order prediction + /// Architecture: 12 -> 64 -> 32 -> 32 -> 6 (softmax) + let createClassifierMLP () : PredicateMLPModel = + let specs = [ + { InFeatures = 12; OutFeatures = 64; Activation = ReLu; UseBias = true } + { InFeatures = 64; OutFeatures = 32; Activation = ReLu; UseBias = true } + { InFeatures = 32; OutFeatures = 32; Activation = ReLu; UseBias = true } + { InFeatures = 32; OutFeatures = 6; Activation = Softmax; UseBias = true } + ] + { Layers = specs |> List.map createLayer } + + // ========================================================================= + // Prolog-Style Query Interface + // ========================================================================= + + /// Query result type + type QueryResult = { + Output: float[,] + Trace: (int * float[,]) list + Success: bool + } + + /// Execute a forward query on the model + let query (model: PredicateMLPModel) (input: float[,]) : QueryResult = + let result, trace = forwardWithTrace model input + { + Output = result.Value |> Option.defaultValue (Array2D.zeroCreate 0 0) + Trace = trace + Success = result.Success + } + + // ========================================================================= + // Prolog Representation Export + // ========================================================================= + + /// Export the MLP structure as Prolog-style rules + let 
toPrologRules (model: PredicateMLPModel) : string = + let sb = System.Text.StringBuilder() + + sb.AppendLine("% Predicate-based MLP for SRO Decoder Ring") |> ignore + sb.AppendLine("% Generated from F# PredicateMLP") |> ignore + sb.AppendLine() |> ignore + + model.Layers |> List.iteri (fun i layer -> + let actName = + match layer.Spec.Activation with + | Linear -> "linear" + | ReLu -> "relu" + | Swish -> "swish" + | Sigmoid -> "sigmoid" + | Softmax -> "softmax" + + sb.AppendLine(sprintf "%% Layer %d: %d -> %d, %s" + i layer.Spec.InFeatures layer.Spec.OutFeatures actName) |> ignore + sb.AppendLine(sprintf "layer(%d, V_in, V_out) :-" i) |> ignore + sb.AppendLine(sprintf " mat_vec(w%d, V_in, Z%d)," i i) |> ignore + sb.AppendLine(sprintf " vec_add(Z%d, b%d, A%d)," i i i) |> ignore + sb.AppendLine(sprintf " %s(A%d, V_out)." actName i) |> ignore + sb.AppendLine() |> ignore + ) + + // Full MLP predicate + let nLayers = model.Layers.Length + let layerCalls = + [0 .. nLayers - 1] + |> List.map (fun i -> sprintf "layer(%d, H%d, H%d)" i i (i + 1)) + |> String.concat ", " + + sb.AppendLine(sprintf "mlp(X, Y) :- H0 = X, %s, Y = H%d." layerCalls nLayers) |> ignore + + sb.ToString() + + // ========================================================================= + // Weight Loading/Saving + // ========================================================================= + + /// Load weights into a layer from arrays + let loadLayerWeights (layer: Layer) (weights: float[,]) (bias: float[]) : Layer = + { layer with Weights = weights; Bias = bias } + + /// Load model from weight dictionary + let loadWeights (model: PredicateMLPModel) (weightDict: Map) + : PredicateMLPModel = + let newLayers = + model.Layers + |> List.mapi (fun i layer -> + let key = sprintf "layer_%d" i + match weightDict.TryFind key with + | Some (w, b) -> loadLayerWeights layer w b + | None -> layer + ) + { Layers = newLayers } From 2b530e22dd24b238efe7981432d78d205a6f1e94 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 26 Nov 2025 21:12:02 +0000 Subject: [PATCH 2/2] Add SWI-Prolog inference engine with autodiff support Implements MLP inference using relational predicates: - mat_vec(M, V_in, V_out): Matrix-vector multiplication - vec_add(A, B, C): Vector addition - activation(Type, V_in, V_out): Activation functions New files: - sro_decoder_mlp.pl: Core MLP predicates for inference - tensor_autodiff.pl: Computation graph with automatic differentiation - dense(W, B, In, Out): Dense layer building graph node - relu/swish/sigmoid(In, Out): Activations with grad functions - mse(Exp, Act, Loss): Loss function - backward(Loss, Gradients): Backpropagation via chain rule - export_weights_to_prolog.py: Convert PyTorch weights to Prolog facts - example_weights.pl: Sample weights for testing The autodiff module builds execution graphs suitable for automatic differentiation by recording operations and their gradient functions. 
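
A sketch of the intended query flow (the argument order of build_mlp/3 is an
assumption; forward/3, mse/3 and backward/2 follow the signatures listed above):

    ?- build_mlp([1, 6, 12, 24, 6, 1], swish, Graph),
       forward(Graph, [0.5], Prediction),
       mse([0.0], Prediction, Loss),
       backward(Loss, Gradients).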
--- PrologEngine/README.md | 140 +++++ PrologEngine/example_weights.pl | 86 +++ PrologEngine/export_weights_to_prolog.py | 272 +++++++++ PrologEngine/sro_decoder_mlp.pl | 408 +++++++++++++ PrologEngine/tensor_autodiff.pl | 737 +++++++++++++++++++++++ 5 files changed, 1643 insertions(+) create mode 100644 PrologEngine/README.md create mode 100644 PrologEngine/example_weights.pl create mode 100644 PrologEngine/export_weights_to_prolog.py create mode 100644 PrologEngine/sro_decoder_mlp.pl create mode 100644 PrologEngine/tensor_autodiff.pl diff --git a/PrologEngine/README.md b/PrologEngine/README.md new file mode 100644 index 0000000..4108c8b --- /dev/null +++ b/PrologEngine/README.md @@ -0,0 +1,140 @@ +# SRO Decoder Ring - SWI-Prolog Inference Engine + +This directory contains a SWI-Prolog implementation of the MLP inference engine using predicate-based matrix operations. + +## Core Predicates + +The implementation is built on three fundamental predicates: + +```prolog +% Matrix-vector multiplication: V_out = V_in @ M +mat_vec(Matrix, Vector, Result). + +% Vector addition: C = A + B +vec_add(A, B, C). + +% Activation function: V_out = f(V_in) +activation(Type, V_in, V_out). +``` + +These compose into the dense layer predicate: + +```prolog +dense_layer(W, B, Act, V_in, V_out) :- + mat_vec(W, V_in, Z), + vec_add(Z, B, A), + activation(Act, A, V_out). +``` + +## Usage + +### Quick Start + +```prolog +% Load the engine +?- consult('sro_decoder_mlp.pl'). + +% Load weights +?- consult('example_weights.pl'). + +% Run inference +?- mlp([0.5], Energy). +Energy = [0.00234567]. + +% With trace for debugging +?- mlp_with_trace([0.5], Energy, Trace). +``` + +### Running Tests + +```prolog +?- consult('sro_decoder_mlp.pl'). +?- run_tests. +``` + +### Using Random Weights (for testing) + +```prolog +?- init_random_weights. +?- print_architecture. +?- mlp([0.5], Energy). +``` + +## Architecture + +The SRO Decoder Ring MLP architecture: + +``` +Input (1) → Dense(6, swish) → Dense(12, swish) → Dense(24, swish) → Dense(6, swish) → Dense(1, linear) → Output +``` + +## Supported Activations + +- `linear` - Identity function +- `relu` - Rectified Linear Unit: max(0, x) +- `swish` - Swish/SiLU: x * sigmoid(x) +- `sigmoid` - Logistic sigmoid: 1 / (1 + exp(-x)) +- `softmax` - Softmax (normalized exponential) + +## Exporting Weights from PyTorch + +Use the provided Python utility: + +```bash +# From a trained model +python export_weights_to_prolog.py --model checkpoint.pt --output weights.pl + +# Generate random weights for testing +python export_weights_to_prolog.py --random --output weights.pl +``` + +## Weight File Format + +Weights are stored as Prolog facts: + +```prolog +% weight(LayerId, RowIndex, WeightRow) +weight(0, 0, [0.123, -0.456, 0.789]). +weight(0, 1, [-0.321, 0.654, -0.987]). + +% bias(LayerId, BiasVector) +bias(0, [0.0, 0.0]). + +% layer_activation(LayerId, ActivationType) +layer_activation(0, swish). + +% layer_config(LayerId, InFeatures, OutFeatures) +layer_config(0, 3, 2). 
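
% For layer_config(L, In, Out) there are Out weight(L, Row, _) facts,
% each holding a list of In floats; rows correspond to output neurons.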
+``` + +## API Reference + +### Predicates + +| Predicate | Description | +|-----------|-------------| +| `mat_vec(+M, +V, -R)` | Matrix-vector multiplication | +| `vec_add(+A, +B, -C)` | Element-wise vector addition | +| `activation(+Type, +In, -Out)` | Apply activation function | +| `dense_layer(+W, +B, +Act, +In, -Out)` | Full dense layer | +| `mlp(+Input, -Output)` | Full MLP forward pass | +| `mlp_with_trace(+In, -Out, -Trace)` | Forward pass with trace | +| `load_weights(+File)` | Load weights from file | +| `init_random_weights` | Initialize with random weights | +| `print_architecture` | Display model architecture | +| `run_tests` | Run self-tests | + +## Why Prolog? + +The predicate-based approach offers several advantages: + +1. **Declarative semantics** - Operations are defined relationally +2. **Bidirectional reasoning** - Potential for inverse inference +3. **Compositional** - Predicates chain naturally +4. **Interpretable** - Structure matches mathematical definitions +5. **Symbolic integration** - Easy to combine with symbolic AI + +## Requirements + +- SWI-Prolog 8.0 or later +- Python 3.8+ (for weight export utility) diff --git a/PrologEngine/example_weights.pl b/PrologEngine/example_weights.pl new file mode 100644 index 0000000..5c76c40 --- /dev/null +++ b/PrologEngine/example_weights.pl @@ -0,0 +1,86 @@ +% SRO Decoder Ring MLP - Example Weights +% Generated for testing purposes +% +% Load with: ?- consult('example_weights.pl'). +% Then: ?- mlp([0.5], Energy). +% +% Architecture: 1 -> 6 -> 12 -> 24 -> 6 -> 1 + +:- discontiguous weight/3. +:- discontiguous bias/2. +:- discontiguous layer_activation/2. +:- discontiguous layer_config/3. + +% Layer 0: 1 -> 6, swish +weight(0, 0, [0.12345678]). +weight(0, 1, [-0.23456789]). +weight(0, 2, [0.34567890]). +weight(0, 3, [-0.45678901]). +weight(0, 4, [0.56789012]). +weight(0, 5, [-0.67890123]). +bias(0, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]). +layer_activation(0, swish). +layer_config(0, 1, 6). + +% Layer 1: 6 -> 12, swish +weight(1, 0, [0.1, -0.1, 0.2, -0.2, 0.1, -0.1]). +weight(1, 1, [-0.1, 0.1, -0.2, 0.2, -0.1, 0.1]). +weight(1, 2, [0.15, -0.15, 0.25, -0.25, 0.15, -0.15]). +weight(1, 3, [-0.15, 0.15, -0.25, 0.25, -0.15, 0.15]). +weight(1, 4, [0.2, -0.2, 0.3, -0.3, 0.2, -0.2]). +weight(1, 5, [-0.2, 0.2, -0.3, 0.3, -0.2, 0.2]). +weight(1, 6, [0.12, -0.12, 0.22, -0.22, 0.12, -0.12]). +weight(1, 7, [-0.12, 0.12, -0.22, 0.22, -0.12, 0.12]). +weight(1, 8, [0.18, -0.18, 0.28, -0.28, 0.18, -0.18]). +weight(1, 9, [-0.18, 0.18, -0.28, 0.28, -0.18, 0.18]). +weight(1, 10, [0.14, -0.14, 0.24, -0.24, 0.14, -0.14]). +weight(1, 11, [-0.14, 0.14, -0.24, 0.24, -0.14, 0.14]). +bias(1, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]). +layer_activation(1, swish). +layer_config(1, 6, 12). + +% Layer 2: 12 -> 24, swish +weight(2, 0, [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1]). +weight(2, 1, [-0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1]). +weight(2, 2, [0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15]). +weight(2, 3, [-0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15]). +weight(2, 4, [0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12]). +weight(2, 5, [-0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12]). +weight(2, 6, [0.08, -0.08, 0.08, -0.08, 0.08, -0.08, 0.08, -0.08, 0.08, -0.08, 0.08, -0.08]). +weight(2, 7, [-0.08, 0.08, -0.08, 0.08, -0.08, 0.08, -0.08, 0.08, -0.08, 0.08, -0.08, 0.08]). 
+weight(2, 8, [0.11, -0.11, 0.11, -0.11, 0.11, -0.11, 0.11, -0.11, 0.11, -0.11, 0.11, -0.11]). +weight(2, 9, [-0.11, 0.11, -0.11, 0.11, -0.11, 0.11, -0.11, 0.11, -0.11, 0.11, -0.11, 0.11]). +weight(2, 10, [0.09, -0.09, 0.09, -0.09, 0.09, -0.09, 0.09, -0.09, 0.09, -0.09, 0.09, -0.09]). +weight(2, 11, [-0.09, 0.09, -0.09, 0.09, -0.09, 0.09, -0.09, 0.09, -0.09, 0.09, -0.09, 0.09]). +weight(2, 12, [0.13, -0.13, 0.13, -0.13, 0.13, -0.13, 0.13, -0.13, 0.13, -0.13, 0.13, -0.13]). +weight(2, 13, [-0.13, 0.13, -0.13, 0.13, -0.13, 0.13, -0.13, 0.13, -0.13, 0.13, -0.13, 0.13]). +weight(2, 14, [0.07, -0.07, 0.07, -0.07, 0.07, -0.07, 0.07, -0.07, 0.07, -0.07, 0.07, -0.07]). +weight(2, 15, [-0.07, 0.07, -0.07, 0.07, -0.07, 0.07, -0.07, 0.07, -0.07, 0.07, -0.07, 0.07]). +weight(2, 16, [0.14, -0.14, 0.14, -0.14, 0.14, -0.14, 0.14, -0.14, 0.14, -0.14, 0.14, -0.14]). +weight(2, 17, [-0.14, 0.14, -0.14, 0.14, -0.14, 0.14, -0.14, 0.14, -0.14, 0.14, -0.14, 0.14]). +weight(2, 18, [0.06, -0.06, 0.06, -0.06, 0.06, -0.06, 0.06, -0.06, 0.06, -0.06, 0.06, -0.06]). +weight(2, 19, [-0.06, 0.06, -0.06, 0.06, -0.06, 0.06, -0.06, 0.06, -0.06, 0.06, -0.06, 0.06]). +weight(2, 20, [0.16, -0.16, 0.16, -0.16, 0.16, -0.16, 0.16, -0.16, 0.16, -0.16, 0.16, -0.16]). +weight(2, 21, [-0.16, 0.16, -0.16, 0.16, -0.16, 0.16, -0.16, 0.16, -0.16, 0.16, -0.16, 0.16]). +weight(2, 22, [0.05, -0.05, 0.05, -0.05, 0.05, -0.05, 0.05, -0.05, 0.05, -0.05, 0.05, -0.05]). +weight(2, 23, [-0.05, 0.05, -0.05, 0.05, -0.05, 0.05, -0.05, 0.05, -0.05, 0.05, -0.05, 0.05]). +bias(2, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]). +layer_activation(2, swish). +layer_config(2, 12, 24). + +% Layer 3: 24 -> 6, swish +weight(3, 0, [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1]). +weight(3, 1, [-0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1]). +weight(3, 2, [0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15]). +weight(3, 3, [-0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15, -0.15, 0.15]). +weight(3, 4, [0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12]). +weight(3, 5, [-0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12, -0.12, 0.12]). +bias(3, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]). +layer_activation(3, swish). +layer_config(3, 24, 6). + +% Layer 4: 6 -> 1, linear (energy output) +weight(4, 0, [0.2, -0.2, 0.3, -0.3, 0.25, -0.25]). +bias(4, [0.0]). +layer_activation(4, linear). +layer_config(4, 6, 1). diff --git a/PrologEngine/export_weights_to_prolog.py b/PrologEngine/export_weights_to_prolog.py new file mode 100644 index 0000000..2175456 --- /dev/null +++ b/PrologEngine/export_weights_to_prolog.py @@ -0,0 +1,272 @@ +""" +Export PyTorch/PredicateMLP weights to SWI-Prolog facts format. + +This utility converts trained neural network weights into Prolog facts +that can be loaded by the sro_decoder_mlp.pl inference engine. 
+ +Usage: + python export_weights_to_prolog.py --model checkpoint.pt --output weights.pl + python export_weights_to_prolog.py --random --output weights.pl # Random init +""" + +import argparse +import sys +from pathlib import Path +from typing import List, Tuple + +# Try to import torch, but allow running without it for format generation +try: + import torch + TORCH_AVAILABLE = True +except ImportError: + TORCH_AVAILABLE = False + + +def format_prolog_list(values: List[float], precision: int = 8) -> str: + """Format a list of floats as a Prolog list.""" + formatted = [f"{v:.{precision}f}" for v in values] + return "[" + ", ".join(formatted) + "]" + + +def export_layer_weights( + layer_id: int, + weights: List[List[float]], + bias: List[float], + activation: str, + precision: int = 8 +) -> str: + """ + Export a single layer's weights to Prolog facts. + + Args: + layer_id: Layer index (0-based) + weights: Weight matrix as list of rows + bias: Bias vector + activation: Activation function name (linear, relu, swish, sigmoid, softmax) + precision: Decimal precision for floats + + Returns: + Prolog facts as string + """ + lines = [] + lines.append(f"% Layer {layer_id}: {len(weights[0])} -> {len(weights)}, {activation}") + + # Weight rows + for row_idx, row in enumerate(weights): + prolog_row = format_prolog_list(row, precision) + lines.append(f"weight({layer_id}, {row_idx}, {prolog_row}).") + + # Bias + prolog_bias = format_prolog_list(bias, precision) + lines.append(f"bias({layer_id}, {prolog_bias}).") + + # Activation + lines.append(f"layer_activation({layer_id}, {activation}).") + + # Config + in_features = len(weights[0]) + out_features = len(weights) + lines.append(f"layer_config({layer_id}, {in_features}, {out_features}).") + + return "\n".join(lines) + + +def export_mlp_weights( + layer_weights: List[Tuple[List[List[float]], List[float], str]], + output_path: str, + model_name: str = "SRO Decoder Ring MLP" +) -> None: + """ + Export full MLP weights to a Prolog file. + + Args: + layer_weights: List of (weights, bias, activation) tuples per layer + output_path: Output file path + model_name: Name for documentation + """ + with open(output_path, 'w') as f: + # Header + f.write(f"% {model_name} - Weights\n") + f.write("% Generated by export_weights_to_prolog.py\n") + f.write("%\n") + f.write("% Load with: ?- load_weights('weights.pl').\n") + f.write("% Then: ?- mlp([0.5], Energy).\n") + f.write("%\n\n") + + # Discontiguous declarations (required for interleaved facts) + f.write(":- discontiguous weight/3.\n") + f.write(":- discontiguous bias/2.\n") + f.write(":- discontiguous layer_activation/2.\n") + f.write(":- discontiguous layer_config/3.\n\n") + + # Export each layer + for layer_id, (weights, bias, activation) in enumerate(layer_weights): + f.write(export_layer_weights(layer_id, weights, bias, activation)) + f.write("\n\n") + + print(f"Exported {len(layer_weights)} layers to {output_path}") + + +def generate_random_weights( + architecture: List[Tuple[int, int, str]] +) -> List[Tuple[List[List[float]], List[float], str]]: + """ + Generate random weights for a given architecture. 
+ + Args: + architecture: List of (in_features, out_features, activation) tuples + + Returns: + List of (weights, bias, activation) tuples + """ + import random + + layer_weights = [] + for in_features, out_features, activation in architecture: + scale = (2.0 / in_features) ** 0.5 # He initialization + + # Weight matrix: out_features rows, in_features columns + weights = [ + [(random.random() * 2 - 1) * scale for _ in range(in_features)] + for _ in range(out_features) + ] + + # Bias vector + bias = [0.0 for _ in range(out_features)] + + layer_weights.append((weights, bias, activation)) + + return layer_weights + + +def export_from_pytorch(model_path: str, output_path: str) -> None: + """ + Export weights from a PyTorch checkpoint. + + Args: + model_path: Path to PyTorch .pt or .ckpt file + output_path: Output Prolog file path + """ + if not TORCH_AVAILABLE: + print("Error: PyTorch not available. Install with: pip install torch") + sys.exit(1) + + # Load checkpoint + checkpoint = torch.load(model_path, map_location='cpu') + + # Handle different checkpoint formats + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + # Extract layer weights + # Expected keys: mlp.layers.0.weight, mlp.layers.0.bias, etc. + layer_weights = [] + layer_id = 0 + + # Default activations for SRO Decoder architecture + activations = ['swish', 'swish', 'swish', 'swish', 'linear'] + + while True: + # Try different key patterns + weight_keys = [ + f'mlp.layers.{layer_id * 2}.weight', # With activation layers + f'layers.{layer_id * 2}.weight', + f'layer_{layer_id}.weight', + f'weights.{layer_id}', + ] + + weight = None + for key in weight_keys: + if key in state_dict: + weight = state_dict[key] + break + + if weight is None: + break + + # Find corresponding bias + bias_keys = [ + f'mlp.layers.{layer_id * 2}.bias', + f'layers.{layer_id * 2}.bias', + f'layer_{layer_id}.bias', + f'biases.{layer_id}', + ] + + bias = None + for key in bias_keys: + if key in state_dict: + bias = state_dict[key] + break + + if bias is None: + bias = torch.zeros(weight.shape[0]) + + # Convert to lists + # Note: PyTorch Linear stores weights as [out_features, in_features] + # but our mat_vec expects [out_features, in_features] (rows are output neurons) + weights_list = weight.tolist() + bias_list = bias.tolist() + + activation = activations[layer_id] if layer_id < len(activations) else 'linear' + layer_weights.append((weights_list, bias_list, activation)) + + layer_id += 1 + + if not layer_weights: + print(f"Error: Could not find weights in {model_path}") + print(f"Available keys: {list(state_dict.keys())}") + sys.exit(1) + + export_mlp_weights(layer_weights, output_path) + + +def main(): + parser = argparse.ArgumentParser( + description='Export neural network weights to SWI-Prolog format' + ) + parser.add_argument( + '--model', '-m', + help='Path to PyTorch model checkpoint (.pt or .ckpt)' + ) + parser.add_argument( + '--output', '-o', + default='weights.pl', + help='Output Prolog file path (default: weights.pl)' + ) + parser.add_argument( + '--random', '-r', + action='store_true', + help='Generate random weights instead of loading from file' + ) + parser.add_argument( + '--precision', '-p', + type=int, + default=8, + help='Decimal precision for weights (default: 8)' + ) + + args = parser.parse_args() + + if args.random: + # SRO Decoder Ring architecture: 1 -> 6 -> 12 -> 24 -> 6 -> 1 + architecture = [ + (1, 6, 'swish'), + (6, 12, 'swish'), + (12, 24, 'swish'), + (24, 6, 'swish'), + (6, 
1, 'linear'), + ] + layer_weights = generate_random_weights(architecture) + export_mlp_weights(layer_weights, args.output) + elif args.model: + export_from_pytorch(args.model, args.output) + else: + parser.print_help() + print("\nError: Specify --model or --random") + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/PrologEngine/sro_decoder_mlp.pl b/PrologEngine/sro_decoder_mlp.pl new file mode 100644 index 0000000..3364921 --- /dev/null +++ b/PrologEngine/sro_decoder_mlp.pl @@ -0,0 +1,408 @@ +/** + * SRO Decoder Ring MLP - SWI-Prolog Implementation + * + * Implements neural network inference using relational predicates: + * - mat_vec(M, V_in, V_out): Matrix-vector multiplication + * - vec_add(A, B, C): Vector addition + * - activation(Type, V_in, V_out): Activation functions + * + * Architecture: 1 -> 6 -> 12 -> 24 -> 6 -> 1 (energy output) + * + * Usage: + * ?- consult('sro_decoder_mlp.pl'). + * ?- mlp([0.5], Energy). + * ?- mlp_with_trace([0.5], Energy, Trace). + */ + +:- module(sro_decoder_mlp, [ + mat_vec/3, + vec_add/3, + activation/3, + dense_layer/5, + mlp/2, + mlp_with_trace/3, + load_weights/1, + export_prolog_weights/2 +]). + +:- use_module(library(lists)). +:- use_module(library(apply)). + +% ============================================================================= +% Core Predicates +% ============================================================================= + +/** + * mat_vec(+Matrix, +Vector, -Result) + * + * Matrix-vector multiplication predicate. + * Matrix is a list of rows, Vector is a list of values. + * Result[i] = sum(Matrix[i][j] * Vector[j]) for all j + * + * Example: + * ?- mat_vec([[1,2],[3,4]], [1,1], R). + * R = [3, 7]. + */ +mat_vec(Matrix, Vector, Result) :- + maplist(dot_product(Vector), Matrix, Result). + +/** + * dot_product(+V1, +V2, -Product) + * Compute dot product of two vectors. + */ +dot_product(V1, V2, Product) :- + maplist(multiply, V1, V2, Products), + sum_list(Products, Product). + +multiply(X, Y, Z) :- Z is X * Y. + +/** + * vec_add(+A, +B, -C) + * + * Element-wise vector addition predicate. + * C[i] = A[i] + B[i] + * + * Example: + * ?- vec_add([1,2,3], [0.5,0.5,0.5], R). + * R = [1.5, 2.5, 3.5]. + */ +vec_add(A, B, C) :- + maplist(add, A, B, C). + +add(X, Y, Z) :- Z is X + Y. + +/** + * activation(+Type, +V_in, -V_out) + * + * Apply activation function element-wise. + * Supported types: linear, relu, swish, sigmoid, softmax + * + * Example: + * ?- activation(relu, [-1, 0, 1], R). + * R = [0, 0, 1]. + */ +activation(linear, V, V). + +activation(relu, V_in, V_out) :- + maplist(relu_fn, V_in, V_out). + +activation(swish, V_in, V_out) :- + maplist(swish_fn, V_in, V_out). + +activation(sigmoid, V_in, V_out) :- + maplist(sigmoid_fn, V_in, V_out). + +activation(softmax, V_in, V_out) :- + softmax_fn(V_in, V_out). + +% Activation function implementations +relu_fn(X, Y) :- X >= 0 -> Y = X ; Y = 0. + +sigmoid_fn(X, Y) :- Y is 1 / (1 + exp(-X)). + +swish_fn(X, Y) :- + Sig is 1 / (1 + exp(-X)), + Y is X * Sig. + +softmax_fn(V_in, V_out) :- + max_list(V_in, Max), + maplist({Max}/[X, Y]>>(Y is exp(X - Max)), V_in, Exps), + sum_list(Exps, Sum), + maplist({Sum}/[E, S]>>(S is E / Sum), Exps, V_out). + +% ============================================================================= +% Layer Predicates +% ============================================================================= + +/** + * dense_layer(+Weights, +Bias, +Activation, +V_in, -V_out) + * + * Dense (fully connected) layer predicate. 
+ * Computes: V_out = activation(V_in @ Weights + Bias) + * + * This is the composition: + * dense_layer(W, B, Act, V_in, V_out) :- + * mat_vec(W, V_in, Z), + * vec_add(Z, B, A), + * activation(Act, A, V_out). + */ +dense_layer(Weights, Bias, Activation, V_in, V_out) :- + mat_vec(Weights, V_in, Z), + vec_add(Z, Bias, A), + activation(Activation, A, V_out). + +/** + * dense_layer_traced(+LayerId, +Weights, +Bias, +Activation, +V_in, -V_out, -Trace) + * + * Dense layer with trace of intermediate computations. + */ +dense_layer_traced(LayerId, Weights, Bias, Activation, V_in, V_out, Trace) :- + mat_vec(Weights, V_in, Z), + vec_add(Z, Bias, A), + activation(Activation, A, V_out), + Trace = trace(LayerId, V_in, Z, A, V_out). + +% ============================================================================= +% Weight Storage (Dynamic Predicates) +% ============================================================================= + +:- dynamic weight/3. % weight(LayerId, RowIndex, Row) +:- dynamic bias/2. % bias(LayerId, BiasVector) +:- dynamic layer_config/3. % layer_config(LayerId, InFeatures, OutFeatures) +:- dynamic layer_activation/2. % layer_activation(LayerId, ActivationType) + +/** + * get_weight_matrix(+LayerId, -Matrix) + * Retrieve the weight matrix for a layer. + */ +get_weight_matrix(LayerId, Matrix) :- + findall(Row, weight(LayerId, _, Row), Matrix). + +/** + * get_bias_vector(+LayerId, -Bias) + * Retrieve the bias vector for a layer. + */ +get_bias_vector(LayerId, Bias) :- + bias(LayerId, Bias). + +/** + * get_layer_activation(+LayerId, -Activation) + * Get activation type for a layer. + */ +get_layer_activation(LayerId, Activation) :- + layer_activation(LayerId, Activation). + +% ============================================================================= +% MLP Architecture +% ============================================================================= + +/** + * mlp(+Input, -Output) + * + * Full MLP forward pass. + * Architecture: 1 -> 6 -> 12 -> 24 -> 6 -> 1 + * + * Example: + * ?- mlp([0.5], Energy). + */ +mlp(Input, Output) :- + layer(0, Input, H1), + layer(1, H1, H2), + layer(2, H2, H3), + layer(3, H3, H4), + layer(4, H4, Output). + +/** + * layer(+LayerId, +Input, -Output) + * + * Compute single layer given its ID. + */ +layer(LayerId, Input, Output) :- + get_weight_matrix(LayerId, Weights), + get_bias_vector(LayerId, Bias), + get_layer_activation(LayerId, Activation), + dense_layer(Weights, Bias, Activation, Input, Output). + +/** + * mlp_with_trace(+Input, -Output, -Trace) + * + * MLP forward pass with trace of all intermediate activations. + * Useful for debugging and interpretability. + */ +mlp_with_trace(Input, Output, Trace) :- + layer_traced(0, Input, H1, T1), + layer_traced(1, H1, H2, T2), + layer_traced(2, H2, H3, T3), + layer_traced(3, H3, H4, T4), + layer_traced(4, H4, Output, T5), + Trace = [T1, T2, T3, T4, T5]. + +layer_traced(LayerId, Input, Output, Trace) :- + get_weight_matrix(LayerId, Weights), + get_bias_vector(LayerId, Bias), + get_layer_activation(LayerId, Activation), + dense_layer_traced(LayerId, Weights, Bias, Activation, Input, Output, Trace). + +% ============================================================================= +% Weight Loading +% ============================================================================= + +/** + * load_weights(+Filename) + * + * Load weights from a Prolog facts file. + * File format: + * weight(LayerId, RowIndex, [w1, w2, ...]). + * bias(LayerId, [b1, b2, ...]). + * layer_activation(LayerId, swish). 
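 *
 * Example:
 *   ?- load_weights('example_weights.pl'),
 *      mlp([0.5], Energy).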
+ */ +load_weights(Filename) :- + clear_weights, + consult(Filename). + +/** + * clear_weights/0 + * Remove all loaded weights. + */ +clear_weights :- + retractall(weight(_, _, _)), + retractall(bias(_, _)), + retractall(layer_config(_, _, _)), + retractall(layer_activation(_, _)). + +% ============================================================================= +% Default Weights (Random Initialization for Testing) +% ============================================================================= + +/** + * init_random_weights/0 + * Initialize with small random weights for testing. + */ +init_random_weights :- + clear_weights, + init_layer(0, 1, 6, swish), + init_layer(1, 6, 12, swish), + init_layer(2, 12, 24, swish), + init_layer(3, 24, 6, swish), + init_layer(4, 6, 1, linear). + +init_layer(LayerId, InFeatures, OutFeatures, Activation) :- + assertz(layer_config(LayerId, InFeatures, OutFeatures)), + assertz(layer_activation(LayerId, Activation)), + Scale is sqrt(2.0 / InFeatures), + init_weight_rows(LayerId, 0, OutFeatures, InFeatures, Scale), + init_bias(LayerId, OutFeatures). + +init_weight_rows(_, RowIdx, OutFeatures, _, _) :- + RowIdx >= OutFeatures, !. +init_weight_rows(LayerId, RowIdx, OutFeatures, InFeatures, Scale) :- + random_vector(InFeatures, Scale, Row), + assertz(weight(LayerId, RowIdx, Row)), + NextRow is RowIdx + 1, + init_weight_rows(LayerId, NextRow, OutFeatures, InFeatures, Scale). + +init_bias(LayerId, Size) :- + length(Bias, Size), + maplist(=(0.0), Bias), + assertz(bias(LayerId, Bias)). + +random_vector(0, _, []) :- !. +random_vector(N, Scale, [V|Rest]) :- + N > 0, + random(R), + V is (R * 2 - 1) * Scale, + N1 is N - 1, + random_vector(N1, Scale, Rest). + +% ============================================================================= +% Weight Export (for interoperability) +% ============================================================================= + +/** + * export_prolog_weights(+Model, +Filename) + * Export current weights to a Prolog facts file. + */ +export_prolog_weights(_, Filename) :- + open(Filename, write, Stream), + write(Stream, '% SRO Decoder Ring MLP Weights\n'), + write(Stream, '% Generated from Prolog\n\n'), + export_all_weights(Stream), + export_all_biases(Stream), + export_all_activations(Stream), + close(Stream). + +export_all_weights(Stream) :- + forall(weight(L, R, W), + format(Stream, 'weight(~w, ~w, ~w).~n', [L, R, W])). + +export_all_biases(Stream) :- + forall(bias(L, B), + format(Stream, 'bias(~w, ~w).~n', [L, B])). + +export_all_activations(Stream) :- + forall(layer_activation(L, A), + format(Stream, 'layer_activation(~w, ~w).~n', [L, A])). + +% ============================================================================= +% Utility Predicates +% ============================================================================= + +/** + * print_architecture/0 + * Display the current MLP architecture. + */ +print_architecture :- + format('SRO Decoder Ring MLP Architecture~n'), + format('================================~n'), + forall(layer_config(L, In, Out), + (layer_activation(L, Act), + format('Layer ~w: ~w -> ~w (~w)~n', [L, In, Out, Act]))). + +/** + * verify_predicate(+Pred, +Expected) + * Verify a predicate produces expected output. + */ +verify_predicate(Goal, Expected) :- + call(Goal) -> + (Goal = Expected -> + format('PASS: ~w~n', [Goal]) + ; + format('FAIL: Expected ~w, got ~w~n', [Expected, Goal])) + ; + format('FAIL: Goal ~w failed~n', [Goal]). 
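+
+% Example session (illustrative sketch, not part of the test suite below).
+% The weights file name 'mlp_weights.pl' is only a placeholder for whatever
+% the Python-side export (export_mlp_weights) actually writes.
+%
+%   ?- init_random_weights, print_architecture.
+%   ?- mlp([0.5], Energy).            % Energy is a one-element list
+%
+%   ?- load_weights('mlp_weights.pl'),
+%      mlp_with_trace([0.5], Energy, Trace).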
+ +% ============================================================================= +% Self-Test +% ============================================================================= + +/** + * run_tests/0 + * Run basic self-tests. + */ +run_tests :- + format('~n=== SRO Decoder MLP Tests ===~n~n'), + + % Test mat_vec + format('Test mat_vec: '), + (mat_vec([[1,2],[3,4]], [1,1], [3,7]) -> + format('PASS~n') ; format('FAIL~n')), + + % Test vec_add + format('Test vec_add: '), + (vec_add([1,2], [0.5,0.5], [1.5,2.5]) -> + format('PASS~n') ; format('FAIL~n')), + + % Test relu + format('Test relu: '), + (activation(relu, [-1,0,1], [0,0,1]) -> + format('PASS~n') ; format('FAIL~n')), + + % Test swish at 0 + format('Test swish(0): '), + (activation(swish, [0], [0.0]) -> + format('PASS~n') ; format('FAIL~n')), + + % Test sigmoid at 0 + format('Test sigmoid(0): '), + (activation(sigmoid, [0], [0.5]) -> + format('PASS~n') ; format('FAIL~n')), + + % Test dense layer + format('Test dense_layer: '), + (dense_layer([[1,0],[0,1]], [0.5,0.5], linear, [1,2], [1.5,2.5]) -> + format('PASS~n') ; format('FAIL~n')), + + % Test full MLP (with random weights) + format('~nTest full MLP with random weights:~n'), + init_random_weights, + print_architecture, + format('~nRunning mlp([0.5], Output):~n'), + (mlp([0.5], Output) -> + format('Output: ~w~n', [Output]) + ; + format('MLP failed~n')), + + format('~n=== Tests Complete ===~n'). + +% Auto-initialize on load (comment out for production) +% :- init_random_weights. diff --git a/PrologEngine/tensor_autodiff.pl b/PrologEngine/tensor_autodiff.pl new file mode 100644 index 0000000..ea67f32 --- /dev/null +++ b/PrologEngine/tensor_autodiff.pl @@ -0,0 +1,737 @@ +/** + * Tensor Computation Graph with Automatic Differentiation + * + * This module implements a computational graph framework in SWI-Prolog + * that builds execution graphs suitable for automatic differentiation. + * + * Core predicates: + * - dense(Weights, Bias, In, Out) : Dense layer operation + * - relu(In, Out) : ReLU activation + * - swish(In, Out) : Swish activation + * - mse(Expected, Actual, Loss) : Mean squared error loss + * - forward(Graph, Input, Output) : Execute forward pass + * - backward(Graph, Loss, Gradients) : Compute gradients (autodiff) + * + * Usage: + * ?- build_graph(mlp, Graph), forward(Graph, [0.5], Out), backward(Graph, Out, Grads). + */ + +:- module(tensor_autodiff, [ + % Tensor operations (build graph) + dense/4, + bias_add/3, + relu/2, + swish/2, + sigmoid/2, + softmax/2, + mse/3, + mae/3, + + % Graph operations + build_mlp/3, + forward/3, + backward/2, + + % Tensor utilities + tensor/2, + tensor_shape/2, + zeros/2, + ones/2, + random_tensor/3, + from_list/2, + + % Graph utilities + reset_graph/0, + print_graph/0, + export_graph/2, + + % Testing + run_autodiff_tests/0 +]). + +:- use_module(library(lists)). +:- use_module(library(apply)). + +% Discontiguous declarations +:- discontiguous grad/4. + +% ============================================================================= +% Tensor Representation +% ============================================================================= + +/** + * tensor(Id, Data) + * + * A tensor is represented as tensor(Id, Data) where: + * - Id: unique identifier for the tensor node in the graph + * - Data: the actual numerical data (list or nested list) + * + * Tensors can also be symbolic placeholders: + * - input(Name): Input placeholder + * - param(Name): Trainable parameter + * - const(Value): Constant value + */ + +:- dynamic tensor_store/2. 
% tensor_store(Id, Data) +:- dynamic tensor_shape/2. % tensor_shape(Id, Shape) +:- dynamic tensor_grad/2. % tensor_grad(Id, Gradient) +:- dynamic graph_node/3. % graph_node(OutputId, Op, Inputs) +:- dynamic param/2. % param(Name, TensorId) + +% Generate unique tensor ID +:- dynamic tensor_counter/1. +tensor_counter(0). + +new_tensor_id(Id) :- + retract(tensor_counter(N)), + Id is N + 1, + assertz(tensor_counter(Id)). + +reset_graph :- + retractall(tensor_store(_, _)), + retractall(tensor_shape(_, _)), + retractall(tensor_grad(_, _)), + retractall(graph_node(_, _, _)), + retractall(param(_, _)), + retractall(tensor_counter(_)), + assertz(tensor_counter(0)). + +% ============================================================================= +% Tensor Constructors +% ============================================================================= + +/** + * tensor(+Shape, -Tensor) + * Create a new tensor with given shape (uninitialized). + */ +tensor(Shape, tensor(Id, Shape)) :- + new_tensor_id(Id), + assertz(tensor_shape(Id, Shape)). + +/** + * zeros(+Shape, -Tensor) + * Create a tensor filled with zeros. + */ +zeros(Shape, tensor(Id, Data)) :- + new_tensor_id(Id), + create_zeros(Shape, Data), + assertz(tensor_store(Id, Data)), + assertz(tensor_shape(Id, Shape)). + +create_zeros([], 0.0) :- !. +create_zeros([N], List) :- !, + length(List, N), + maplist(=(0.0), List). +create_zeros([N|Rest], List) :- + length(List, N), + maplist(create_zeros(Rest), List). + +/** + * ones(+Shape, -Tensor) + * Create a tensor filled with ones. + */ +ones(Shape, tensor(Id, Data)) :- + new_tensor_id(Id), + create_ones(Shape, Data), + assertz(tensor_store(Id, Data)), + assertz(tensor_shape(Id, Shape)). + +create_ones([], 1.0) :- !. +create_ones([N], List) :- !, + length(List, N), + maplist(=(1.0), List). +create_ones([N|Rest], List) :- + length(List, N), + maplist(create_ones(Rest), List). + +/** + * random_tensor(+Shape, +Scale, -Tensor) + * Create a tensor with random values scaled by Scale. + */ +random_tensor(Shape, Scale, tensor(Id, Data)) :- + new_tensor_id(Id), + create_random(Shape, Scale, Data), + assertz(tensor_store(Id, Data)), + assertz(tensor_shape(Id, Shape)). + +create_random([], Scale, V) :- !, + random(R), + V is (R * 2 - 1) * Scale. +create_random([N], Scale, List) :- !, + length(List, N), + maplist({Scale}/[V]>>(random(R), V is (R * 2 - 1) * Scale), List). +create_random([N|Rest], Scale, List) :- + length(List, N), + maplist(create_random(Rest, Scale), List). + +/** + * from_list(+Data, -Tensor) + * Create a tensor from a nested list. + */ +from_list(Data, tensor(Id, Data)) :- + new_tensor_id(Id), + infer_shape(Data, Shape), + assertz(tensor_store(Id, Data)), + assertz(tensor_shape(Id, Shape)). + +infer_shape(X, []) :- number(X), !. +infer_shape([], [0]) :- !. +infer_shape([H|T], [N|Rest]) :- + length([H|T], N), + infer_shape(H, Rest). + +% ============================================================================= +% Graph Node Representation +% ============================================================================= + +/** + * Graph nodes represent operations in the computation graph. + * + * node(Id, Op, Inputs, Output, GradFn) + * - Id: unique node identifier + * - Op: operation name (dense, relu, mse, etc.) + * - Inputs: list of input tensor IDs + * - Output: output tensor ID + * - GradFn: gradient function for backprop + */ + +:- dynamic node/5. +:- dynamic node_counter/1. +node_counter(0). + +new_node_id(Id) :- + retract(node_counter(N)), + Id is N + 1, + assertz(node_counter(Id)). 
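+
+% Example (sketch): creating the leaf tensors that the graph-building
+% operations below consume. IDs are allocated sequentially, so immediately
+% after reset_graph/0 the first tensor gets ID 1:
+%
+%   ?- reset_graph, from_list([[1.0, 2.0], [3.0, 4.0]], T).
+%   T = tensor(1, [[1.0, 2.0], [3.0, 4.0]]).   % shape [2, 2] recorded via tensor_shape/2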
+
+add_node(Op, Inputs, Output, GradFn) :-
+    new_node_id(NodeId),
+    assertz(node(NodeId, Op, Inputs, Output, GradFn)),
+    assertz(graph_node(Output, Op, Inputs)).
+
+% =============================================================================
+% Core Operations (Build Computation Graph)
+% =============================================================================
+
+/**
+ * dense(+Weights, +Bias, +Input, -Output)
+ *
+ * Dense (fully connected) layer: Output = Weights @ Input + Bias
+ * (Weights is a list of rows, one row per output unit).
+ *
+ * Builds a graph node for the operation.
+ * Gradient: dL/dW[i][j] = dL/dOut[i] * Input[j],  dL/dIn = W^T @ dL/dOut
+ */
+dense(Weights, Bias, Input, Output) :-
+    % Get tensor IDs
+    tensor_id(Weights, WId),
+    tensor_id(Bias, BId),
+    tensor_id(Input, InId),
+
+    % Compute forward pass
+    get_tensor_data(WId, WData),
+    get_tensor_data(BId, BData),
+    get_tensor_data(InId, InData),
+
+    mat_vec_compute(WData, InData, MvResult),
+    vec_add_compute(MvResult, BData, OutData),
+
+    % Create output tensor
+    new_tensor_id(OutId),
+    assertz(tensor_store(OutId, OutData)),
+    Output = tensor(OutId, OutData),
+
+    % Record in graph with gradient function
+    add_node(dense, [WId, BId, InId], OutId, grad_dense(WId, BId, InId)).
+
+/**
+ * bias_add(+Bias, +Input, -Output)
+ *
+ * Add bias to input: Output = Input + Bias
+ */
+bias_add(Bias, Input, Output) :-
+    tensor_id(Bias, BId),
+    tensor_id(Input, InId),
+
+    get_tensor_data(BId, BData),
+    get_tensor_data(InId, InData),
+
+    vec_add_compute(InData, BData, OutData),
+
+    new_tensor_id(OutId),
+    assertz(tensor_store(OutId, OutData)),
+    Output = tensor(OutId, OutData),
+
+    add_node(bias_add, [BId, InId], OutId, grad_bias_add(BId, InId)).
+
+/**
+ * relu(+Input, -Output)
+ *
+ * ReLU activation: Output = max(0, Input)
+ * Gradient: dL/dIn = dL/dOut * (Input > 0 ? 1 : 0)
+ */
+relu(Input, Output) :-
+    tensor_id(Input, InId),
+    get_tensor_data(InId, InData),
+
+    map_relu(InData, OutData),
+
+    new_tensor_id(OutId),
+    assertz(tensor_store(OutId, OutData)),
+    Output = tensor(OutId, OutData),
+
+    add_node(relu, [InId], OutId, grad_relu(InId)).
+
+map_relu(X, Y) :- number(X), !, (X > 0 -> Y = X ; Y = 0.0).
+map_relu(List, Result) :-
+    is_list(List),
+    maplist(map_relu, List, Result).
+
+/**
+ * swish(+Input, -Output)
+ *
+ * Swish activation: Output = Input * sigmoid(Input)
+ * Gradient: dL/dIn = dL/dOut * (swish(x) + sigmoid(x) * (1 - swish(x)))
+ */
+swish(Input, Output) :-
+    tensor_id(Input, InId),
+    get_tensor_data(InId, InData),
+
+    map_swish(InData, OutData),
+
+    new_tensor_id(OutId),
+    assertz(tensor_store(OutId, OutData)),
+    Output = tensor(OutId, OutData),
+
+    add_node(swish, [InId], OutId, grad_swish(InId)).
+
+map_swish(X, Y) :-
+    number(X), !,
+    Sig is 1 / (1 + exp(-X)),
+    Y is X * Sig.
+map_swish(List, Result) :-
+    is_list(List),
+    maplist(map_swish, List, Result).
+
+/**
+ * sigmoid(+Input, -Output)
+ *
+ * Sigmoid activation: Output = 1 / (1 + exp(-Input))
+ * Gradient: dL/dIn = dL/dOut * sigmoid(x) * (1 - sigmoid(x))
+ */
+sigmoid(Input, Output) :-
+    tensor_id(Input, InId),
+    get_tensor_data(InId, InData),
+
+    map_sigmoid(InData, OutData),
+
+    new_tensor_id(OutId),
+    assertz(tensor_store(OutId, OutData)),
+    Output = tensor(OutId, OutData),
+
+    add_node(sigmoid, [InId], OutId, grad_sigmoid(InId)).
+
+map_sigmoid(X, Y) :- number(X), !, Y is 1 / (1 + exp(-X)).
+map_sigmoid(List, Result) :-
+    is_list(List),
+    maplist(map_sigmoid, List, Result).
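+
+% Example (sketch): dense/4 computes its result eagerly and records a graph
+% node at the same time. With identity weights the output is input + bias;
+% the concrete tensor ID in Out depends on how many tensors already exist.
+%
+%   ?- reset_graph,
+%      from_list([[1.0, 0.0], [0.0, 1.0]], W),
+%      from_list([0.5, 0.5], B),
+%      from_list([1.0, 2.0], In),
+%      dense(W, B, In, Out).
+%   Out = tensor(_, [1.5, 2.5]).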
+ +/** + * softmax(+Input, -Output) + * + * Softmax activation (for classification) + */ +softmax(Input, Output) :- + tensor_id(Input, InId), + get_tensor_data(InId, InData), + + compute_softmax(InData, OutData), + + new_tensor_id(OutId), + assertz(tensor_store(OutId, OutData)), + Output = tensor(OutId, OutData), + + add_node(softmax, [InId], OutId, grad_softmax(InId)). + +compute_softmax(List, Result) :- + max_list(List, Max), + maplist({Max}/[X, E]>>(E is exp(X - Max)), List, Exps), + sum_list(Exps, Sum), + maplist({Sum}/[E, S]>>(S is E / Sum), Exps, Result). + +/** + * mse(+Expected, +Actual, -Loss) + * + * Mean Squared Error loss: Loss = mean((Expected - Actual)^2) + * Gradient: dL/dActual = 2 * (Actual - Expected) / N + */ +mse(Expected, Actual, Loss) :- + tensor_id(Expected, ExpId), + tensor_id(Actual, ActId), + + get_tensor_data(ExpId, ExpData), + get_tensor_data(ActId, ActData), + + compute_mse(ExpData, ActData, LossVal), + + new_tensor_id(LossId), + assertz(tensor_store(LossId, LossVal)), + Loss = tensor(LossId, LossVal), + + add_node(mse, [ExpId, ActId], LossId, grad_mse(ExpId, ActId)). + +compute_mse(Exp, Act, Loss) :- + flatten(Exp, ExpFlat), + flatten(Act, ActFlat), + maplist([E, A, D]>>(D is (E - A)^2), ExpFlat, ActFlat, Diffs), + sum_list(Diffs, Sum), + length(Diffs, N), + Loss is Sum / N. + +/** + * mae(+Expected, +Actual, -Loss) + * + * Mean Absolute Error loss: Loss = mean(|Expected - Actual|) + */ +mae(Expected, Actual, Loss) :- + tensor_id(Expected, ExpId), + tensor_id(Actual, ActId), + + get_tensor_data(ExpId, ExpData), + get_tensor_data(ActId, ActData), + + compute_mae(ExpData, ActData, LossVal), + + new_tensor_id(LossId), + assertz(tensor_store(LossId, LossVal)), + Loss = tensor(LossId, LossVal), + + add_node(mae, [ExpId, ActId], LossId, grad_mae(ExpId, ActId)). + +compute_mae(Exp, Act, Loss) :- + flatten(Exp, ExpFlat), + flatten(Act, ActFlat), + maplist([E, A, D]>>(D is abs(E - A)), ExpFlat, ActFlat, Diffs), + sum_list(Diffs, Sum), + length(Diffs, N), + Loss is Sum / N. + +% ============================================================================= +% Helper Functions +% ============================================================================= + +tensor_id(tensor(Id, _), Id) :- !. +tensor_id(Id, Id) :- integer(Id). + +get_tensor_data(Id, Data) :- + tensor_store(Id, Data), !. +get_tensor_data(tensor(_, Data), Data). + +mat_vec_compute(Matrix, Vector, Result) :- + maplist(dot_product_compute(Vector), Matrix, Result). + +dot_product_compute(V1, V2, Product) :- + maplist([X, Y, Z]>>(Z is X * Y), V1, V2, Products), + sum_list(Products, Product). + +vec_add_compute(A, B, C) :- + maplist([X, Y, Z]>>(Z is X + Y), A, B, C). + +% ============================================================================= +% Gradient Functions (Backward Pass) +% ============================================================================= + +/** + * grad(+Op, +Inputs, +GradOutput, -GradInputs) + * + * Compute gradients for each operation type. + */ + +% ReLU gradient: pass through where input > 0 +grad(relu, [InId], GradOut, [GradIn]) :- + get_tensor_data(InId, InData), + compute_relu_grad(InData, GradOut, GradIn). + +compute_relu_grad(In, GradOut, GradIn) :- + number(In), !, + (In > 0 -> GradIn = GradOut ; GradIn = 0). +compute_relu_grad(InList, GradOutList, GradInList) :- + maplist(compute_relu_grad, InList, GradOutList, GradInList). 
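+
+% Quick sanity check of the ReLU gradient rule above (scalar case, sketch):
+%
+%   ?- compute_relu_grad(2.0, 1.0, G).    % input > 0: gradient passes through
+%   G = 1.0.
+%   ?- compute_relu_grad(-2.0, 1.0, G).   % input <= 0: gradient is blocked
+%   G = 0.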
+ +% Swish gradient: swish'(x) = swish(x) + sigmoid(x)(1 - swish(x)) +grad(swish, [InId], GradOut, [GradIn]) :- + get_tensor_data(InId, InData), + compute_swish_grad(InData, GradOut, GradIn). + +compute_swish_grad(In, GradOut, GradIn) :- + number(In), !, + Sig is 1 / (1 + exp(-In)), + Swish is In * Sig, + Deriv is Swish + Sig * (1 - Swish), + GradIn is GradOut * Deriv. +compute_swish_grad(InList, GradOutList, GradInList) :- + maplist(compute_swish_grad, InList, GradOutList, GradInList). + +% Sigmoid gradient: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)) +grad(sigmoid, [InId], GradOut, [GradIn]) :- + get_tensor_data(InId, InData), + compute_sigmoid_grad(InData, GradOut, GradIn). + +compute_sigmoid_grad(In, GradOut, GradIn) :- + number(In), !, + Sig is 1 / (1 + exp(-In)), + Deriv is Sig * (1 - Sig), + GradIn is GradOut * Deriv. +compute_sigmoid_grad(InList, GradOutList, GradInList) :- + maplist(compute_sigmoid_grad, InList, GradOutList, GradInList). + +% MSE gradient: 2 * (actual - expected) / N +grad(mse, [ExpId, ActId], GradOut, [GradExp, GradAct]) :- + get_tensor_data(ExpId, ExpData), + get_tensor_data(ActId, ActData), + compute_mse_grad(ExpData, ActData, GradOut, GradExp, GradAct). + +compute_mse_grad(Exp, Act, GradOut, GradExp, GradAct) :- + flatten(Exp, ExpFlat), + flatten(Act, ActFlat), + length(ExpFlat, N), + Scale is 2 * GradOut / N, + maplist({Scale}/[E, A, GE]>>(GE is Scale * (E - A)), ExpFlat, ActFlat, GradExp), + maplist({Scale}/[E, A, GA]>>(GA is Scale * (A - E)), ExpFlat, ActFlat, GradAct). + +% Dense layer gradient +grad(dense, [WId, _BId, InId], GradOut, [GradW, GradB, GradIn]) :- + get_tensor_data(WId, WData), + get_tensor_data(InId, InData), + compute_dense_grad(WData, InData, GradOut, GradW, GradB, GradIn). + +compute_dense_grad(W, In, GradOut, GradW, GradB, GradIn) :- + % GradB = GradOut + GradB = GradOut, + % GradW[i][j] = In[j] * GradOut[i] + maplist({In}/[GO, Row]>>maplist({GO}/[I, G]>>(G is I * GO), In, Row), GradOut, GradW), + % GradIn[j] = sum_i(W[i][j] * GradOut[i]) + transpose_matrix(W, WT), + mat_vec_compute(WT, GradOut, GradIn). + +transpose_matrix([], []) :- !. +transpose_matrix([[]|_], []) :- !. +transpose_matrix(Matrix, [Row|Rows]) :- + maplist(nth0(0), Matrix, Row), + maplist(select_tail, Matrix, RestMatrix), + transpose_matrix(RestMatrix, Rows). + +select_tail([_|T], T). + +% ============================================================================= +% Backward Pass (Automatic Differentiation) +% ============================================================================= + +/** + * backward(+LossTensor, -Gradients) + * + * Perform backward pass through the computation graph. + * Returns gradients for all parameters. + */ +backward(LossTensor, Gradients) :- + tensor_id(LossTensor, LossId), + % Start with gradient of 1 for the loss + backward_from(LossId, 1.0, Gradients). + +backward_from(NodeId, GradOut, Gradients) :- + (graph_node(NodeId, Op, Inputs) -> + % Compute gradients for this node + grad(Op, Inputs, GradOut, GradInputs), + % Recursively backprop to inputs + maplist(backward_from_pair, Inputs, GradInputs, GradLists), + append(GradLists, Gradients) + ; + % Leaf node (parameter or input) + Gradients = [(NodeId, GradOut)] + ). + +backward_from_pair(NodeId, GradOut, Gradients) :- + backward_from(NodeId, GradOut, Gradients). 
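+
+% Example (sketch): differentiating a single MSE node with backward/2.
+% With expected = [1.0] and actual = [0.5], dL/dActual = 2*(0.5 - 1.0)/1 = -1.0
+% and dL/dExpected = 2*(1.0 - 0.5)/1 = 1.0. Gradients come back keyed by
+% tensor ID (here 1 = expected, 2 = actual, since the graph is fresh):
+%
+%   ?- reset_graph,
+%      from_list([1.0], Exp),
+%      from_list([0.5], Act),
+%      mse(Exp, Act, Loss),
+%      backward(Loss, Grads).
+%   Grads = [(1, [1.0]), (2, [-1.0])].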
+ +% ============================================================================= +% Graph Building Utilities +% ============================================================================= + +/** + * build_mlp(+LayerSizes, +Activations, -Graph) + * + * Build an MLP computation graph. + * Example: build_mlp([1, 6, 12, 24, 6, 1], [swish, swish, swish, swish, linear], Graph) + */ +build_mlp(Sizes, Activations, graph(Params, InputId, OutputId)) :- + reset_graph, + % Create input placeholder + new_tensor_id(InputId), + % Build layers + build_layers(Sizes, Activations, InputId, OutputId, Params). + +build_layers([_], [], LastId, LastId, []) :- !. +build_layers([In, Out|Rest], [Act|Acts], PrevId, FinalId, [param(W, B)|Params]) :- + % Create weight and bias parameters + Scale is sqrt(2.0 / In), + random_tensor([Out, In], Scale, W), + zeros([Out], B), + % Forward through this layer + tensor_id(W, WId), + tensor_id(B, BId), + new_tensor_id(OutId), + add_node(dense, [WId, BId, PrevId], OutId, grad_dense(WId, BId, PrevId)), + % Apply activation + apply_activation(Act, OutId, ActOutId), + % Continue + build_layers([Out|Rest], Acts, ActOutId, FinalId, Params). + +apply_activation(linear, InId, InId) :- !. +apply_activation(relu, InId, OutId) :- + new_tensor_id(OutId), + add_node(relu, [InId], OutId, grad_relu(InId)). +apply_activation(swish, InId, OutId) :- + new_tensor_id(OutId), + add_node(swish, [InId], OutId, grad_swish(InId)). +apply_activation(sigmoid, InId, OutId) :- + new_tensor_id(OutId), + add_node(sigmoid, [InId], OutId, grad_sigmoid(InId)). +apply_activation(softmax, InId, OutId) :- + new_tensor_id(OutId), + add_node(softmax, [InId], OutId, grad_softmax(InId)). + +% ============================================================================= +% Forward Pass +% ============================================================================= + +/** + * forward(+Graph, +Input, -Output) + * + * Execute forward pass through the computation graph. + */ +forward(graph(_, InputId, OutputId), InputData, OutputData) :- + % Store input data + assertz(tensor_store(InputId, InputData)), + % Execute graph nodes in topological order + execute_graph(InputId, OutputId), + % Get output + get_tensor_data(OutputId, OutputData). + +execute_graph(InputId, OutputId) :- + findall(node(N, Op, Ins, Out), node(N, Op, Ins, Out, _), Nodes), + execute_nodes(Nodes, InputId, OutputId). + +execute_nodes([], _, _) :- !. +execute_nodes([node(_, Op, Inputs, OutId)|Rest], InputId, FinalId) :- + % Check if all inputs are available + maplist(input_available, Inputs), + % Execute operation + execute_op(Op, Inputs, OutId), + % Continue + execute_nodes(Rest, InputId, FinalId). + +input_available(Id) :- tensor_store(Id, _). + +execute_op(dense, [WId, BId, InId], OutId) :- + get_tensor_data(WId, W), + get_tensor_data(BId, B), + get_tensor_data(InId, In), + mat_vec_compute(W, In, Mv), + vec_add_compute(Mv, B, Out), + assertz(tensor_store(OutId, Out)). + +execute_op(relu, [InId], OutId) :- + get_tensor_data(InId, In), + map_relu(In, Out), + assertz(tensor_store(OutId, Out)). + +execute_op(swish, [InId], OutId) :- + get_tensor_data(InId, In), + map_swish(In, Out), + assertz(tensor_store(OutId, Out)). + +execute_op(sigmoid, [InId], OutId) :- + get_tensor_data(InId, In), + map_sigmoid(In, Out), + assertz(tensor_store(OutId, Out)). 
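+
+% Example (sketch): build the decoder-ring architecture and run one forward
+% pass. build_mlp/3 initialises the weights randomly, so the numeric output
+% varies between runs; only its shape (a one-element list) is fixed. Note
+% that execute_op/3 currently covers dense, relu, swish and sigmoid, so
+% graphs containing softmax nodes cannot be run by forward/3 yet.
+%
+%   ?- build_mlp([1, 6, 12, 24, 6, 1],
+%                [swish, swish, swish, swish, linear], Graph),
+%      forward(Graph, [0.5], Out).
+%   % Out = [Energy], a single-element list with the scalar energy prediction.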
+ +% ============================================================================= +% Execution Graph Export +% ============================================================================= + +/** + * export_graph(+Format, -GraphRepr) + * + * Export the computation graph in various formats. + */ +export_graph(dot, DotString) :- + findall(node(N, Op, Ins, Out), node(N, Op, Ins, Out, _), Nodes), + export_as_dot(Nodes, DotString). + +export_as_dot(Nodes, DotString) :- + format(string(Header), 'digraph ComputationGraph {~n rankdir=TB;~n', []), + maplist(node_to_dot, Nodes, NodeStrings), + maplist(edges_to_dot, Nodes, EdgeStrings), + atomic_list_concat([Header|NodeStrings], NodesPart), + atomic_list_concat(EdgeStrings, EdgesPart), + format(string(DotString), '~w~w}~n', [NodesPart, EdgesPart]). + +node_to_dot(node(N, Op, _, Out), String) :- + format(string(String), ' ~w [label="~w\\n(~w)"];~n', [Out, Op, N]). + +edges_to_dot(node(_, _, Inputs, Out), String) :- + maplist({Out}/[In, S]>>format(string(S), ' ~w -> ~w;~n', [In, Out]), Inputs, Strings), + atomic_list_concat(Strings, String). + +/** + * print_graph/0 + * Print the current computation graph. + */ +print_graph :- + format('Computation Graph:~n'), + format('=================~n'), + forall(node(N, Op, Inputs, Output, _), + format('Node ~w: ~w(~w) -> ~w~n', [N, Op, Inputs, Output])). + +% ============================================================================= +% Self-Test +% ============================================================================= + +run_autodiff_tests :- + format('~n=== Tensor Autodiff Tests ===~n~n'), + + % Test tensor creation + format('Test tensor creation: '), + reset_graph, + from_list([1.0, 2.0, 3.0], T1), + (T1 = tensor(_, [1.0, 2.0, 3.0]) -> format('PASS~n') ; format('FAIL~n')), + + % Test relu + format('Test relu operation: '), + reset_graph, + from_list([-1.0, 0.0, 1.0, 2.0], Input), + relu(Input, Output), + tensor_id(Output, OutId), + get_tensor_data(OutId, OutData), + (OutData = [0.0, 0.0, 1.0, 2.0] -> format('PASS~n') ; format('FAIL~n')), + + % Test graph recording + format('Test graph recording: '), + (graph_node(OutId, relu, _) -> format('PASS~n') ; format('FAIL~n')), + + % Test MSE loss + format('Test MSE loss: '), + reset_graph, + from_list([1.0, 2.0], Exp), + from_list([1.5, 2.5], Act), + mse(Exp, Act, Loss), + tensor_id(Loss, LossId), + get_tensor_data(LossId, LossVal), + (abs(LossVal - 0.25) < 0.001 -> format('PASS~n') ; format('FAIL~n')), + + format('~n=== Tests Complete ===~n'). + +% Entry point for testing +:- initialization(( + format('Tensor Autodiff module loaded.~n'), + format('Run run_autodiff_tests for self-tests.~n') +)).
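+
+% Example (sketch): typical interactive use from the SWI-Prolog toplevel.
+% The consulted path is relative to the working directory.
+%
+%   ?- consult('PrologEngine/tensor_autodiff.pl').
+%   ?- run_autodiff_tests.
+%   ?- build_mlp([1, 6, 1], [swish, linear], _G),
+%      export_graph(dot, Dot),
+%      format('~w', [Dot]).              % Graphviz DOT text for visualisation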