microsoft · fw7th · Dec 24, 2025 · Jan 3, 2026 · Copilot · Dec 30, 2025
diff --git a/onnxscript/function_libs/torch_lib/ops/core.py b/onnxscript/function_libs/torch_lib/ops/core.py
@@ -1208,12 +1208,31 @@ def aten_bilinear(
     # bias shape: (out_features) - optional
     # output shape: (..., out_features)
 
-    # Use Einsum to compute the bilinear transformation
-    # "...i,oij,...j->...o" means:
-    # - input1[..., i] * weight[o, i, j] * input2[..., j] -> output[..., o]
-    result = op.Einsum(input1, weight, input2, equation="...i,oij,...j->...o")
+    # Use MatMul to compute the bilinear transformation
+    batch_size = op.Shape(input1, start=0, end=1)
-    batch_size = op.Shape(input1, start=0, end=1)
+    batch_size = op.Shape(input1, end=-1)
-    batch_size = op.Shape(input1, start=0, end=1)
+    batch_size = op.Shape(input1, end=-1)
+    input1_shape = op.Shape(input1, start=-1)
+    input2_shape = op.Shape(input2, start=-1)
+    output_shape = op.Shape(weight, start=0, end=1)
+    neg_1 = op.Constant(value_ints=[-1])
+
+    # (O, H1, H2) -> (H1, O, H2) where O = output_shape, H1 = input1_shape, H2 = input2_shape
+    W_permute = op.Transpose(weight, perm=[1, 0, 2])
+
+    # (H1, O, H2) -> (H1, O * H2)
+    W_flat = op.Reshape(
+        W_permute,
+        op.Concat(input1_shape, op.Mul(output_shape, input2_shape), axis=0),
+    )
+
+    # (B, H1) @ (H1, O*H2) -> (B, O*H2)
+    tmp = op.MatMul(input1, W_flat)
+
+    # (B, O*H2) -> (B, O, H2)
+    tmp = op.Reshape(tmp, op.Concat(batch_size, output_shape, input2_shape, axis=0))
+
+    # (B, H2) -> (B, H2, 1) -> (B, O, H2) @ (B, H2, 1) -> (B, O, 1) -> (B, O)
-    # (O, H1, H2) -> (H1, O, H2) where O = output_shape, H1 = input1_shape, H2 = input2_shape
-    W_permute = op.Transpose(weight, perm=[1, 0, 2])
-
-    # (H1, O, H2) -> (H1, O * H2)
-    W_flat = op.Reshape(
-        W_permute,
-        op.Concat(input1_shape, op.Mul(output_shape, input2_shape), axis=0),
-    )
-
-    # (B, H1) @ (H1, O*H2) -> (B, O*H2)
-    tmp = op.MatMul(input1, W_flat)
-
-    # (B, O*H2) -> (B, O, H2)
-    tmp = op.Reshape(tmp, op.Concat(batch_size, output_shape, input2_shape, axis=0))
-
-    # (B, H2) -> (B, H2, 1) -> (B, O, H2) @ (B, H2, 1) -> (B, O, 1) -> (B, O)
+    # (out_features, in1_features, in2_features) -> (in1_features, out_features, in2_features)
+    W_permute = op.Transpose(weight, perm=[1, 0, 2])
+
+    # (in1_features, out_features, in2_features) -> (in1_features, out_features * in2_features)
+    W_flat = op.Reshape(
+        W_permute,
+        op.Concat(input1_shape, op.Mul(output_shape, input2_shape), axis=0),
+    )
+
+    # (batch_size, in1_features) @ (in1_features, out_features * in2_features)
+    #   -> (batch_size, out_features * in2_features)
+    tmp = op.MatMul(input1, W_flat)
+
+    # (batch_size, out_features * in2_features)
+    #   -> (batch_size, out_features, in2_features)
+    tmp = op.Reshape(tmp, op.Concat(batch_size, output_shape, input2_shape, axis=0))
+
+    # (batch_size, in2_features) -> (batch_size, in2_features, 1)
+    #   -> (batch_size, out_features, in2_features) @ (batch_size, in2_features, 1)
+    #   -> (batch_size, out_features, 1) -> (batch_size, out_features)
-    # (O, H1, H2) -> (H1, O, H2) where O = output_shape, H1 = input1_shape, H2 = input2_shape
-    W_permute = op.Transpose(weight, perm=[1, 0, 2])
-
-    # (H1, O, H2) -> (H1, O * H2)
-    W_flat = op.Reshape(
-        W_permute,
-        op.Concat(input1_shape, op.Mul(output_shape, input2_shape), axis=0),
-    )
-
-    # (B, H1) @ (H1, O*H2) -> (B, O*H2)
-    tmp = op.MatMul(input1, W_flat)
-
-    # (B, O*H2) -> (B, O, H2)
-    tmp = op.Reshape(tmp, op.Concat(batch_size, output_shape, input2_shape, axis=0))
-
-    # (B, H2) -> (B, H2, 1) -> (B, O, H2) @ (B, H2, 1) -> (B, O, 1) -> (B, O)
+    # (out_features, in1_features, in2_features) -> (in1_features, out_features, in2_features)
+    W_permute = op.Transpose(weight, perm=[1, 0, 2])
+
+    # (in1_features, out_features, in2_features) -> (in1_features, out_features * in2_features)
+    W_flat = op.Reshape(
+        W_permute,
+        op.Concat(input1_shape, op.Mul(output_shape, input2_shape), axis=0),
+    )
+
+    # (batch_size, in1_features) @ (in1_features, out_features * in2_features)
+    #   -> (batch_size, out_features * in2_features)
+    tmp = op.MatMul(input1, W_flat)
+
+    # (batch_size, out_features * in2_features)
+    #   -> (batch_size, out_features, in2_features)
+    tmp = op.Reshape(tmp, op.Concat(batch_size, output_shape, input2_shape, axis=0))
+
+    # (batch_size, in2_features) -> (batch_size, in2_features, 1)
+    #   -> (batch_size, out_features, in2_features) @ (batch_size, in2_features, 1)
+    #   -> (batch_size, out_features, 1) -> (batch_size, out_features)
+    result = op.Squeeze(op.MatMul(tmp, op.Unsqueeze(input2, neg_1)), neg_1)
 
-    # Add bias if provided
     if bias is not None:
         result = op.Add(result, bias)