6 changes: 5 additions & 1 deletion .github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
          sudo apt-get -q update
          sudo apt-get -qy install \
            build-essential g++ \
            intel-mkl-full python3 python3-pytest
            intel-mkl-full python3 python3-pytest python3-pybind11

      - name: Build project
        run: |
@@ -28,3 +28,7 @@ jobs:

      - name: Run correctness test
        run: make test

      - name: Run Python post-processing tests
        run: |
          make pytest
7 changes: 5 additions & 2 deletions .gitignore
@@ -1,4 +1,7 @@
.o
*.o
build
.vscode
.txt
*.txt
*.so
__pycache__
.pytest_cache
22 changes: 18 additions & 4 deletions Makefile
@@ -1,6 +1,10 @@
CXX := g++
CXXFLAGS := -std=c++17 -O2 -Wall -I./src -march=native

PYBIND11_INC := $(shell python3 -m pybind11 --includes)
PYEXT := $(shell python3-config --extension-suffix)


# ---- MKL toggle ----
ifeq ($(USE_MKL),1)
MKL_INC := /usr/include/mkl
@@ -28,11 +32,12 @@ HEADERS := \
$(SRC_DIR)/storage_policies.hpp \
$(SRC_DIR)/matrix.hpp \
$(SRC_DIR)/matrix_ops.hpp \
$(SRC_DIR)/lut_utils.hpp
$(SRC_DIR)/lut_utils.hpp \
$(SRC_DIR)/post_processing.hpp

.PHONY: all run test clean
.PHONY: all run test clean pytest

all: $(BUILD_DIR) $(TARGET_MAIN) $(TARGET_CORR)
all: $(BUILD_DIR) $(TARGET_MAIN) $(TARGET_CORR) mpgemm$(PYEXT)

# ensure build directory exists
$(BUILD_DIR):
@@ -46,11 +51,20 @@ $(TARGET_MAIN): $(TEST_SRC) $(HEADERS)
$(TARGET_CORR): $(CORR_SRC) $(HEADERS)
	$(CXX) $(CXXFLAGS) $(CORR_SRC) -o $(TARGET_CORR) $(LDFLAGS) $(LDLIBS)

# build pybind11 module
mpgemm$(PYEXT): src/bindings.cpp $(HEADERS)
	$(CXX) $(CXXFLAGS) $(PYBIND11_INC) -fPIC -shared src/bindings.cpp -o $@

# run pytest
pytest: all
	PYTHONPATH=. python3 -m pytest -q tests/test_post_process.py

run: all
	./$(TARGET_MAIN)

test: $(TARGET_CORR)
	./$(TARGET_CORR)

clean:
	rm -rf $(BUILD_DIR)
	rm -f mpgemm$(PYEXT)
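
A quick smoke check after make (a hypothetical sketch, not part of the PR; it assumes the module lands in the repository root, which is where the rule above writes it):

    # hypothetical smoke check for the freshly built pybind11 module;
    # run from the repo root, where mpgemm$(PYEXT) is written
    import mpgemm
    print(mpgemm.__doc__)                       # "mpGEMM Python bindings"
    print(list(mpgemm.Activation.__members__))  # ['Linear', 'ReLU', 'Sigmoid', 'Tanh']
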
69 changes: 69 additions & 0 deletions src/bindings.cpp
@@ -0,0 +1,69 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "matrix.hpp"
#include "post_processing.hpp"

namespace py = pybind11;

PYBIND11_MODULE(mpgemm, m) {
m.doc() = "mpGEMM Python bindings";

// Activation enum
py::enum_<Activation>(m, "Activation")
.value("Linear", Activation::Linear)
.value("ReLU", Activation::ReLU)
.value("Sigmoid", Activation::Sigmoid)
.value("Tanh", Activation::Tanh)
.export_values();

// add_bias(C_flat, M, N, bias)
m.def("add_bias",
[](const std::vector<float>& flatC,
int M, int N,
const std::vector<float>& bias)
{
// 1) Wrap flatC into Matrix
Matrix<float, RowMajor, PlainStorage<float>> C(M, N);
for (int i = 0; i < M; ++i)
for (int j = 0; j < N; ++j)
C.set(i, j, flatC[i * N + j]);

// 2) Call C++ add_bias
auto Rmat = add_bias(C, bias);

// 3) Flatten result
std::vector<float> out;
out.reserve(M * N);
for (int i = 0; i < M; ++i)
for (int j = 0; j < N; ++j)
out.push_back(Rmat.at(i, j));

// 4) Return to Python
return out;
},
py::arg("C"), py::arg("M"), py::arg("N"), py::arg("bias"));

// apply_activation(C_flat, M, N, act)
m.def("apply_activation",
[](const std::vector<float>& flatC,
int M, int N,
Activation act)
{
Matrix<float, RowMajor, PlainStorage<float>> C(M, N);
for (int i = 0; i < M; ++i)
for (int j = 0; j < N; ++j)
C.set(i, j, flatC[i * N + j]);

auto Rmat = apply_activation(C, act);

std::vector<float> out;
out.reserve(M * N);
for (int i = 0; i < M; ++i)
for (int j = 0; j < N; ++j)
out.push_back(Rmat.at(i, j));

return out;
},
py::arg("C"), py::arg("M"), py::arg("N"), py::arg("act"));
}
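
Both bindings take a flattened row-major matrix plus its dimensions and return a flat list. A minimal usage sketch from Python, assuming the module has been built by make:

    import mpgemm

    C = [1.0, 2.0, 3.0,
         4.0, 5.0, 6.0]  # 2x3, row-major
    out = mpgemm.add_bias(C, 2, 3, [10.0, 20.0, 30.0])
    # out == [11.0, 22.0, 33.0, 14.0, 25.0, 36.0]

    act = mpgemm.apply_activation([-1.0, 0.0, 5.0, -3.0], 2, 2,
                                  mpgemm.Activation.ReLU)
    # act == [0.0, 0.0, 5.0, 0.0]
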
51 changes: 51 additions & 0 deletions src/post_processing.hpp
@@ -0,0 +1,51 @@
#pragma once
#include <vector>
#include <cmath>
#include "matrix.hpp"

/// Optional activation functions for post-processing
enum class Activation {
Linear,
ReLU,
Sigmoid,
Tanh
};

/// 1) Bias addition: adds bias[j] to M(i,j) for every row i (a per-column bias)
template<typename T, typename Layout, typename Storage>
Matrix<T,Layout,Storage> add_bias(
const Matrix<T,Layout,Storage>& M,
const std::vector<T>& bias)
{
size_t R = M.rows(), C = M.cols();
Matrix<T,Layout,Storage> Rmat(R, C);
for(size_t i = 0; i < R; ++i) {
for(size_t j = 0; j < C; ++j) {
Rmat.set(i, j, M.at(i,j) + bias[j]);
}
}
return Rmat;
}

/// 2) element-wise activation
template<typename T, typename Layout, typename Storage>
Matrix<T,Layout,Storage> apply_activation(
const Matrix<T,Layout,Storage>& M,
Activation act)
{
size_t R = M.rows(), C = M.cols();
Matrix<T,Layout,Storage> Rmat(R, C);
for(size_t i = 0; i < R; ++i) {
for(size_t j = 0; j < C; ++j) {
T v = M.at(i,j);
switch(act) {
case Activation::ReLU: v = v>static_cast<T>(0)?v:static_cast<T>(0); break;
case Activation::Sigmoid: v = static_cast<T>(1) / (static_cast<T>(1)+std::exp(-v)); break;
case Activation::Tanh: v = std::tanh(v); break;
case Activation::Linear: /* no-op */ break;
}
Rmat.set(i, j, v);
}
}
return Rmat;
}
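
For reference, the intended semantics in NumPy terms (a sketch only, not part of the PR):

    import numpy as np

    def add_bias_ref(C, bias):
        # bias[j] is broadcast down the rows: R[i, j] = C[i, j] + bias[j]
        return C + bias

    def apply_activation_ref(C, act):
        if act == "ReLU":
            return np.maximum(C, 0)
        if act == "Sigmoid":
            return 1.0 / (1.0 + np.exp(-C))
        if act == "Tanh":
            return np.tanh(C)
        return C  # Linear is the identity
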
99 changes: 98 additions & 1 deletion tests/test_correctness.cpp
@@ -4,6 +4,7 @@
#include "../src/matrix_ops.hpp"
#include "../src/lut_utils.hpp"
#include "../src/quant_utils.hpp"
#include "../src/post_processing.hpp"

#include <iostream>
#include <fstream>
@@ -283,11 +284,102 @@ bool run_mkl_test() {
}
#endif // USE_MKL

// 9. Bias addition test
bool run_bias_test() {
std::cout << "Running bias addition test...\n";
// 2×3 example
Matrix<int,RowMajor,PlainStorage<int>> M(2,3);
M.set(0,0,1); M.set(0,1,2); M.set(0,2,3);
M.set(1,0,4); M.set(1,1,5); M.set(1,2,6);
std::vector<int> bias = {10, 20, 30};
auto R = add_bias(M, bias);
Matrix<int,RowMajor,PlainStorage<int>> expected(2,3);
expected.set(0,0,11); expected.set(0,1,22); expected.set(0,2,33);
expected.set(1,0,14); expected.set(1,1,25); expected.set(1,2,36);
bool pass = check_equal(R, expected);
std::cout << (pass?"Bias test PASS\n":"Bias test FAIL\n");
return pass;
}

// 10. ReLU activation test
bool run_relu_test() {
std::cout << "Running ReLU test...\n";
Matrix<int,RowMajor,PlainStorage<int>> M(2,2), E(2,2);
// data
M.set(0,0,-1); M.set(0,1,0);
M.set(1,0, 5); M.set(1,1,-3);
// expected
E.set(0,0, 0); E.set(0,1, 0);
E.set(1,0, 5); E.set(1,1, 0);
auto R = apply_activation(M, Activation::ReLU);
bool pass = check_equal(R, E);
std::cout << (pass?"ReLU test PASS\n":"ReLU test FAIL\n");
return pass;
}

// 11. Sigmoid activation test
bool run_sigmoid_test() {
std::cout << "Running Sigmoid test...\n";
Matrix<float,RowMajor,PlainStorage<float>> M(1,3);
M.set(0,0, 0.0f);
M.set(0,1, 2.0f);
M.set(0,2,-2.0f);
auto R = apply_activation(M, Activation::Sigmoid);

// reference values
float s0 = 1.0f/(1+std::exp(-0.0f)); // 0.5
float s1 = 1.0f/(1+std::exp(-2.0f));
float s2 = 1.0f/(1+std::exp( 2.0f));

const float eps = 1e-6f;
bool pass = std::fabs(R.at(0,0)-s0)<eps
&& std::fabs(R.at(0,1)-s1)<eps
&& std::fabs(R.at(0,2)-s2)<eps;

std::cout << (pass?"Sigmoid test PASS\n":"Sigmoid test FAIL\n");
return pass;
}

// 12. Tanh activation test
bool run_tanh_test() {
std::cout << "Running Tanh test...\n";
Matrix<float,RowMajor,PlainStorage<float>> M(1,3);
M.set(0,0, 0.0f);
M.set(0,1, 1.0f);
M.set(0,2,-1.0f);
auto R = apply_activation(M, Activation::Tanh);

float t0 = std::tanh(0.0f); // 0
float t1 = std::tanh(1.0f);
float t2 = std::tanh(-1.0f);

const float eps = 1e-6f;
bool pass = std::fabs(R.at(0,0)-t0)<eps
&& std::fabs(R.at(0,1)-t1)<eps
&& std::fabs(R.at(0,2)-t2)<eps;

std::cout << (pass?"Tanh test PASS\n":"Tanh test FAIL\n");
return pass;
}

// 13. Linear (identity) test
bool run_linear_test() {
std::cout << "Running Linear (identity) test...\n";
Matrix<int,RowMajor,PlainStorage<int>> M(2,2), E(2,2);
M.set(0,0,1); M.set(0,1,-2);
M.set(1,0, 0); M.set(1,1, 5);
// Linear = no-op
E = M;
auto R = apply_activation(M, Activation::Linear);
bool pass = check_equal(R, E);
std::cout << (pass?"Linear test PASS\n":"Linear test FAIL\n");
return pass;
}


int main() {
int passed=0;
int total=10;
int total=15;
if (run_basic_test()) ++passed;
if (run_negative_test()) ++passed;
if (run_non_square_test()) ++passed;
@@ -298,6 +390,11 @@ int main() {
if (run_int4_int32_test()) ++passed;
if(run_int4_fast_test()) ++passed;
if (run_quant_dequant_test()) ++passed;
if (run_bias_test()) ++passed;
if (run_relu_test()) ++passed;
if (run_sigmoid_test()) ++passed;
if (run_tanh_test()) ++passed;
if (run_linear_test()) ++passed;
#ifdef USE_MKL
++total; // only counted toward the total when MKL is enabled
if (run_mkl_test()) ++passed;
15 changes: 15 additions & 0 deletions tests/test_post_process.py
@@ -0,0 +1,15 @@
import numpy as np
import mpgemm

def test_add_bias():
    C = np.array([[1,2,3],[4,5,6]], dtype=np.float32)
    bias = np.array([10,20,30], dtype=np.float32)
    R = mpgemm.add_bias(C.flatten().tolist(), 2, 3, bias.tolist())
    R = np.array(R).reshape(2,3)
    assert np.allclose(R, C + bias)

def test_relu():
    M = np.array([[-1,0],[2,-3]], dtype=np.float32)
    R = mpgemm.apply_activation(M.flatten().tolist(), 2, 2, mpgemm.Activation.ReLU)
    R = np.array(R).reshape(2,2)
    assert np.allclose(R, np.maximum(M, 0))
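
The Sigmoid and Tanh paths are currently exercised only by the C++ tests; hypothetical Python counterparts checking them against NumPy, with the same 1e-6 tolerance as the C++ eps checks, could look like this:

    def test_sigmoid_matches_numpy():
        M = np.array([[0.0, 2.0, -2.0]], dtype=np.float32)
        R = mpgemm.apply_activation(M.flatten().tolist(), 1, 3,
                                    mpgemm.Activation.Sigmoid)
        assert np.allclose(np.array(R).reshape(1, 3),
                           1.0 / (1.0 + np.exp(-M)), atol=1e-6)

    def test_tanh_matches_numpy():
        M = np.array([[0.0, 1.0, -1.0]], dtype=np.float32)
        R = mpgemm.apply_activation(M.flatten().tolist(), 1, 3,
                                    mpgemm.Activation.Tanh)
        assert np.allclose(np.array(R).reshape(1, 3), np.tanh(M), atol=1e-6)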