diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4bd571d..9dfe4b7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
         sudo apt-get -q update
         sudo apt-get -qy install \
           build-essential g++ \
-          intel-mkl-full python3 python3-pytest
+          intel-mkl-full python3 python3-pytest python3-pybind11
 
     - name: Build project
       run: |
@@ -28,3 +28,7 @@ jobs:
 
     - name: Run correctness test
       run: make test
+
+    - name: Run Python post-processing tests
+      run: |
+        make pytest
diff --git a/.gitignore b/.gitignore
index 8e6b375..0d70979 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
-.o
+*.o
 build
 .vscode
-.txt
\ No newline at end of file
+*.txt
+*.so
+__pycache__
+.pytest_cache
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 8531af2..12bbc02 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,10 @@
 CXX := g++
 CXXFLAGS := -std=c++17 -O2 -Wall -I./src -march=native
 
+PYBIND11_INC := $(shell python3 -m pybind11 --includes)
+PYEXT := $(shell python3-config --extension-suffix)
+
+
 # ---- MKL toggle ----
 ifeq ($(USE_MKL),1)
 MKL_INC := /usr/include/mkl
@@ -28,11 +32,12 @@ HEADERS := \
 	$(SRC_DIR)/storage_policies.hpp \
 	$(SRC_DIR)/matrix.hpp \
 	$(SRC_DIR)/matrix_ops.hpp \
-	$(SRC_DIR)/lut_utils.hpp
+	$(SRC_DIR)/lut_utils.hpp \
+	$(SRC_DIR)/post_processing.hpp
 
-.PHONY: all run test clean
+.PHONY: all run test clean pytest
 
-all: $(BUILD_DIR) $(TARGET_MAIN) $(TARGET_CORR)
+all: $(BUILD_DIR) $(TARGET_MAIN) $(TARGET_CORR) mpgemm$(PYEXT)
 
 # ensure build directory exists
 $(BUILD_DIR):
@@ -46,6 +51,14 @@ $(TARGET_MAIN): $(TEST_SRC) $(HEADERS)
 $(TARGET_CORR): $(CORR_SRC) $(HEADERS)
 	$(CXX) $(CXXFLAGS) $(CORR_SRC) -o $(TARGET_CORR) $(LDFLAGS) $(LDLIBS)
 
+# build pybind11 module
+mpgemm$(PYEXT): src/bindings.cpp $(HEADERS)
+	$(CXX) $(CXXFLAGS) $(PYBIND11_INC) -fPIC -shared src/bindings.cpp -o $@
+
+# run pytest
+pytest: all
+	PYTHONPATH=. python3 -m pytest -q tests/test_post_process.py
+
 run: all
 	./$(TARGET_MAIN)
@@ -53,4 +66,5 @@ test: $(TARGET_CORR)
 	./$(TARGET_CORR)
 
 clean:
-	rm -rf $(BUILD_DIR)
\ No newline at end of file
+	rm -rf $(BUILD_DIR)
+	rm -f mpgemm$(PYEXT)
\ No newline at end of file
diff --git a/src/bindings.cpp b/src/bindings.cpp
new file mode 100644
index 0000000..e7124bf
--- /dev/null
+++ b/src/bindings.cpp
@@ -0,0 +1,69 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "matrix.hpp"
+#include "post_processing.hpp"
+
+namespace py = pybind11;
+
+PYBIND11_MODULE(mpgemm, m) {
+    m.doc() = "mpGEMM Python bindings";
+
+    // Activation enum
+    py::enum_<Activation>(m, "Activation")
+        .value("Linear", Activation::Linear)
+        .value("ReLU", Activation::ReLU)
+        .value("Sigmoid", Activation::Sigmoid)
+        .value("Tanh", Activation::Tanh)
+        .export_values();
+
+    // add_bias(C_flat, M, N, bias)
+    m.def("add_bias",
+        [](const std::vector<float>& flatC,
+           int M, int N,
+           const std::vector<float>& bias)
+        {
+            // 1) Wrap flatC into Matrix
+            Matrix<float, DenseStorage<float>> C(M, N);
+            for (int i = 0; i < M; ++i)
+                for (int j = 0; j < N; ++j)
+                    C.set(i, j, flatC[i * N + j]);
+
+            // 2) Call C++ add_bias
+            auto Rmat = add_bias(C, bias);
+
+            // 3) Flatten result
+            std::vector<float> out;
+            out.reserve(M * N);
+            for (int i = 0; i < M; ++i)
+                for (int j = 0; j < N; ++j)
+                    out.push_back(Rmat.at(i, j));
+
+            // 4) Return to Python
+            return out;
+        },
+        py::arg("C"), py::arg("M"), py::arg("N"), py::arg("bias"));
+
+    // apply_activation(C_flat, M, N, act)
+    m.def("apply_activation",
+        [](const std::vector<float>& flatC,
+           int M, int N,
+           Activation act)
+        {
+            Matrix<float, DenseStorage<float>> C(M, N);
+            for (int i = 0; i < M; ++i)
+                for (int j = 0; j < N; ++j)
+                    C.set(i, j, flatC[i * N + j]);
+
+            auto Rmat = apply_activation(C, act);
+
+            std::vector<float> out;
+            out.reserve(M * N);
+            for (int i = 0; i < M; ++i)
+                for (int j = 0; j < N; ++j)
+                    out.push_back(Rmat.at(i, j));
+
+            return out;
+        },
+        py::arg("C"), py::arg("M"), py::arg("N"), py::arg("act"));
+}
diff --git a/src/post_processing.hpp b/src/post_processing.hpp
new file mode 100644
index 0000000..aec487e
--- /dev/null
+++ b/src/post_processing.hpp
@@ -0,0 +1,51 @@
+#pragma once
+#include <vector>
+#include <cmath>
+#include "matrix.hpp"
+
+/// Optional activation functions for post-processing
+enum class Activation {
+    Linear,
+    ReLU,
+    Sigmoid,
+    Tanh
+};
+
+/// 1) Bias addition: for every row, add bias[j] to M(i,j)
+template <typename T, typename S>
+Matrix<T, S> add_bias(
+    const Matrix<T, S>& M,
+    const std::vector<T>& bias)
+{
+    size_t R = M.rows(), C = M.cols();
+    Matrix<T, S> Rmat(R, C);
+    for(size_t i = 0; i < R; ++i) {
+        for(size_t j = 0; j < C; ++j) {
+            Rmat.set(i, j, M.at(i,j) + bias[j]);
+        }
+    }
+    return Rmat;
+}
+
+/// 2) Element-wise activation
+template <typename T, typename S>
+Matrix<T, S> apply_activation(
+    const Matrix<T, S>& M,
+    Activation act)
+{
+    size_t R = M.rows(), C = M.cols();
+    Matrix<T, S> Rmat(R, C);
+    for(size_t i = 0; i < R; ++i) {
+        for(size_t j = 0; j < C; ++j) {
+            T v = M.at(i,j);
+            switch(act) {
+                case Activation::ReLU:    v = v > static_cast<T>(0) ? v : static_cast<T>(0); break;
+                case Activation::Sigmoid: v = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-v)); break;
+                case Activation::Tanh:    v = std::tanh(v); break;
+                case Activation::Linear:  /* no-op */ break;
+            }
+            Rmat.set(i, j, v);
+        }
+    }
+    return Rmat;
+}
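For reference, the post-processing added above is plain row-broadcast bias addition followed by an element-wise nonlinearity. A minimal NumPy sketch of the same semantics, useful for cross-checking the C++ kernels (the `*_ref` helper names are illustrative, not part of this change):

```python
import numpy as np

def add_bias_ref(C: np.ndarray, bias: np.ndarray) -> np.ndarray:
    # add_bias: bias[j] is added to column j of every row
    return C + bias  # (M, N) + (N,) broadcasts across rows

def apply_activation_ref(C: np.ndarray, act: str) -> np.ndarray:
    # apply_activation: element-wise, mirroring the C++ switch
    if act == "ReLU":
        return np.maximum(C, 0.0)
    if act == "Sigmoid":
        return 1.0 / (1.0 + np.exp(-C))
    if act == "Tanh":
        return np.tanh(C)
    return C  # Linear is a no-op
```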
diff --git a/tests/test_correctness.cpp b/tests/test_correctness.cpp
index a8b6af5..a8ead0c 100644
--- a/tests/test_correctness.cpp
+++ b/tests/test_correctness.cpp
@@ -4,6 +4,7 @@
 #include "../src/matrix_ops.hpp"
 #include "../src/lut_utils.hpp"
 #include "../src/quant_utils.hpp"
+#include "../src/post_processing.hpp"
 #include <iostream>
 #include <cmath>
 
@@ -283,11 +284,102 @@ bool run_mkl_test() {
 }
 #endif // USE_MKL
 
+// 9. Bias addition test
+bool run_bias_test() {
+    std::cout << "Running bias addition test...\n";
+    // 2×3 example
+    Matrix<float, DenseStorage<float>> M(2,3);
+    M.set(0,0,1); M.set(0,1,2); M.set(0,2,3);
+    M.set(1,0,4); M.set(1,1,5); M.set(1,2,6);
+    std::vector<float> bias = {10, 20, 30};
+    auto R = add_bias(M, bias);
+    Matrix<float, DenseStorage<float>> expected(2,3);
+    expected.set(0,0,11); expected.set(0,1,22); expected.set(0,2,33);
+    expected.set(1,0,14); expected.set(1,1,25); expected.set(1,2,36);
+    bool pass = check_equal(R, expected);
+    std::cout << (pass?"Bias test PASS\n":"Bias test FAIL\n");
+    return pass;
+}
+
+// 10. ReLU activation test
+bool run_relu_test() {
+    std::cout << "Running ReLU test...\n";
+    Matrix<float, DenseStorage<float>> M(2,2), E(2,2);
+    // data
+    M.set(0,0,-1); M.set(0,1,0);
+    M.set(1,0, 5); M.set(1,1,-3);
+    // expected
+    E.set(0,0, 0); E.set(0,1, 0);
+    E.set(1,0, 5); E.set(1,1, 0);
+    auto R = apply_activation(M, Activation::ReLU);
+    bool pass = check_equal(R, E);
+    std::cout << (pass?"ReLU test PASS\n":"ReLU test FAIL\n");
+    return pass;
+}
+
+// 11. Sigmoid activation test
+bool run_sigmoid_test() {
+    std::cout << "Running Sigmoid test...\n";
+    Matrix<float, DenseStorage<float>> M(1,3);
+    M.set(0,0, 0.0f);
+    M.set(0,1, 2.0f);
+    M.set(0,2,-2.0f);
+    auto R = apply_activation(M, Activation::Sigmoid);
+
+    // reference values
+    float s0 = 1.0f/(1+std::exp(-0.0f)); // 0.5
+    float s1 = 1.0f/(1+std::exp(-2.0f));
+    float s2 = 1.0f/(1+std::exp( 2.0f));
+
+    const float eps = 1e-6f;
+    bool pass = std::fabs(R.at(0,0)-s0) < eps &&
+                std::fabs(R.at(0,1)-s1) < eps &&
+                std::fabs(R.at(0,2)-s2) < eps;
+    std::cout << (pass?"Sigmoid test PASS\n":"Sigmoid test FAIL\n");
+    return pass;
+}
+
+// 12. Tanh activation test
+bool run_tanh_test() {
+    std::cout << "Running Tanh test...\n";
+    Matrix<float, DenseStorage<float>> M(1,3);
+    M.set(0,0, 0.0f);
+    M.set(0,1, 1.0f);
+    M.set(0,2,-1.0f);
+    auto R = apply_activation(M, Activation::Tanh);
+
+    float t0 = std::tanh(0.0f); // 0
+    float t1 = std::tanh(1.0f);
+    float t2 = std::tanh(-1.0f);
+
+    const float eps = 1e-6f;
+    bool pass = std::fabs(R.at(0,0)-t0) < eps &&
+                std::fabs(R.at(0,1)-t1) < eps &&
+                std::fabs(R.at(0,2)-t2) < eps;
+    std::cout << (pass?"Tanh test PASS\n":"Tanh test FAIL\n");
+    return pass;
+}
+
+// 13. Linear activation test
+bool run_linear_test() {
+    std::cout << "Running Linear test...\n";
+    Matrix<float, DenseStorage<float>> M(2,2), E(2,2);
+    M.set(0,0,1); M.set(0,1,-2);
+    M.set(1,0, 0); M.set(1,1, 5);
+    // Linear = no-op
+    E = M;
+    auto R = apply_activation(M, Activation::Linear);
+    bool pass = check_equal(R, E);
+    std::cout << (pass?"Linear test PASS\n":"Linear test FAIL\n");
+    return pass;
+}
+
 
 int main() {
     int passed=0;
-    int total=10;
+    int total=15;
     if (run_basic_test()) ++passed;
     if (run_negative_test()) ++passed;
     if (run_non_square_test()) ++passed;
@@ -298,6 +390,11 @@ int main() {
     if (run_int4_int32_test()) ++passed;
     if(run_int4_fast_test()) ++passed;
     if (run_quant_dequant_test()) ++passed;
+    if (run_bias_test()) ++passed;
+    if (run_relu_test()) ++passed;
+    if (run_sigmoid_test()) ++passed;
+    if (run_tanh_test()) ++passed;
+    if (run_linear_test()) ++passed;
 #ifdef USE_MKL
     ++total; // only add to the total when MKL is enabled
     if (run_mkl_test()) ++passed;
diff --git a/tests/test_post_process.py b/tests/test_post_process.py
new file mode 100644
index 0000000..2f23ac6
--- /dev/null
+++ b/tests/test_post_process.py
@@ -0,0 +1,15 @@
+import numpy as np
+import mpgemm
+
+def test_add_bias():
+    C = np.array([[1,2,3],[4,5,6]], dtype=np.float32)
+    bias = np.array([10,20,30], dtype=np.float32)
+    R = mpgemm.add_bias(C.flatten().tolist(), 2, 3, bias.tolist())
+    R = np.array(R).reshape(2,3)
+    assert np.allclose(R, C + bias)
+
+def test_relu():
+    M = np.array([[-1,0],[2,-3]], dtype=np.float32)
+    R = mpgemm.apply_activation(M.flatten().tolist(), 2, 2, mpgemm.Activation.ReLU)
+    R = np.array(R).reshape(2,2)
+    assert np.allclose(R, np.maximum(M, 0))
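Once `make` has built the extension module (`mpgemm$(PYEXT)` in the repository root), the new bindings can be exercised end to end. A short usage sketch, assuming the module is importable from the current directory as in the `pytest` target:

```python
import numpy as np
import mpgemm

M, N = 2, 3
C = np.arange(M * N, dtype=np.float32).reshape(M, N) - 3.0  # stand-in GEMM output
bias = np.array([0.5, -1.0, 2.0], dtype=np.float32)

# Typical GEMM epilogue: add the bias row vector, then apply ReLU.
out = mpgemm.add_bias(C.flatten().tolist(), M, N, bias.tolist())
out = mpgemm.apply_activation(out, M, N, mpgemm.Activation.ReLU)
out = np.array(out, dtype=np.float32).reshape(M, N)

assert np.allclose(out, np.maximum(C + bias, 0.0))
```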