From d97cbdad2d5840ddb68bfc60d930ba264aa15dde Mon Sep 17 00:00:00 2001
From: VEERA GOPU
Date: Wed, 28 May 2025 17:33:00 -0500
Subject: [PATCH 1/4] Added Dockerfile for CI images

---
 docker/Dockerfile | 67 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 docker/Dockerfile

diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 000000000..af78091e8
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,67 @@
+# CONTEXT {'gpu_vendor': 'AMD', 'guest_os': 'UBUNTU'}
+ARG BASE_DOCKER=rocm/pytorch:rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.5.1
+FROM $BASE_DOCKER
+WORKDIR /var/lib/jenkins
+
+RUN apt update \
+    && apt install -y nano wget ninja-build \
+    && apt install -y python3 python3-pip git \
+    && apt install -y sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev
+
+RUN python3 -m pip install --upgrade pip
+RUN pip install ninja cmake setuptools wheel
+RUN pip install uv tabulate
+RUN pip install ipython pytest fire pydantic pybind11
+
+RUN pip uninstall -y torch
+
+RUN apt --fix-broken install -y
+RUN apt install -y libzstd-dev
+RUN apt install -y libibverbs-dev
+
+ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
+ENV PATH=$PATH:/opt/rocm/bin:
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:
+
+# Install pytorch
+ARG PYTORCH_COMMIT="f929e0d602a71aa393ca2e6097674b210bdf321c"
+ENV PYTORCH_ROCM_ARCH=gfx942
+RUN rm -fr pytorch \
+    && git clone https://github.com/pytorch/pytorch \
+    && cd pytorch \
+    && git fetch origin ${PYTORCH_COMMIT} \
+    && git checkout -q ${PYTORCH_COMMIT} \
+    && git submodule update --recursive --init \
+    && ./tools/amd_build/build_amd.py \
+    && BUILD_TEST=0 python3 setup.py install
+
+WORKDIR /var/lib/jenkins
+
+# Install flash-attention
+ENV GPU_ARCHS=${PYTORCH_ROCM_ARCH}
+RUN git clone https://github.com/ROCm/flash-attention.git \
+    && cd flash-attention \
+    && git checkout v2.7.3-cktile \
+    && pip install .
+
+WORKDIR /var/lib/jenkins
+
+# Install jax
+RUN git clone -b rocm-jaxlib-v0.4.35-qa https://github.com/ROCm/jax.git \
+    && git clone -b rocm-jaxlib-v0.4.35-qa https://github.com/ROCm/xla.git \
+    && cd jax \
+    && echo 'run:rocm_plugin --copt=-DLEGACY_HIPBLAS_DIRECT' > .bazelrc.user \
+    && python3 ./build/build.py --enable_rocm \
+        --build_gpu_plugin \
+        --use_clang=true \
+        --clang_path=/opt/rocm-6.4.0/lib/llvm/bin/clang \
+        --gpu_plugin_rocm_version=60 \
+        --rocm_path=/opt/rocm-6.4.0/ \
+        --rocm_amdgpu_targets=gfx942 \
+        --bazel_options=--override_repository=xla=/var/lib/jenkins/xla \
+    && pip install jax==0.4.35 \
+    && python3 setup.py develop --user && python3 -m pip install dist/*.whl \
+    && pip install jax==0.4.35
+
+WORKDIR /workspace/
+CMD ["/bin/bash"]
\ No newline at end of file

From 0dd689c16a4a7dd867efe5c6aabecaf016617e59 Mon Sep 17 00:00:00 2001
From: VEERA GOPU
Date: Thu, 29 May 2025 02:21:30 -0500
Subject: [PATCH 2/4] Addressed reviews

---
Review notes (free text below the "---" marker; not part of the commit message):
* The jax build flag is written as --rocm_amdgpu_targets=${GPU_ARCHS}, the ENV
  that the flash-attention section sets from PYTORCH_ROCM_ARCH. The spelling
  ${GPU_ARCH} is not defined anywhere in this Dockerfile and would expand to an
  empty target list unless the base image happens to export it.
* The blob ids on the "index" lines of patches 2-4 predate that one-word
  correction and were not recomputed.
* NOTE(review): this patch comments out ARG PYTORCH_COMMIT while the pytorch RUN
  step still expands ${PYTORCH_COMMIT}. Unless the base image defines that
  variable, fetch/checkout receive an empty argument and the build is no longer
  pinned to a commit - confirm whether that is intended.
* NOTE(review): the whitespace-only replacement of the blank line before the
  final WORKDIR is kept as in the original patch; its exact width could not be
  recovered.

 docker/{Dockerfile => Dockerfile.ci.deps} | 38 +++++++++++------------
 1 file changed, 19 insertions(+), 19 deletions(-)
 rename docker/{Dockerfile => Dockerfile.ci.deps} (66%)

diff --git a/docker/Dockerfile b/docker/Dockerfile.ci.deps
similarity index 66%
rename from docker/Dockerfile
rename to docker/Dockerfile.ci.deps
index af78091e8..a454d441e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile.ci.deps
@@ -1,21 +1,20 @@
-# CONTEXT {'gpu_vendor': 'AMD', 'guest_os': 'UBUNTU'}
+# TE CI Dockerfile
 ARG BASE_DOCKER=rocm/pytorch:rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.5.1
 FROM $BASE_DOCKER
 WORKDIR /var/lib/jenkins
 
+RUN apt --fix-broken install -y
 RUN apt update \
     && apt install -y nano wget ninja-build \
     && apt install -y python3 python3-pip git \
     && apt install -y sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev
 
 RUN python3 -m pip install --upgrade pip
-RUN pip install ninja cmake setuptools wheel
-RUN pip install uv tabulate
+RUN pip install cmake setuptools wheel
 RUN pip install ipython pytest fire pydantic pybind11
 
-RUN pip uninstall -y torch
+# RUN pip uninstall -y torch
 
-RUN apt --fix-broken install -y
 RUN apt install -y libzstd-dev
 RUN apt install -y libibverbs-dev
 
@@ -24,8 +23,8 @@ ENV PATH=$PATH:/opt/rocm/bin:
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:
 
 # Install pytorch
-ARG PYTORCH_COMMIT="f929e0d602a71aa393ca2e6097674b210bdf321c"
-ENV PYTORCH_ROCM_ARCH=gfx942
+# ARG PYTORCH_COMMIT="f929e0d602a71aa393ca2e6097674b210bdf321c"
+ARG PYTORCH_ROCM_ARCH=gfx942
 RUN rm -fr pytorch \
     && git clone https://github.com/pytorch/pytorch \
     && cd pytorch \
@@ -33,23 +32,24 @@ RUN rm -fr pytorch \
     && git checkout -q ${PYTORCH_COMMIT} \
     && git submodule update --recursive --init \
     && ./tools/amd_build/build_amd.py \
-    && BUILD_TEST=0 python3 setup.py install
-
-WORKDIR /var/lib/jenkins
+    && BUILD_TEST=0 python3 setup.py install \
+    && cd ..
 
 # Install flash-attention
 ENV GPU_ARCHS=${PYTORCH_ROCM_ARCH}
+ARG FLASH_COMMIT="b3c68b169824a58df339e4fcb0ad5e5a3e4d4327"
 RUN git clone https://github.com/ROCm/flash-attention.git \
     && cd flash-attention \
-    && git checkout v2.7.3-cktile \
-    && pip install .
-
-WORKDIR /var/lib/jenkins
+    && git fetch origin ${FLASH_COMMIT} \
+    && git checkout -q ${FLASH_COMMIT} \
+    && pip install . \
+    && cd ..
 
 # Install jax
-RUN git clone -b rocm-jaxlib-v0.4.35-qa https://github.com/ROCm/jax.git \
-    && git clone -b rocm-jaxlib-v0.4.35-qa https://github.com/ROCm/xla.git \
-    && cd jax \
+ARG JAX_COMMIT="58e53c664a30015eac865d57b4987827460d67b0"
+ARG XLA_COMMIT="fe4a1ec96238c765874ebc76f17184df0d2c7b1f"
+RUN git clone https://github.com/ROCm/xla.git && cd xla && git fetch origin ${XLA_COMMIT} && git checkout -q ${XLA_COMMIT} && cd .. \
+    && git clone https://github.com/ROCm/jax.git && cd jax && git fetch origin ${JAX_COMMIT} && git checkout -q ${JAX_COMMIT} \
     && echo 'run:rocm_plugin --copt=-DLEGACY_HIPBLAS_DIRECT' > .bazelrc.user \
     && python3 ./build/build.py --enable_rocm \
         --build_gpu_plugin \
@@ -57,11 +57,11 @@ RUN git clone -b rocm-jaxlib-v0.4.35-qa https://github.com/ROCm/jax.git \
         --clang_path=/opt/rocm-6.4.0/lib/llvm/bin/clang \
         --gpu_plugin_rocm_version=60 \
         --rocm_path=/opt/rocm-6.4.0/ \
-        --rocm_amdgpu_targets=gfx942 \
+        --rocm_amdgpu_targets=${GPU_ARCHS} \
         --bazel_options=--override_repository=xla=/var/lib/jenkins/xla \
     && pip install jax==0.4.35 \
     && python3 setup.py develop --user && python3 -m pip install dist/*.whl \
     && pip install jax==0.4.35
-
+ 
 WORKDIR /workspace/
 CMD ["/bin/bash"]
\ No newline at end of file

From 3867b21be3adfe21e5209f1ed5fb276d8db51552 Mon Sep 17 00:00:00 2001
From: VEERA GOPU
Date: Thu, 29 May 2025 10:08:08 -0500
Subject: [PATCH 3/4] Addressed reviews

---
 docker/Dockerfile.ci.deps | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docker/Dockerfile.ci.deps b/docker/Dockerfile.ci.deps
index a454d441e..caf5c19c8 100644
--- a/docker/Dockerfile.ci.deps
+++ b/docker/Dockerfile.ci.deps
@@ -50,7 +50,6 @@ ARG JAX_COMMIT="58e53c664a30015eac865d57b4987827460d67b0"
 ARG XLA_COMMIT="fe4a1ec96238c765874ebc76f17184df0d2c7b1f"
 RUN git clone https://github.com/ROCm/xla.git && cd xla && git fetch origin ${XLA_COMMIT} && git checkout -q ${XLA_COMMIT} && cd .. \
     && git clone https://github.com/ROCm/jax.git && cd jax && git fetch origin ${JAX_COMMIT} && git checkout -q ${JAX_COMMIT} \
-    && echo 'run:rocm_plugin --copt=-DLEGACY_HIPBLAS_DIRECT' > .bazelrc.user \
     && python3 ./build/build.py --enable_rocm \
         --build_gpu_plugin \
         --use_clang=true \

From c4913b2cfe4b3afc977dfefa6674ca3a7f9d13be Mon Sep 17 00:00:00 2001
From: VEERA GOPU
Date: Thu, 29 May 2025 10:57:07 -0500
Subject: [PATCH 4/4] Addressed flash attention commit to tag

---
 docker/Dockerfile.ci.deps | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docker/Dockerfile.ci.deps b/docker/Dockerfile.ci.deps
index caf5c19c8..f9683eea5 100644
--- a/docker/Dockerfile.ci.deps
+++ b/docker/Dockerfile.ci.deps
@@ -37,11 +37,9 @@ RUN rm -fr pytorch \
 
 # Install flash-attention
 ENV GPU_ARCHS=${PYTORCH_ROCM_ARCH}
-ARG FLASH_COMMIT="b3c68b169824a58df339e4fcb0ad5e5a3e4d4327"
 RUN git clone https://github.com/ROCm/flash-attention.git \
     && cd flash-attention \
-    && git fetch origin ${FLASH_COMMIT} \
-    && git checkout -q ${FLASH_COMMIT} \
+    && git checkout v2.7.3-cktile \
     && pip install . \
     && cd ..
 