diff --git a/experimental/centos/centos-8/README.md b/experimental/centos/centos-8/README.md
new file mode 100644
index 0000000..da46c71
--- /dev/null
+++ b/experimental/centos/centos-8/README.md
@@ -0,0 +1,16 @@
+# Databricks Container Services - CentOS 8 Containers
+
+This is a Databricks container runtime using CentOS 8 as the base image.
+
+### Info
+- [DockerHub](https://hub.docker.com/_/centos) CentOS images
+- Crypto policies in the minimal image are set to LEGACY, enabling TLSv1, TLSv1.1 and CBC ciphers
+  to allow connections to AWS RDS MySQL / MariaDB
+
+## Images
+
+- [Standard](standard): FUSE + OpenSSH server
+- [Minimal](minimal): base image, OpenJDK 1.8
+- [Python](python): Python 3.8
+- [DBFS FUSE](dbfsfuse): FUSE
+- [SSH](ssh): OpenSSH server
diff --git a/experimental/centos/centos-8/dbfsfuse/Dockerfile b/experimental/centos/centos-8/dbfsfuse/Dockerfile
new file mode 100644
index 0000000..4a429af
--- /dev/null
+++ b/experimental/centos/centos-8/dbfsfuse/Dockerfile
@@ -0,0 +1,9 @@
+FROM kingjatu/databricks-centos-8-python:latest
+
+# Fuse:
+RUN dnf install -y \
+    fuse
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
diff --git a/experimental/centos/centos-8/minimal/Dockerfile b/experimental/centos/centos-8/minimal/Dockerfile
new file mode 100644
index 0000000..4d3d09c
--- /dev/null
+++ b/experimental/centos/centos-8/minimal/Dockerfile
@@ -0,0 +1,19 @@
+FROM quay.io/centos/centos:stream8
+
+# Import keys to suppress warnings about GPG keys
+RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial \
+    && rpm --import https://packages.microsoft.com/keys/microsoft.asc
+
+# Minimal:
+# WARNING! Lowers security by enabling TLSv1 and TLSv1.1
+# Docs: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/considerations_in_adopting_rhel_8/security_considerations-in-adopting-rhel-8#tls-v10-v11_security
+RUN dnf install -y \
+    java-1.8.0-openjdk \
+    sudo \
+    procps iproute \
+    && update-ca-trust \
+    && update-crypto-policies --set LEGACY
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
diff --git a/experimental/centos/centos-8/python/Dockerfile b/experimental/centos/centos-8/python/Dockerfile
new file mode 100644
index 0000000..d12661b
--- /dev/null
+++ b/experimental/centos/centos-8/python/Dockerfile
@@ -0,0 +1,30 @@
+FROM kingjatu/databricks-centos-8-minimal:latest
+
+# Python:
+ARG python_dir=/databricks/python3/bin
+RUN dnf install -y \
+    python38 \
+    python3-virtualenv
+
+# Initialize the default environment that Spark and notebooks will use
+RUN virtualenv --python python3.8 --system-site-packages /databricks/python3
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+# These Python libraries are used by Databricks notebooks and the Python REPL
+# You do not need to install pyspark - it is injected when the cluster is launched
+# Versions are intended to reflect DBR 9.0
+RUN $python_dir/pip install \
+    six==1.15.0 \
+    # ensure minimum ipython version for Python autocomplete with jedi 0.17.x
+    ipython==7.19.0 \
+    numpy==1.19.2 \
+    pandas==1.2.4 \
+    pyarrow==4.0.0 \
+    matplotlib==3.4.2 \
+    jinja2==2.11.3
+
+# Specifies where Spark will look for the python process
+ENV PYSPARK_PYTHON=/databricks/python3/bin/python3
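A quick way to sanity-check the CentOS 8 Python image is to build the chain locally and import the pinned libraries from the virtualenv. This is only a sketch, run from the repository root; the `kingjatu/databricks-centos-8-*` tags match the `FROM` references in these Dockerfiles, so substitute your own registry path if you retag.

```bash
# Build the minimal and Python images in dependency order, then verify that the
# /databricks/python3 virtualenv contains the pinned libraries.
docker build -t kingjatu/databricks-centos-8-minimal:latest experimental/centos/centos-8/minimal
docker build -t kingjatu/databricks-centos-8-python:latest  experimental/centos/centos-8/python
docker run --rm kingjatu/databricks-centos-8-python:latest \
  /databricks/python3/bin/python3 -c 'import numpy, pandas, pyarrow; print(numpy.__version__, pandas.__version__, pyarrow.__version__)'
```
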
diff --git a/experimental/centos/centos-8/ssh/Dockerfile b/experimental/centos/centos-8/ssh/Dockerfile
new file mode 100644
index 0000000..9f233ed
--- /dev/null
+++ b/experimental/centos/centos-8/ssh/Dockerfile
@@ -0,0 +1,14 @@
+FROM kingjatu/databricks-centos-8-minimal:latest
+
+# OpenSSH server:
+RUN dnf install -y \
+    openssh-server
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+# Add new user: bricks
+# Warning: the created user has root permissions inside the container
+# Warning: you still need to start the ssh process with `sudo service ssh start`
+RUN useradd --create-home --shell /bin/bash --groups wheel bricks
diff --git a/experimental/centos/centos-8/standard/Dockerfile b/experimental/centos/centos-8/standard/Dockerfile
new file mode 100644
index 0000000..303e2ee
--- /dev/null
+++ b/experimental/centos/centos-8/standard/Dockerfile
@@ -0,0 +1,14 @@
+FROM kingjatu/databricks-centos-8-dbfsfuse:latest
+
+# OpenSSH server:
+RUN dnf install -y \
+    openssh-server
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+# Add new user: bricks
+# Warning: the created user has root permissions inside the container
+# Warning: you still need to start the ssh process with `sudo service ssh start`
+RUN useradd --create-home --shell /bin/bash --groups wheel bricks
diff --git a/experimental/centos/centos-9/R-ssh/Dockerfile b/experimental/centos/centos-9/R-ssh/Dockerfile
new file mode 100644
index 0000000..5bf1697
--- /dev/null
+++ b/experimental/centos/centos-9/R-ssh/Dockerfile
@@ -0,0 +1,20 @@
+FROM kingjatu/databricks-centos-9-r:latest
+
+# OpenSSH server:
+RUN dnf install -y \
+    openssh-server
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+# SSHd setup: generate host keys at build time (only the final CMD takes effect)
+RUN /usr/libexec/openssh/sshd-keygen ecdsa
+RUN /usr/libexec/openssh/sshd-keygen rsa
+RUN /usr/libexec/openssh/sshd-keygen ed25519
+CMD ["/sbin/sshd", "-p", "2200"]
+
+# Add new user: bricks
+# Warning: the created user has root permissions inside the container
+# Warning: you still need to start the ssh process with `sudo service ssh start`
+RUN useradd --create-home --shell /bin/bash --groups wheel bricks
diff --git a/experimental/centos/centos-9/R/Dockerfile b/experimental/centos/centos-9/R/Dockerfile
new file mode 100644
index 0000000..34c0fa3
--- /dev/null
+++ b/experimental/centos/centos-9/R/Dockerfile
@@ -0,0 +1,23 @@
+FROM kingjatu/databricks-centos-9-python:latest
+
+# The R language needs two extra repositories:
+# CodeReady Linux Builder (CRB) and Extra Packages for Enterprise Linux (EPEL):
+RUN dnf install -y dnf-plugins-core && \
+    dnf config-manager --set-enabled crb && \
+    dnf install -y epel-release && \
+    dnf install -y \
+    R
+
+# Install Rserve
+RUN R --vanilla -e 'install.packages("Rserve",, "http://rforge.net")'
+
+# Clean-up:
+RUN dnf config-manager --set-disabled crb \
+    && dnf remove -y dnf-plugins-core \
+    && dnf remove -y epel-release \
+    && dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+# Run Rserve on container launch. Databricks will do this.
+#CMD ["R", "CMD", "Rserve"]
+
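A minimal local check of the R image is sketched below; it assumes the image is built and tagged as `kingjatu/databricks-centos-9-r:latest` (the tag the R-ssh Dockerfile expects) and is run from the repository root. Databricks starts Rserve itself when the cluster launches, so the check only confirms that R and Rserve are present.

```bash
# Build the CentOS 9 base, Python, and R images in dependency order, then
# confirm that R and the Rserve package are installed.
docker build -t kingjatu/databricks-centos-9-base:latest   experimental/centos/centos-9/base
docker build -t kingjatu/databricks-centos-9-python:latest experimental/centos/centos-9/python
docker build -t kingjatu/databricks-centos-9-r:latest      experimental/centos/centos-9/R
docker run --rm kingjatu/databricks-centos-9-r:latest \
  R --vanilla -e 'print(R.version.string); print(packageVersion("Rserve"))'
```
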
diff --git a/experimental/centos/centos-9/README.md b/experimental/centos/centos-9/README.md
new file mode 100644
index 0000000..160a76d
--- /dev/null
+++ b/experimental/centos/centos-9/README.md
@@ -0,0 +1,14 @@
+# Databricks Container Services - CentOS 9 Stream Containers
+
+This is a Databricks container runtime using CentOS 9 Stream as the base image.
+
+### Info
+- [RedHat Quay.io](https://quay.io/repository/centos/centos?tab=tags) CentOS images
+
+## Images
+
+- [Base](base): CentOS 9 Stream set up to run as an Apache Spark node, with [FUSE](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) (Filesystem in Userspace)
+- [Python](python): Python 3.9, configured to run in Databricks
+- [Python-SSH](python-ssh): Python 3.9 + OpenSSH server
+- [R](R): R 4.2 tools for statistical computing
+- [R-ssh](R-ssh): R + OpenSSH server
diff --git a/experimental/centos/centos-9/base/Dockerfile b/experimental/centos/centos-9/base/Dockerfile
new file mode 100644
index 0000000..ef12f50
--- /dev/null
+++ b/experimental/centos/centos-9/base/Dockerfile
@@ -0,0 +1,21 @@
+FROM quay.io/centos/centos:stream9
+
+# Import keys to suppress warnings about GPG keys
+RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial \
+    && rpm --import https://packages.microsoft.com/keys/microsoft.asc
+
+RUN dnf install -y \
+    java-1.8.0-openjdk \
+    sudo \
+    procps iproute iputils \
+    fuse
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+# Runtime version:
+# Databricks uses DATABRICKS_RUNTIME_VERSION, but it must be a proper dotted version, so a custom variable is set instead.
+#ENV DATABRICKS_RUNTIME_VERSION=
+ENV DATABRICKS_CUSTOM_RUNTIME_VERSION="Base 11.2 / CentOS9-Stream"
+
diff --git a/experimental/centos/centos-9/python-ssh/Dockerfile b/experimental/centos/centos-9/python-ssh/Dockerfile
new file mode 100644
index 0000000..8cd1637
--- /dev/null
+++ b/experimental/centos/centos-9/python-ssh/Dockerfile
@@ -0,0 +1,20 @@
+FROM kingjatu/databricks-centos-9-python:latest
+
+# OpenSSH server:
+RUN dnf install -y \
+    openssh-server
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+# SSHd setup: generate host keys at build time (only the final CMD takes effect)
+RUN /usr/libexec/openssh/sshd-keygen ecdsa
+RUN /usr/libexec/openssh/sshd-keygen rsa
+RUN /usr/libexec/openssh/sshd-keygen ed25519
+CMD ["/sbin/sshd", "-p", "2200"]
+
+# Add new user: bricks
+# Warning: the created user has root permissions inside the container
+# Warning: you still need to start the ssh process with `sudo service ssh start`
+RUN useradd --create-home --shell /bin/bash --groups wheel bricks
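Since Databricks Container Services launches the container with its own entrypoint, the `CMD` line above is not what starts sshd on a running cluster; a cluster-scoped init script is one way to do it. The following is only a sketch: it assumes host keys were generated at image build time, that `/usr/sbin/sshd` is the daemon path in this image, and that port 2200 (the port used in the Dockerfile) is reachable under your network rules.

```bash
#!/bin/bash
# Hypothetical init script: start the OpenSSH daemon inside the Databricks container.
# sshd must be invoked with an absolute path; port 2200 matches the CMD in the image.
set -euo pipefail
/usr/sbin/sshd -p 2200
```
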
diff --git a/experimental/centos/centos-9/python/Dockerfile b/experimental/centos/centos-9/python/Dockerfile
new file mode 100644
index 0000000..1bfcabb
--- /dev/null
+++ b/experimental/centos/centos-9/python/Dockerfile
@@ -0,0 +1,50 @@
+FROM kingjatu/databricks-centos-9-base:latest AS compile-image
+
+# Python:
+ARG python_dir=/databricks/python3/bin
+RUN dnf install -y \
+    python39 python3-devel gcc
+
+# Initialize the default environment that Spark and notebooks will use
+RUN python3.9 -m venv --system-site-packages /databricks/python3
+
+# These python libraries are used by Databricks notebooks and the Python REPL
+# You do not need to install pyspark - it is injected when the cluster is launched
+# Versions are intended to reflect DBR 9.0
+RUN $python_dir/pip install --upgrade pip
+RUN $python_dir/pip install \
+    six==1.16.0 \
+    # ensure minimum ipython version for Python autocomplete with jedi 0.17.x
+    ipython==7.32.0 \
+    numpy==1.20.3
+
+
+
+FROM kingjatu/databricks-centos-9-base:latest AS build-image
+
+# Python:
+ARG python_dir=/databricks/python3/bin
+RUN dnf install -y \
+    python39
+
+# Clean-up:
+RUN dnf clean all \
+    && rm -rf /tmp/* /var/tmp/*
+
+
+# Copy venv with libraries
+COPY --from=compile-image /databricks/python3 /databricks/python3
+
+RUN $python_dir/pip install \
+    virtualenv \
+    ipykernel \
+    pandas==1.3.4 \
+    pyarrow==9.0.0 \
+    matplotlib==3.4.3 \
+    jinja2==2.11.3 \
+    databricks-sql-connector==2.1.0
+
+
+# Specifies where Spark will look for the python process
+ENV PYSPARK_PYTHON=/databricks/python3/bin/python3
+
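To run any of these images on a cluster, the image reference goes into the cluster spec's `docker_image` field (Databricks Container Services must be enabled for the workspace). The sketch below uses the Clusters API; the host, token, `spark_version`, and `node_type_id` values are placeholders to adapt, and a `basic_auth` block can be added under `docker_image` for private registries.

```bash
# Hypothetical example: create a cluster that runs the CentOS 9 Python image.
# DATABRICKS_HOST and DATABRICKS_TOKEN are assumed to be set for your workspace.
curl -s -X POST "$DATABRICKS_HOST/api/2.0/clusters/create" \
  -H "Authorization: Bearer $DATABRICKS_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "cluster_name": "centos9-python",
    "spark_version": "11.2.x-scala2.12",
    "node_type_id": "i3.xlarge",
    "num_workers": 1,
    "docker_image": { "url": "kingjatu/databricks-centos-9-python:latest" }
  }'
```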