From 0ad7c5179fd6729eaa3ca3046be63bb3cfee50d9 Mon Sep 17 00:00:00 2001 From: andymc629 Date: Wed, 5 Feb 2025 19:40:03 +0000 Subject: [PATCH 1/6] added chapter 5 stuff --- Chapter05/mlewp2-airflow/aws-mwaa-local-runner | 1 + 1 file changed, 1 insertion(+) create mode 160000 Chapter05/mlewp2-airflow/aws-mwaa-local-runner diff --git a/Chapter05/mlewp2-airflow/aws-mwaa-local-runner b/Chapter05/mlewp2-airflow/aws-mwaa-local-runner new file mode 160000 index 0000000..2e40031 --- /dev/null +++ b/Chapter05/mlewp2-airflow/aws-mwaa-local-runner @@ -0,0 +1 @@ +Subproject commit 2e4003132892e7ffaef4a1071369899d7c3d8456 From c11a9d03635ca6d2d3fe86c250f8994caf4ef67a Mon Sep 17 00:00:00 2001 From: andymc629 Date: Wed, 5 Feb 2025 20:15:53 +0000 Subject: [PATCH 2/6] Created a hotfix yaml that covers most chapter 3 examples except auto-sklearn and pyspark. Issues on the new mac with the environment. Might need a container option. --- .gitignore | 6 +- Chapter03/features/feature-engineering.py | 1 - .../hyperparameter-opt/optuna_example.py | 3 +- Chapter03/mlewp-chapter03-hotfix copy.yml.txt | 211 ++++++++++++++++++ Chapter03/mlewp-chapter03-hotfix.yml | 211 ++++++++++++++++++ .../mlflow-feature-engineering.py | 2 +- Chapter03/mlflow-advanced/mlflow.db | Bin 0 -> 225280 bytes .../mlflow-advanced/start-mlflow-server.sh | 3 +- Chapter03/pipelines/sparkmllib_pipeline.py | 2 +- 9 files changed, 431 insertions(+), 8 deletions(-) create mode 100644 Chapter03/mlewp-chapter03-hotfix copy.yml.txt create mode 100644 Chapter03/mlewp-chapter03-hotfix.yml create mode 100644 Chapter03/mlflow-advanced/mlflow.db diff --git a/.gitignore b/.gitignore index 0a0d7ed..6350bb3 100644 --- a/.gitignore +++ b/.gitignore @@ -161,10 +161,12 @@ cython_debug/ #Other - potentially vs code *.DS_Store -**/.DS_Store +*/.DS_Store #MLFlow etc **/artifacts/model **/artifacts/** -**/mlruns/** \ No newline at end of file +**/mlruns/** +Chapter08/.DS_Store +Chapter09/.DS_Store diff --git 
a/Chapter03/features/feature-engineering.py b/Chapter03/features/feature-engineering.py index c68c0a7..594ecf7 100644 --- a/Chapter03/features/feature-engineering.py +++ b/Chapter03/features/feature-engineering.py @@ -9,7 +9,6 @@ # Make a 70/30 train/test split X_train, X_test, y_train, y_test = train_test_split(X, y, - test_size=0.30, test_size=0.30, random_state=42) diff --git a/Chapter03/hyperparameter-opt/optuna_example.py b/Chapter03/hyperparameter-opt/optuna_example.py index 31a4397..01c8f47 100644 --- a/Chapter03/hyperparameter-opt/optuna_example.py +++ b/Chapter03/hyperparameter-opt/optuna_example.py @@ -53,5 +53,4 @@ def objective(trial, n_folds, X, y): study = optuna.create_study(direction='minimize') study.optimize(partial(objective, n_folds=n_folds, X=X_train, y=y_train), n_trials=16) - print(study.best_trial.params) - print(stu) + print(study.best_trial.params) \ No newline at end of file diff --git a/Chapter03/mlewp-chapter03-hotfix copy.yml.txt b/Chapter03/mlewp-chapter03-hotfix copy.yml.txt new file mode 100644 index 0000000..d9541a3 --- /dev/null +++ b/Chapter03/mlewp-chapter03-hotfix copy.yml.txt @@ -0,0 +1,211 @@ +name: mlewp-chapter03-hotfix +channels: + - conda-forge +dependencies: + - aiohappyeyeballs=2.4.4 + - aiohttp=3.11.11 + - aiosignal=1.3.2 + - alembic=1.14.1 + - annotated-types=0.7.0 + - async-timeout=5.0.1 + - attrs=25.1.0 + - aws-c-auth=0.8.1 + - aws-c-cal=0.8.1 + - aws-c-common=0.10.6 + - aws-c-compression=0.3.0 + - aws-c-event-stream=0.5.0 + - aws-c-http=0.9.2 + - aws-c-io=0.15.3 + - aws-c-mqtt=0.11.0 + - aws-c-s3=0.7.9 + - aws-c-sdkutils=0.2.2 + - aws-checksums=0.2.2 + - aws-crt-cpp=0.29.9 + - aws-sdk-cpp=1.11.489 + - azure-core-cpp=1.14.0 + - azure-identity-cpp=1.10.0 + - azure-storage-blobs-cpp=12.13.0 + - azure-storage-common-cpp=12.8.0 + - azure-storage-files-datalake-cpp=12.12.0 + - bcrypt=4.2.1 + - blinker=1.9.0 + - brotli=1.1.0 + - brotli-bin=1.1.0 + - brotli-python=1.1.0 + - bzip2=1.0.8 + - c-ares=1.34.4 + - 
ca-certificates=2025.1.31 + - cachetools=5.5.1 + - certifi=2024.12.14 + - cffi=1.17.1 + - charset-normalizer=3.4.1 + - click=8.1.8 + - cloudpickle=3.1.1 + - colorama=0.4.6 + - colorlog=6.9.0 + - contourpy=1.3.1 + - cryptography=44.0.0 + - cycler=0.12.1 + - databricks-sdk=0.43.0 + - deprecated=1.2.18 + - docker-py=7.1.0 + - entrypoints=0.4 + - flask=3.1.0 + - fonttools=4.55.8 + - freetype=2.12.1 + - frozenlist=1.5.0 + - gflags=2.2.2 + - gitdb=4.0.12 + - gitpython=3.1.44 + - glog=0.7.1 + - google-auth=2.38.0 + - graphene=3.4.3 + - graphql-core=3.2.6 + - graphql-relay=3.2.0 + - greenlet=3.1.1 + - gunicorn=23.0.0 + - h2=4.2.0 + - hpack=4.1.0 + - hyperframe=6.1.0 + - icu=75.1 + - idna=3.10 + - importlib-metadata=8.6.1 + - itsdangerous=2.2.0 + - jinja2=3.1.5 + - joblib=1.4.2 + - kiwisolver=1.4.7 + - krb5=1.21.3 + - lcms2=2.16 + - lerc=4.0.0 + - libabseil=20240722.0 + - libarrow=18.1.0 + - libarrow-acero=18.1.0 + - libarrow-dataset=18.1.0 + - libarrow-substrait=18.1.0 + - libblas=3.9.0 + - libbrotlicommon=1.1.0 + - libbrotlidec=1.1.0 + - libbrotlienc=1.1.0 + - libcblas=3.9.0 + - libcrc32c=1.1.2 + - libcurl=8.11.1 + - libcxx=19.1.7 + - libdeflate=1.23 + - libedit=3.1.20250104 + - libev=4.33 + - libevent=2.1.12 + - libffi=3.4.2 + - libgfortran=5.0.0 + - libgfortran5=13.2.0 + - libgoogle-cloud=2.34.0 + - libgoogle-cloud-storage=2.34.0 + - libgrpc=1.67.1 + - libiconv=1.17 + - libjpeg-turbo=3.0.0 + - liblapack=3.9.0 + - liblzma=5.6.4 + - liblzma-devel=5.6.4 + - libnghttp2=1.64.0 + - libopenblas=0.3.28 + - libparquet=18.1.0 + - libpng=1.6.46 + - libprotobuf=5.28.3 + - libre2-11=2024.07.02 + - libsodium=1.0.20 + - libsqlite=3.48.0 + - libssh2=1.11.1 + - libthrift=0.21.0 + - libtiff=4.7.0 + - libutf8proc=2.10.0 + - libwebp-base=1.5.0 + - libxcb=1.17.0 + - libxml2=2.13.5 + - libzlib=1.3.1 + - llvm-openmp=19.1.7 + - lz4-c=1.10.0 + - mako=1.3.9 + - markdown=3.6 + - markupsafe=3.0.2 + - matplotlib-base=3.10.0 + - mlflow=2.20.1 + - mlflow-skinny=2.20.1 + - mlflow-ui=2.20.1 + - 
multidict=6.1.0 + - munkres=1.1.4 + - ncurses=6.5 + - numpy=1.26.4 + - openjpeg=2.5.3 + - openssl=3.4.0 + - opentelemetry-api=1.16.0 + - opentelemetry-sdk=1.16.0 + - opentelemetry-semantic-conventions=0.37b0 + - optuna=4.2.0 + - orc=2.0.3 + - packaging=24.2 + - pandas=2.2.2 + - paramiko=3.5.1 + - pillow=11.1.0 + - pip=25.0 + - prometheus_client=0.21.1 + - prometheus_flask_exporter=0.23.1 + - propcache=0.2.1 + - protobuf=5.28.3 + - pthread-stubs=0.4 + - py4j=0.10.9.7 + - pyarrow=18.1.0 + - pyarrow-core=18.1.0 + - pyasn1=0.6.1 + - pyasn1-modules=0.4.1 + - pycparser=2.22 + - pydantic=2.10.6 + - pydantic-core=2.27.2 + - pynacl=1.5.0 + - pyopenssl=25.0.0 + - pyparsing=3.2.1 + - pysocks=1.7.1 + - pyspark=3.5.4 + - python=3.10.8 + - python-dateutil=2.9.0.post0 + - python-tzdata=2025.1 + - python_abi=3.10 + - pytz=2024.2 + - pyu2f=0.1.5 + - pywin32-on-windows=0.1.0 + - pyyaml=6.0.2 + - qhull=2020.2 + - querystring_parser=1.2.4 + - re2=2024.07.02 + - readline=8.2 + - requests=2.32.3 + - rsa=4.9 + - scikit-learn=1.6.1 + - scipy=1.15.1 + - setuptools=75.8.0 + - six=1.17.0 + - smmap=5.0.0 + - snappy=1.2.1 + - sqlalchemy=2.0.37 + - sqlparse=0.5.3 + - threadpoolctl=3.5.0 + - tk=8.6.13 + - tqdm=4.67.1 + - typing-extensions=4.12.2 + - typing_extensions=4.12.2 + - tzdata=2025a + - unicodedata2=16.0.0 + - urllib3=2.3.0 + - websocket-client=1.8.0 + - werkzeug=3.1.3 + - wheel=0.45.1 + - wrapt=1.17.2 + - xorg-libxau=1.0.12 + - xorg-libxdmcp=1.1.5 + - xz=5.6.4 + - xz-gpl-tools=5.6.4 + - xz-tools=5.6.4 + - yaml=0.2.5 + - yarl=1.18.3 + - zipp=3.21.0 + - zstandard=0.23.0 + - zstd=1.5.6 +prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03-hotfix diff --git a/Chapter03/mlewp-chapter03-hotfix.yml b/Chapter03/mlewp-chapter03-hotfix.yml new file mode 100644 index 0000000..d9541a3 --- /dev/null +++ b/Chapter03/mlewp-chapter03-hotfix.yml @@ -0,0 +1,211 @@ +name: mlewp-chapter03-hotfix +channels: + - conda-forge +dependencies: + - aiohappyeyeballs=2.4.4 + - aiohttp=3.11.11 + - 
aiosignal=1.3.2 + - alembic=1.14.1 + - annotated-types=0.7.0 + - async-timeout=5.0.1 + - attrs=25.1.0 + - aws-c-auth=0.8.1 + - aws-c-cal=0.8.1 + - aws-c-common=0.10.6 + - aws-c-compression=0.3.0 + - aws-c-event-stream=0.5.0 + - aws-c-http=0.9.2 + - aws-c-io=0.15.3 + - aws-c-mqtt=0.11.0 + - aws-c-s3=0.7.9 + - aws-c-sdkutils=0.2.2 + - aws-checksums=0.2.2 + - aws-crt-cpp=0.29.9 + - aws-sdk-cpp=1.11.489 + - azure-core-cpp=1.14.0 + - azure-identity-cpp=1.10.0 + - azure-storage-blobs-cpp=12.13.0 + - azure-storage-common-cpp=12.8.0 + - azure-storage-files-datalake-cpp=12.12.0 + - bcrypt=4.2.1 + - blinker=1.9.0 + - brotli=1.1.0 + - brotli-bin=1.1.0 + - brotli-python=1.1.0 + - bzip2=1.0.8 + - c-ares=1.34.4 + - ca-certificates=2025.1.31 + - cachetools=5.5.1 + - certifi=2024.12.14 + - cffi=1.17.1 + - charset-normalizer=3.4.1 + - click=8.1.8 + - cloudpickle=3.1.1 + - colorama=0.4.6 + - colorlog=6.9.0 + - contourpy=1.3.1 + - cryptography=44.0.0 + - cycler=0.12.1 + - databricks-sdk=0.43.0 + - deprecated=1.2.18 + - docker-py=7.1.0 + - entrypoints=0.4 + - flask=3.1.0 + - fonttools=4.55.8 + - freetype=2.12.1 + - frozenlist=1.5.0 + - gflags=2.2.2 + - gitdb=4.0.12 + - gitpython=3.1.44 + - glog=0.7.1 + - google-auth=2.38.0 + - graphene=3.4.3 + - graphql-core=3.2.6 + - graphql-relay=3.2.0 + - greenlet=3.1.1 + - gunicorn=23.0.0 + - h2=4.2.0 + - hpack=4.1.0 + - hyperframe=6.1.0 + - icu=75.1 + - idna=3.10 + - importlib-metadata=8.6.1 + - itsdangerous=2.2.0 + - jinja2=3.1.5 + - joblib=1.4.2 + - kiwisolver=1.4.7 + - krb5=1.21.3 + - lcms2=2.16 + - lerc=4.0.0 + - libabseil=20240722.0 + - libarrow=18.1.0 + - libarrow-acero=18.1.0 + - libarrow-dataset=18.1.0 + - libarrow-substrait=18.1.0 + - libblas=3.9.0 + - libbrotlicommon=1.1.0 + - libbrotlidec=1.1.0 + - libbrotlienc=1.1.0 + - libcblas=3.9.0 + - libcrc32c=1.1.2 + - libcurl=8.11.1 + - libcxx=19.1.7 + - libdeflate=1.23 + - libedit=3.1.20250104 + - libev=4.33 + - libevent=2.1.12 + - libffi=3.4.2 + - libgfortran=5.0.0 + - libgfortran5=13.2.0 + - 
libgoogle-cloud=2.34.0 + - libgoogle-cloud-storage=2.34.0 + - libgrpc=1.67.1 + - libiconv=1.17 + - libjpeg-turbo=3.0.0 + - liblapack=3.9.0 + - liblzma=5.6.4 + - liblzma-devel=5.6.4 + - libnghttp2=1.64.0 + - libopenblas=0.3.28 + - libparquet=18.1.0 + - libpng=1.6.46 + - libprotobuf=5.28.3 + - libre2-11=2024.07.02 + - libsodium=1.0.20 + - libsqlite=3.48.0 + - libssh2=1.11.1 + - libthrift=0.21.0 + - libtiff=4.7.0 + - libutf8proc=2.10.0 + - libwebp-base=1.5.0 + - libxcb=1.17.0 + - libxml2=2.13.5 + - libzlib=1.3.1 + - llvm-openmp=19.1.7 + - lz4-c=1.10.0 + - mako=1.3.9 + - markdown=3.6 + - markupsafe=3.0.2 + - matplotlib-base=3.10.0 + - mlflow=2.20.1 + - mlflow-skinny=2.20.1 + - mlflow-ui=2.20.1 + - multidict=6.1.0 + - munkres=1.1.4 + - ncurses=6.5 + - numpy=1.26.4 + - openjpeg=2.5.3 + - openssl=3.4.0 + - opentelemetry-api=1.16.0 + - opentelemetry-sdk=1.16.0 + - opentelemetry-semantic-conventions=0.37b0 + - optuna=4.2.0 + - orc=2.0.3 + - packaging=24.2 + - pandas=2.2.2 + - paramiko=3.5.1 + - pillow=11.1.0 + - pip=25.0 + - prometheus_client=0.21.1 + - prometheus_flask_exporter=0.23.1 + - propcache=0.2.1 + - protobuf=5.28.3 + - pthread-stubs=0.4 + - py4j=0.10.9.7 + - pyarrow=18.1.0 + - pyarrow-core=18.1.0 + - pyasn1=0.6.1 + - pyasn1-modules=0.4.1 + - pycparser=2.22 + - pydantic=2.10.6 + - pydantic-core=2.27.2 + - pynacl=1.5.0 + - pyopenssl=25.0.0 + - pyparsing=3.2.1 + - pysocks=1.7.1 + - pyspark=3.5.4 + - python=3.10.8 + - python-dateutil=2.9.0.post0 + - python-tzdata=2025.1 + - python_abi=3.10 + - pytz=2024.2 + - pyu2f=0.1.5 + - pywin32-on-windows=0.1.0 + - pyyaml=6.0.2 + - qhull=2020.2 + - querystring_parser=1.2.4 + - re2=2024.07.02 + - readline=8.2 + - requests=2.32.3 + - rsa=4.9 + - scikit-learn=1.6.1 + - scipy=1.15.1 + - setuptools=75.8.0 + - six=1.17.0 + - smmap=5.0.0 + - snappy=1.2.1 + - sqlalchemy=2.0.37 + - sqlparse=0.5.3 + - threadpoolctl=3.5.0 + - tk=8.6.13 + - tqdm=4.67.1 + - typing-extensions=4.12.2 + - typing_extensions=4.12.2 + - tzdata=2025a + - 
unicodedata2=16.0.0 + - urllib3=2.3.0 + - websocket-client=1.8.0 + - werkzeug=3.1.3 + - wheel=0.45.1 + - wrapt=1.17.2 + - xorg-libxau=1.0.12 + - xorg-libxdmcp=1.1.5 + - xz=5.6.4 + - xz-gpl-tools=5.6.4 + - xz-tools=5.6.4 + - yaml=0.2.5 + - yarl=1.18.3 + - zipp=3.21.0 + - zstandard=0.23.0 + - zstd=1.5.6 +prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03-hotfix diff --git a/Chapter03/mlflow-advanced/mlflow-feature-engineering.py b/Chapter03/mlflow-advanced/mlflow-feature-engineering.py index b25fc3c..039e2f5 100644 --- a/Chapter03/mlflow-advanced/mlflow-feature-engineering.py +++ b/Chapter03/mlflow-advanced/mlflow-feature-engineering.py @@ -15,7 +15,7 @@ if __name__=="__main__": # assume you have already run 'start-mlflow-server.sh' - mlflow.set_tracking_uri("http://localhost:5000") + mlflow.set_tracking_uri("http://localhost:8000") X, y = load_wine(return_X_y=True) diff --git a/Chapter03/mlflow-advanced/mlflow.db b/Chapter03/mlflow-advanced/mlflow.db new file mode 100644 index 0000000000000000000000000000000000000000..87140bf163a7957db0a89fc3c288a59fe2a2e5cb GIT binary patch literal 225280 zcmeI*ZEV|SVh3>1mL@g*hM0Au%FOqqwN2f3lB(xAP7JJ0uX=z z1Rwwb2tWV=5P$##_DUdlmgMx6fnM^PVGADe9}fsX00Izz00bZa0SG_<0uX=z1olK= zoAq>iPHroGU0w5XPN=AQWL1;PT0|)2#e77PHzR35+)xT~cu^M01*NbaP8HV4bGb~H zZz%dk_{x^PQ7nX)WwBV0!YN77m0}?>w;_~txg3p0@@h^k-UtiQrce-NDI%10B`1iw z7KxJI%BIYG|HHpr|2y+{57{n;{Vxysj|T)G009U<00Izz00bZa0SG_<0tZQ;r}JdD zHUB{G|C1N};{gE(KmY;|fB*y_009U<00IzzzyTJZ_y2MIe}Eepn+5?0KmY;|fB*y_ z009U<00Iyo0`&gBmp$ZR-($DgOJo5L2tWV=5P$##AOHafKmY;|fWW~PIOp~E`Mc8H z-DO!6RCV_L?N2|x{pl~s`{dtw`dBn7txZZ}F;SQh#)ZjgVLFkB$#Qfp9uvsl>8Vct zQ6q6KHhceu@PqGNd%eAcFLd}%83{|ISVbdm8CR?zNYk-v9Tq-SqnZ0{gpU0S^d300Izz00bZa0SG_<0uX?}0T!6)?(6DK`@0$Q z;=jF%=js%JH_?66Or49ho6?*k=zOvJl$o-2`QLU1#w0<<3*G%j27&7Jf4k|;Sps_f z-@(4?VLu|j@PGgWAOHafKmY;|fB*y_009UW{{Lsl@C#>Hkp z00Izz00bZa0SG_<0uX=z1S|oz^SOZi`XBHAN7sM=1Rwwb2tWV=5P$##AOHaf9DD)1 z|Nr2(F}4r_5P$##AOHafKmY;|fB*y_K(GIO>|pKt|JY092_6uD00bZa0SG_<0uX=z 
z1Rwx`Cr_ZA@7nXV6RE46NcjH0C$EQLiy;632tWV=5P$##AOHafKmYmb{~vnT z55M4okUaz-009U<00Izz00bZa0SG_<0(&hm#B?RPx_Y(iVO18&g|MbeVNDcNIV`F< z=KCMMbMj64yZ@)==l}oO!~S}&tAP9<009U<00Izz00bZa0SG_<0uXq#0)3r{K;yXp zdjH?c9`dm7vD@Um|A!uJy-*qiAOHafKmY;|fB*y_009U<;E@ZQ^ZNVzUESU3vP|xj ztF!lSfBNz5Pk%}8lRted8kN>2rLmYOObFw`$tu4om+9P+#>bq``|l2)_6I{D=FLO8u%^n+Q0KF+&t+4|l@zyJ2(27C>u5*ON)IvRu%4 zO&8W}bu|-d#Y$PMuO*ooY08y?J*)Vbv6keZSG0NoRu;xuJ}+yUy#RYws#3UKOJ)XW zda+ER^J-Bv7+k?Ufw`s3@=7*IR5*t_U%Jj+$u6Xm*{j^;)KzYnHo*vIQ5*HVu#`J8zGkdDxwCf4AB@GAyT#r8&4}ULJetqf9Q|&0IPG}@*X6B&n%dYz)YbI_ zwc==3v#~yZe{I?xq{6$}#Hic|*Scxy4QFp7#}-=+WQ$pEDN!Kr@si}8(84pm;OkS( zTFTw#R%10>-L-SJ-yfWsV(uO{+N)uthAX{}R%;mLi)uJq<1{MwLG80AdrYkzhQ96# zo;%UB9V&D@CWDLC_nkv0{lOC_m_OJumB~7~{;Lkd0lLZMQ&-2f zk8ZraRjTg_V~!w4vg(jUM$7URUm}MSop(hrw3>~yuJJKZ9W=|jdmxt!yT*2NB(5H| zYCAUBuZ^REl9R9A%}Jbj%ve<(nnBOb#++YY3oqEE?g+K~ipT zjInnmg6Gmq7HNb~ zs3(;CsxP=2X}a6E4_Ra%a<4kR;|=mmqs(JW)@yvO<4!=p3uxOU-0EK%_MEDXdbU!E7+aTQGf8v8Rl-lP;>>mo*s4x zx&lA{P;bkqzP~yKI~$ZO@&gXUwpdp0edt|)P(Sq`YVy|W{F)+G2Q)tBTG6{5I%H7q znAEOd*VCsQ)UH5EZJqz|g-X6Q0J@J6Ha1rdqTAr?#=%(H9&Cs9jeewFASh^X%&7fqQVSUd$Nh z{Ec^U+a-$oz-A-1Ro#6Mu?1BJ)B4$dJ#cRhD6RpbmwCs-dfxT;|JnaTUsu=9x>DW` zdOu*74*hNCZ#sX!=f8Wnz`v1T>))Y&J@l2HK+nGf-VeP0$S-s41#WMh^#}XEGGRn! 
zj`FfzRz!`j?mo`d&opp!#;7Hvn$95GKDjpR3nor7-?o+SF4%e=Xi%a%DlN)dXDcJH z>It{q6yfK6!Q{zija)4-W-a*d^gZVfo;=BX$2|A2R$G63&hgY*+x~!hVR0!*<^XN; zfOLwFTqop9);ymhu-Z~hM_DpkRZBRZx{zF5T;YbILyk-oja;b~IC3Slm`Y~2X3=bi zXLGZfjo7ssk8rg35w2QGG7Up#mPRE_zo(B3m#?Pd?EiZ|^Sw#IZ+zFC`A zGyDSDtoN|zPukvBA6dkB!WX=Fy4j#$9wN-k!20r;?bT_2@bqbBr(*P=dPsfgw;VmE zzMh`2o6nq6Rj#`Nt~2%t=Si2bd)3;UPDwd-bX%j>3*Ob@^;(#duw=K|erd`dyxqs_ z9I_4CMm&ut_Z4)dVBL-&lVy$-ip1MyUCb1r2BZ3C&&z(mc2|MUPt_}>NMsx`ZdDkQ zd7+&PUiHg|^pfz!>Ay-~KA@+1SJ_xD_TxvP`d8ntoxyBeA1H^LbeG6wM0InWe*j@%~V z?3Y}0tz(+gUA}d{#ke6hpIS_j8)DYcz&g@9!?e3$#e3`G_rg{(Ko_Zw)Bl+A-pV$#ex)0NeX?P zb7XEqD3O=zM&psZnp2B6!h*CZ6hv8yRA1YmMWT^bTTqW|e*XVQ9`>Vy(xBK52tWV= z5P$##AOHafKmY;|fWVU_FxZ(6w3-Q^_y0Xdo-7w^Ed(F{0SG_<0uX=z1Rwwb2teRT z6QKA1asB_KwK}#J0uX=z1Rwwb2tWV=5P$##o-_gD{r`urc*uV|AOHafKmY;|fB*y_ z009U<00M0b+zB&B5&`l~dQbn(wRF(ayK_yu9#-kQ+{2nKg~=Bbsd89UbH=;lRr>00Izz00bZa0SG_< z0uX?};~;?R|Hq+~P)7(r00Izz00bZa0SG_<0uX?}!wKN}|Ka?Q8Uhf200bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_u#W^7QkQ4R&mj33*hlq3RUiNX2tWV= z5P$##AOHafKmY;|*aHE4{{J2zA`1vW00Izz00bZa0SG_<0uX?}J{Q3I|Mz(Vp?VO2 z00bZa0SG_<0uX=z1Rwx`JrKb4{~jPB3kW~}0uX=z1Rwwb2tWV=5P-lw7r^!ZK5rmY z4+0Q?00bZa0SG_<0uX=z1R$^n0(k%b9v~tM2tWV=5P$##AOHafKmY;|fWST%!2SPy z-ax1x1Rwwb2tWV=5P$##AOHafKwu99=>7i=_D3G}C*&6%5P$##AOHafKmY;|fB*y_ z009VmX#(L6Z{P8>R#Z3TvL>uYqte=>G!_$u31M8AoED}NiI^-$*WxjO{GC3-c#oe* z>qRv>8jZDzOz;19us`;&KW2aarEMSzh5!U0009U<00Izz00bZa0SG{#CUC~vmuAdq zfL53F9YfyZ=>VMrXfavP+d1Sp#`Jpv9|R=-Uj-um?ymQ`{;=b1Z+GY4baXTQoo{uX z?fu{0f9QSnsUOxVVlMoX>-oWZy1lXJ51u%|+==VLnkwtUx<-G-=d!8fN{U-ao?lFH z19a5@H{25#xGrxEaIYk@bC;6Y;jxKmh|4UkaGBM`#SyM2uqmh&xfVPfjYdOsb-7aD zD-}hmg~Z34>E@O)%PZOBLS}^{Me(KU+?DJ?I+?x7T~1x)hK*83xN0^=o)?y~sfCLf z8ezB=z-3bxQrT2yF11W!Yg|33C$#lBNtwyBFQ##RG1~iH($?NInXRKa+S|6C9HZ((Y15s_Q94Hs zJE@DQtm8;#uMl^B;UYaW*^hMcO>;v?zR-FXHA=R2QM!Ek8^7%fPW3UjY(1+al!ZL$ z&eouBKJ9MMe_Ro1Pcc{7j^1a^`GT{3&BhHYhnV>{Om6qQ?K+a-DFpVdz;-$@T=lJzT#Y%{VBe~Swa$g^cGcLfL1CmCU@tYKy-ZG1 
zNu}+lsnra4Yr#0{udW<;Q2p!)oK>-hgd$gbw9qDJj@6R*Sas>tUC)v~*x%25*F1Nq zuCM+*>Nt)JR972k4b~IamCK~_%Ed8yHawCPgna#yXo6f3IgTTevQ7@OwPQuxm3_4i zw&rd#F1M0;&3VYG@&peEKmY;|fB*y_009U<00MS_yD4AS%$xn`yqZ&sH%8Us zdN^N{WOZ~y(ez?@>)NdW`t25qG;o$1X!R{*10&pkP}Y^4AnJTc&^Krz?Yb%p<${?l zjZx7>^9x19vMlJbk@Hw|Y$6;T3r8oovFO=o^z7KgXlyJRjZM)MMvaWmsHYhUqBu1f zm!wH)e0+RLk`uz@_;{TBpB$SK*5n*bkyC}uVp${6Zw-{T^bPVICb>#Mq+e!YtTS`0 zR?*zz(sh+4EtO?SA@$GEnAK-AKrITAT;@sSiYn7E%gow9Bdu8!{epM1q=EQoEIK+( z!{oxIQM6brNCHi6ER1dmd861(rL0#3)r?iGGkIo4xP6=CP%X&VC`BZ$60MD_8kI3x z&DsVlCU2`qY;ZrwvR5G%q(L|CTSzV~8>h0UN zUvJg)vt3^Dl~B(Hbo^=by@R{@N!EBdHd%;yz7 zDo9h}L~JUNlP0Ie60#t~MIjy?pInnC<7;biF(=CtQ7O@?lyjZlSkP4p{eFqXrMcu{ ztHff5H@5CdOurAn-0H9(Z3+cZmLk^k9O>PavK*ER>qV@^u(pIZN#u@Kmt%eoS zSVAc;=3Awr_y2u|-}W4S`|!WB1MHu&-(>&MzMEqi0uX=z1Rwwb2tWV=5P$##An;`f z%y?(gj5S-ZYtE_F#jY>z4b@Uxa}BN1blFSXJ-rZ>)+VK~m?%sL$Fr7%mWI4JP zj|t@Ow0FuItCeV_rqc$kO6_crj?NggO4kwb4yFT^51j;Pl>ndr_hs~AlnDU{KmY;| vfB*y_009U<00Izrv;uhl|D*LrX%K(_1Rwwb2tWV=5P$##AOL|cL*V}bWAsf2 literal 0 HcmV?d00001 diff --git a/Chapter03/mlflow-advanced/start-mlflow-server.sh b/Chapter03/mlflow-advanced/start-mlflow-server.sh index 822e78a..49b0ea2 100644 --- a/Chapter03/mlflow-advanced/start-mlflow-server.sh +++ b/Chapter03/mlflow-advanced/start-mlflow-server.sh @@ -1,5 +1,6 @@ mlflow server \ --backend-store-uri sqlite:///mlflow.db \ --default-artifact-root ./artifacts \ - --host 0.0.0.0 + --host 0.0.0.0 \ + --port 8000 diff --git a/Chapter03/pipelines/sparkmllib_pipeline.py b/Chapter03/pipelines/sparkmllib_pipeline.py index 14535e8..c4402b2 100644 --- a/Chapter03/pipelines/sparkmllib_pipeline.py +++ b/Chapter03/pipelines/sparkmllib_pipeline.py @@ -16,7 +16,7 @@ # Get the data and place it in a spark dataframe data = spark.read.format("csv").option("sep", ";").option("inferSchema", "true").option("header", "true").load( - "../../chapter1/stream-classifier/data/bank/bank.csv") + "../Chapter01/classifying/bank_data/bank.csv") # map target to numerical category data = 
data.withColumn('label', f.when((f.col("y") == "yes"), 1).otherwise(0)) From c536d89bef0b0e318a4157868d08160965640c3f Mon Sep 17 00:00:00 2001 From: andymc629 Date: Fri, 7 Feb 2025 20:20:06 +0000 Subject: [PATCH 3/6] This ch3 env builds and works for all the cases tested now. --- Chapter03/pipelines/sklearn_pipeline.py | 7 +++++++ Chapter03/pipelines/sparkmllib_pipeline.py | 2 ++ 2 files changed, 9 insertions(+) diff --git a/Chapter03/pipelines/sklearn_pipeline.py b/Chapter03/pipelines/sklearn_pipeline.py index 479b3be..8be03dc 100644 --- a/Chapter03/pipelines/sklearn_pipeline.py +++ b/Chapter03/pipelines/sklearn_pipeline.py @@ -3,6 +3,8 @@ from sklearn.impute import SimpleImputer from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +import pandas as pd numeric_features = ['age', 'balance'] numeric_transformer = Pipeline(steps=[ @@ -22,5 +24,10 @@ clf_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', LogisticRegression())]) +df = pd.read_csv('../../Chapter01/classifying/bank_data/bank.csv', delimiter=';', decimal=',') +X, y = df.drop('y', axis=1), df['y'].apply(lambda x: 1 if x == 'yes' else 0) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42) +# You need ot get clf_pipeline.fit(X_train, y_train) +print(clf_pipeline.predict(X_test)) diff --git a/Chapter03/pipelines/sparkmllib_pipeline.py b/Chapter03/pipelines/sparkmllib_pipeline.py index c4402b2..c49e586 100644 --- a/Chapter03/pipelines/sparkmllib_pipeline.py +++ b/Chapter03/pipelines/sparkmllib_pipeline.py @@ -68,3 +68,5 @@ # Define the entire pipeline and fit on the train data and transform on the test data clfPipeline = Pipeline().setStages(stages).fit(trainingData) clfPipeline.transform(testData) + + print(clfPipeline.transform(testData).show()) From 09ebf02058ff24cb21362d747c05f7bf276a09a9 Mon Sep 17 00:00:00 2001 From: andymc629 
Date: Fri, 7 Feb 2025 20:21:13 +0000 Subject: [PATCH 4/6] Fixing the env yaml definition. --- Chapter03/mlewp-chapter03-hotfix copy.yml.txt | 211 --------- Chapter03/mlewp-chapter03-hotfix.yml | 211 --------- Chapter03/mlewp-chapter03.yml | 428 +++++++++--------- 3 files changed, 203 insertions(+), 647 deletions(-) delete mode 100644 Chapter03/mlewp-chapter03-hotfix copy.yml.txt delete mode 100644 Chapter03/mlewp-chapter03-hotfix.yml diff --git a/Chapter03/mlewp-chapter03-hotfix copy.yml.txt b/Chapter03/mlewp-chapter03-hotfix copy.yml.txt deleted file mode 100644 index d9541a3..0000000 --- a/Chapter03/mlewp-chapter03-hotfix copy.yml.txt +++ /dev/null @@ -1,211 +0,0 @@ -name: mlewp-chapter03-hotfix -channels: - - conda-forge -dependencies: - - aiohappyeyeballs=2.4.4 - - aiohttp=3.11.11 - - aiosignal=1.3.2 - - alembic=1.14.1 - - annotated-types=0.7.0 - - async-timeout=5.0.1 - - attrs=25.1.0 - - aws-c-auth=0.8.1 - - aws-c-cal=0.8.1 - - aws-c-common=0.10.6 - - aws-c-compression=0.3.0 - - aws-c-event-stream=0.5.0 - - aws-c-http=0.9.2 - - aws-c-io=0.15.3 - - aws-c-mqtt=0.11.0 - - aws-c-s3=0.7.9 - - aws-c-sdkutils=0.2.2 - - aws-checksums=0.2.2 - - aws-crt-cpp=0.29.9 - - aws-sdk-cpp=1.11.489 - - azure-core-cpp=1.14.0 - - azure-identity-cpp=1.10.0 - - azure-storage-blobs-cpp=12.13.0 - - azure-storage-common-cpp=12.8.0 - - azure-storage-files-datalake-cpp=12.12.0 - - bcrypt=4.2.1 - - blinker=1.9.0 - - brotli=1.1.0 - - brotli-bin=1.1.0 - - brotli-python=1.1.0 - - bzip2=1.0.8 - - c-ares=1.34.4 - - ca-certificates=2025.1.31 - - cachetools=5.5.1 - - certifi=2024.12.14 - - cffi=1.17.1 - - charset-normalizer=3.4.1 - - click=8.1.8 - - cloudpickle=3.1.1 - - colorama=0.4.6 - - colorlog=6.9.0 - - contourpy=1.3.1 - - cryptography=44.0.0 - - cycler=0.12.1 - - databricks-sdk=0.43.0 - - deprecated=1.2.18 - - docker-py=7.1.0 - - entrypoints=0.4 - - flask=3.1.0 - - fonttools=4.55.8 - - freetype=2.12.1 - - frozenlist=1.5.0 - - gflags=2.2.2 - - gitdb=4.0.12 - - gitpython=3.1.44 - - glog=0.7.1 
- - google-auth=2.38.0 - - graphene=3.4.3 - - graphql-core=3.2.6 - - graphql-relay=3.2.0 - - greenlet=3.1.1 - - gunicorn=23.0.0 - - h2=4.2.0 - - hpack=4.1.0 - - hyperframe=6.1.0 - - icu=75.1 - - idna=3.10 - - importlib-metadata=8.6.1 - - itsdangerous=2.2.0 - - jinja2=3.1.5 - - joblib=1.4.2 - - kiwisolver=1.4.7 - - krb5=1.21.3 - - lcms2=2.16 - - lerc=4.0.0 - - libabseil=20240722.0 - - libarrow=18.1.0 - - libarrow-acero=18.1.0 - - libarrow-dataset=18.1.0 - - libarrow-substrait=18.1.0 - - libblas=3.9.0 - - libbrotlicommon=1.1.0 - - libbrotlidec=1.1.0 - - libbrotlienc=1.1.0 - - libcblas=3.9.0 - - libcrc32c=1.1.2 - - libcurl=8.11.1 - - libcxx=19.1.7 - - libdeflate=1.23 - - libedit=3.1.20250104 - - libev=4.33 - - libevent=2.1.12 - - libffi=3.4.2 - - libgfortran=5.0.0 - - libgfortran5=13.2.0 - - libgoogle-cloud=2.34.0 - - libgoogle-cloud-storage=2.34.0 - - libgrpc=1.67.1 - - libiconv=1.17 - - libjpeg-turbo=3.0.0 - - liblapack=3.9.0 - - liblzma=5.6.4 - - liblzma-devel=5.6.4 - - libnghttp2=1.64.0 - - libopenblas=0.3.28 - - libparquet=18.1.0 - - libpng=1.6.46 - - libprotobuf=5.28.3 - - libre2-11=2024.07.02 - - libsodium=1.0.20 - - libsqlite=3.48.0 - - libssh2=1.11.1 - - libthrift=0.21.0 - - libtiff=4.7.0 - - libutf8proc=2.10.0 - - libwebp-base=1.5.0 - - libxcb=1.17.0 - - libxml2=2.13.5 - - libzlib=1.3.1 - - llvm-openmp=19.1.7 - - lz4-c=1.10.0 - - mako=1.3.9 - - markdown=3.6 - - markupsafe=3.0.2 - - matplotlib-base=3.10.0 - - mlflow=2.20.1 - - mlflow-skinny=2.20.1 - - mlflow-ui=2.20.1 - - multidict=6.1.0 - - munkres=1.1.4 - - ncurses=6.5 - - numpy=1.26.4 - - openjpeg=2.5.3 - - openssl=3.4.0 - - opentelemetry-api=1.16.0 - - opentelemetry-sdk=1.16.0 - - opentelemetry-semantic-conventions=0.37b0 - - optuna=4.2.0 - - orc=2.0.3 - - packaging=24.2 - - pandas=2.2.2 - - paramiko=3.5.1 - - pillow=11.1.0 - - pip=25.0 - - prometheus_client=0.21.1 - - prometheus_flask_exporter=0.23.1 - - propcache=0.2.1 - - protobuf=5.28.3 - - pthread-stubs=0.4 - - py4j=0.10.9.7 - - pyarrow=18.1.0 - - 
pyarrow-core=18.1.0 - - pyasn1=0.6.1 - - pyasn1-modules=0.4.1 - - pycparser=2.22 - - pydantic=2.10.6 - - pydantic-core=2.27.2 - - pynacl=1.5.0 - - pyopenssl=25.0.0 - - pyparsing=3.2.1 - - pysocks=1.7.1 - - pyspark=3.5.4 - - python=3.10.8 - - python-dateutil=2.9.0.post0 - - python-tzdata=2025.1 - - python_abi=3.10 - - pytz=2024.2 - - pyu2f=0.1.5 - - pywin32-on-windows=0.1.0 - - pyyaml=6.0.2 - - qhull=2020.2 - - querystring_parser=1.2.4 - - re2=2024.07.02 - - readline=8.2 - - requests=2.32.3 - - rsa=4.9 - - scikit-learn=1.6.1 - - scipy=1.15.1 - - setuptools=75.8.0 - - six=1.17.0 - - smmap=5.0.0 - - snappy=1.2.1 - - sqlalchemy=2.0.37 - - sqlparse=0.5.3 - - threadpoolctl=3.5.0 - - tk=8.6.13 - - tqdm=4.67.1 - - typing-extensions=4.12.2 - - typing_extensions=4.12.2 - - tzdata=2025a - - unicodedata2=16.0.0 - - urllib3=2.3.0 - - websocket-client=1.8.0 - - werkzeug=3.1.3 - - wheel=0.45.1 - - wrapt=1.17.2 - - xorg-libxau=1.0.12 - - xorg-libxdmcp=1.1.5 - - xz=5.6.4 - - xz-gpl-tools=5.6.4 - - xz-tools=5.6.4 - - yaml=0.2.5 - - yarl=1.18.3 - - zipp=3.21.0 - - zstandard=0.23.0 - - zstd=1.5.6 -prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03-hotfix diff --git a/Chapter03/mlewp-chapter03-hotfix.yml b/Chapter03/mlewp-chapter03-hotfix.yml deleted file mode 100644 index d9541a3..0000000 --- a/Chapter03/mlewp-chapter03-hotfix.yml +++ /dev/null @@ -1,211 +0,0 @@ -name: mlewp-chapter03-hotfix -channels: - - conda-forge -dependencies: - - aiohappyeyeballs=2.4.4 - - aiohttp=3.11.11 - - aiosignal=1.3.2 - - alembic=1.14.1 - - annotated-types=0.7.0 - - async-timeout=5.0.1 - - attrs=25.1.0 - - aws-c-auth=0.8.1 - - aws-c-cal=0.8.1 - - aws-c-common=0.10.6 - - aws-c-compression=0.3.0 - - aws-c-event-stream=0.5.0 - - aws-c-http=0.9.2 - - aws-c-io=0.15.3 - - aws-c-mqtt=0.11.0 - - aws-c-s3=0.7.9 - - aws-c-sdkutils=0.2.2 - - aws-checksums=0.2.2 - - aws-crt-cpp=0.29.9 - - aws-sdk-cpp=1.11.489 - - azure-core-cpp=1.14.0 - - azure-identity-cpp=1.10.0 - - azure-storage-blobs-cpp=12.13.0 
- - azure-storage-common-cpp=12.8.0 - - azure-storage-files-datalake-cpp=12.12.0 - - bcrypt=4.2.1 - - blinker=1.9.0 - - brotli=1.1.0 - - brotli-bin=1.1.0 - - brotli-python=1.1.0 - - bzip2=1.0.8 - - c-ares=1.34.4 - - ca-certificates=2025.1.31 - - cachetools=5.5.1 - - certifi=2024.12.14 - - cffi=1.17.1 - - charset-normalizer=3.4.1 - - click=8.1.8 - - cloudpickle=3.1.1 - - colorama=0.4.6 - - colorlog=6.9.0 - - contourpy=1.3.1 - - cryptography=44.0.0 - - cycler=0.12.1 - - databricks-sdk=0.43.0 - - deprecated=1.2.18 - - docker-py=7.1.0 - - entrypoints=0.4 - - flask=3.1.0 - - fonttools=4.55.8 - - freetype=2.12.1 - - frozenlist=1.5.0 - - gflags=2.2.2 - - gitdb=4.0.12 - - gitpython=3.1.44 - - glog=0.7.1 - - google-auth=2.38.0 - - graphene=3.4.3 - - graphql-core=3.2.6 - - graphql-relay=3.2.0 - - greenlet=3.1.1 - - gunicorn=23.0.0 - - h2=4.2.0 - - hpack=4.1.0 - - hyperframe=6.1.0 - - icu=75.1 - - idna=3.10 - - importlib-metadata=8.6.1 - - itsdangerous=2.2.0 - - jinja2=3.1.5 - - joblib=1.4.2 - - kiwisolver=1.4.7 - - krb5=1.21.3 - - lcms2=2.16 - - lerc=4.0.0 - - libabseil=20240722.0 - - libarrow=18.1.0 - - libarrow-acero=18.1.0 - - libarrow-dataset=18.1.0 - - libarrow-substrait=18.1.0 - - libblas=3.9.0 - - libbrotlicommon=1.1.0 - - libbrotlidec=1.1.0 - - libbrotlienc=1.1.0 - - libcblas=3.9.0 - - libcrc32c=1.1.2 - - libcurl=8.11.1 - - libcxx=19.1.7 - - libdeflate=1.23 - - libedit=3.1.20250104 - - libev=4.33 - - libevent=2.1.12 - - libffi=3.4.2 - - libgfortran=5.0.0 - - libgfortran5=13.2.0 - - libgoogle-cloud=2.34.0 - - libgoogle-cloud-storage=2.34.0 - - libgrpc=1.67.1 - - libiconv=1.17 - - libjpeg-turbo=3.0.0 - - liblapack=3.9.0 - - liblzma=5.6.4 - - liblzma-devel=5.6.4 - - libnghttp2=1.64.0 - - libopenblas=0.3.28 - - libparquet=18.1.0 - - libpng=1.6.46 - - libprotobuf=5.28.3 - - libre2-11=2024.07.02 - - libsodium=1.0.20 - - libsqlite=3.48.0 - - libssh2=1.11.1 - - libthrift=0.21.0 - - libtiff=4.7.0 - - libutf8proc=2.10.0 - - libwebp-base=1.5.0 - - libxcb=1.17.0 - - 
libxml2=2.13.5 - - libzlib=1.3.1 - - llvm-openmp=19.1.7 - - lz4-c=1.10.0 - - mako=1.3.9 - - markdown=3.6 - - markupsafe=3.0.2 - - matplotlib-base=3.10.0 - - mlflow=2.20.1 - - mlflow-skinny=2.20.1 - - mlflow-ui=2.20.1 - - multidict=6.1.0 - - munkres=1.1.4 - - ncurses=6.5 - - numpy=1.26.4 - - openjpeg=2.5.3 - - openssl=3.4.0 - - opentelemetry-api=1.16.0 - - opentelemetry-sdk=1.16.0 - - opentelemetry-semantic-conventions=0.37b0 - - optuna=4.2.0 - - orc=2.0.3 - - packaging=24.2 - - pandas=2.2.2 - - paramiko=3.5.1 - - pillow=11.1.0 - - pip=25.0 - - prometheus_client=0.21.1 - - prometheus_flask_exporter=0.23.1 - - propcache=0.2.1 - - protobuf=5.28.3 - - pthread-stubs=0.4 - - py4j=0.10.9.7 - - pyarrow=18.1.0 - - pyarrow-core=18.1.0 - - pyasn1=0.6.1 - - pyasn1-modules=0.4.1 - - pycparser=2.22 - - pydantic=2.10.6 - - pydantic-core=2.27.2 - - pynacl=1.5.0 - - pyopenssl=25.0.0 - - pyparsing=3.2.1 - - pysocks=1.7.1 - - pyspark=3.5.4 - - python=3.10.8 - - python-dateutil=2.9.0.post0 - - python-tzdata=2025.1 - - python_abi=3.10 - - pytz=2024.2 - - pyu2f=0.1.5 - - pywin32-on-windows=0.1.0 - - pyyaml=6.0.2 - - qhull=2020.2 - - querystring_parser=1.2.4 - - re2=2024.07.02 - - readline=8.2 - - requests=2.32.3 - - rsa=4.9 - - scikit-learn=1.6.1 - - scipy=1.15.1 - - setuptools=75.8.0 - - six=1.17.0 - - smmap=5.0.0 - - snappy=1.2.1 - - sqlalchemy=2.0.37 - - sqlparse=0.5.3 - - threadpoolctl=3.5.0 - - tk=8.6.13 - - tqdm=4.67.1 - - typing-extensions=4.12.2 - - typing_extensions=4.12.2 - - tzdata=2025a - - unicodedata2=16.0.0 - - urllib3=2.3.0 - - websocket-client=1.8.0 - - werkzeug=3.1.3 - - wheel=0.45.1 - - wrapt=1.17.2 - - xorg-libxau=1.0.12 - - xorg-libxdmcp=1.1.5 - - xz=5.6.4 - - xz-gpl-tools=5.6.4 - - xz-tools=5.6.4 - - yaml=0.2.5 - - yarl=1.18.3 - - zipp=3.21.0 - - zstandard=0.23.0 - - zstd=1.5.6 -prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03-hotfix diff --git a/Chapter03/mlewp-chapter03.yml b/Chapter03/mlewp-chapter03.yml index 9ec4025..d9541a3 100644 --- 
a/Chapter03/mlewp-chapter03.yml +++ b/Chapter03/mlewp-chapter03.yml @@ -1,233 +1,211 @@ -name: mlewp-chapter03 +name: mlewp-chapter03-hotfix channels: - conda-forge dependencies: - - appnope=0.1.3 - - asttokens=2.2.1 - - backcall=0.2.0 - - backports=1.0 - - backports.functools_lru_cache=1.6.4 + - aiohappyeyeballs=2.4.4 + - aiohttp=3.11.11 + - aiosignal=1.3.2 + - alembic=1.14.1 + - annotated-types=0.7.0 + - async-timeout=5.0.1 + - attrs=25.1.0 + - aws-c-auth=0.8.1 + - aws-c-cal=0.8.1 + - aws-c-common=0.10.6 + - aws-c-compression=0.3.0 + - aws-c-event-stream=0.5.0 + - aws-c-http=0.9.2 + - aws-c-io=0.15.3 + - aws-c-mqtt=0.11.0 + - aws-c-s3=0.7.9 + - aws-c-sdkutils=0.2.2 + - aws-checksums=0.2.2 + - aws-crt-cpp=0.29.9 + - aws-sdk-cpp=1.11.489 + - azure-core-cpp=1.14.0 + - azure-identity-cpp=1.10.0 + - azure-storage-blobs-cpp=12.13.0 + - azure-storage-common-cpp=12.8.0 + - azure-storage-files-datalake-cpp=12.12.0 + - bcrypt=4.2.1 + - blinker=1.9.0 + - brotli=1.1.0 + - brotli-bin=1.1.0 + - brotli-python=1.1.0 - bzip2=1.0.8 - - ca-certificates=2022.12.7 - - comm=0.1.3 - - debugpy=1.6.7 - - decorator=5.1.1 - - executing=1.2.0 - - importlib-metadata=6.6.0 - - importlib_metadata=6.6.0 - - ipykernel=6.22.0 - - ipython=8.13.1 - - jedi=0.18.2 - - jupyter_client=8.2.0 - - jupyter_core=5.3.0 - - libcxx=16.0.2 + - c-ares=1.34.4 + - ca-certificates=2025.1.31 + - cachetools=5.5.1 + - certifi=2024.12.14 + - cffi=1.17.1 + - charset-normalizer=3.4.1 + - click=8.1.8 + - cloudpickle=3.1.1 + - colorama=0.4.6 + - colorlog=6.9.0 + - contourpy=1.3.1 + - cryptography=44.0.0 + - cycler=0.12.1 + - databricks-sdk=0.43.0 + - deprecated=1.2.18 + - docker-py=7.1.0 + - entrypoints=0.4 + - flask=3.1.0 + - fonttools=4.55.8 + - freetype=2.12.1 + - frozenlist=1.5.0 + - gflags=2.2.2 + - gitdb=4.0.12 + - gitpython=3.1.44 + - glog=0.7.1 + - google-auth=2.38.0 + - graphene=3.4.3 + - graphql-core=3.2.6 + - graphql-relay=3.2.0 + - greenlet=3.1.1 + - gunicorn=23.0.0 + - h2=4.2.0 + - hpack=4.1.0 + - 
hyperframe=6.1.0 + - icu=75.1 + - idna=3.10 + - importlib-metadata=8.6.1 + - itsdangerous=2.2.0 + - jinja2=3.1.5 + - joblib=1.4.2 + - kiwisolver=1.4.7 + - krb5=1.21.3 + - lcms2=2.16 + - lerc=4.0.0 + - libabseil=20240722.0 + - libarrow=18.1.0 + - libarrow-acero=18.1.0 + - libarrow-dataset=18.1.0 + - libarrow-substrait=18.1.0 + - libblas=3.9.0 + - libbrotlicommon=1.1.0 + - libbrotlidec=1.1.0 + - libbrotlienc=1.1.0 + - libcblas=3.9.0 + - libcrc32c=1.1.2 + - libcurl=8.11.1 + - libcxx=19.1.7 + - libdeflate=1.23 + - libedit=3.1.20250104 + - libev=4.33 + - libevent=2.1.12 - libffi=3.4.2 - - libsodium=1.0.18 - - libsqlite=3.40.0 - - libzlib=1.2.13 - - matplotlib-inline=0.1.6 - - ncurses=6.3 - - nest-asyncio=1.5.6 - - openssl=3.1.0 - - packaging=23.1 - - parso=0.8.3 - - pexpect=4.8.0 - - pickleshare=0.7.5 - - pip=23.1.2 - - platformdirs=3.5.0 - - prompt-toolkit=3.0.38 - - prompt_toolkit=3.0.38 - - psutil=5.9.5 - - ptyprocess=0.7.0 - - pure_eval=0.2.2 - - pygments=2.15.1 + - libgfortran=5.0.0 + - libgfortran5=13.2.0 + - libgoogle-cloud=2.34.0 + - libgoogle-cloud-storage=2.34.0 + - libgrpc=1.67.1 + - libiconv=1.17 + - libjpeg-turbo=3.0.0 + - liblapack=3.9.0 + - liblzma=5.6.4 + - liblzma-devel=5.6.4 + - libnghttp2=1.64.0 + - libopenblas=0.3.28 + - libparquet=18.1.0 + - libpng=1.6.46 + - libprotobuf=5.28.3 + - libre2-11=2024.07.02 + - libsodium=1.0.20 + - libsqlite=3.48.0 + - libssh2=1.11.1 + - libthrift=0.21.0 + - libtiff=4.7.0 + - libutf8proc=2.10.0 + - libwebp-base=1.5.0 + - libxcb=1.17.0 + - libxml2=2.13.5 + - libzlib=1.3.1 + - llvm-openmp=19.1.7 + - lz4-c=1.10.0 + - mako=1.3.9 + - markdown=3.6 + - markupsafe=3.0.2 + - matplotlib-base=3.10.0 + - mlflow=2.20.1 + - mlflow-skinny=2.20.1 + - mlflow-ui=2.20.1 + - multidict=6.1.0 + - munkres=1.1.4 + - ncurses=6.5 + - numpy=1.26.4 + - openjpeg=2.5.3 + - openssl=3.4.0 + - opentelemetry-api=1.16.0 + - opentelemetry-sdk=1.16.0 + - opentelemetry-semantic-conventions=0.37b0 + - optuna=4.2.0 + - orc=2.0.3 + - packaging=24.2 + - 
pandas=2.2.2 + - paramiko=3.5.1 + - pillow=11.1.0 + - pip=25.0 + - prometheus_client=0.21.1 + - prometheus_flask_exporter=0.23.1 + - propcache=0.2.1 + - protobuf=5.28.3 + - pthread-stubs=0.4 + - py4j=0.10.9.7 + - pyarrow=18.1.0 + - pyarrow-core=18.1.0 + - pyasn1=0.6.1 + - pyasn1-modules=0.4.1 + - pycparser=2.22 + - pydantic=2.10.6 + - pydantic-core=2.27.2 + - pynacl=1.5.0 + - pyopenssl=25.0.0 + - pyparsing=3.2.1 + - pysocks=1.7.1 + - pyspark=3.5.4 - python=3.10.8 - - python-dateutil=2.8.2 + - python-dateutil=2.9.0.post0 + - python-tzdata=2025.1 - python_abi=3.10 - - pyzmq=25.0.2 + - pytz=2024.2 + - pyu2f=0.1.5 + - pywin32-on-windows=0.1.0 + - pyyaml=6.0.2 + - qhull=2020.2 + - querystring_parser=1.2.4 + - re2=2024.07.02 - readline=8.2 - - setuptools=67.7.2 - - six=1.16.0 - - stack_data=0.6.2 - - tk=8.6.12 - - traitlets=5.9.0 - - typing-extensions=4.5.0 - - typing_extensions=4.5.0 - - wcwidth=0.2.6 - - wheel=0.40.0 - - xz=5.2.6 - - zeromq=4.3.4 - - zipp=3.15.0 - - pip: - - absl-py==1.4.0 - - alembic==1.10.4 - - alibi-detect==0.11.2 - - anyio==3.6.2 - - argon2-cffi==21.3.0 - - argon2-cffi-bindings==21.2.0 - - arrow==1.2.3 - - astunparse==1.6.3 - - attrs==23.1.0 - - auto-sklearn==0.15.0 - - beautifulsoup4==4.12.2 - - bleach==6.0.0 - - blinker==1.6.2 - - cachetools==5.3.0 - - catalogue==2.0.8 - - certifi==2022.12.7 - - cffi==1.15.1 - - charset-normalizer==3.1.0 - - click==8.1.3 - - cloudpickle==2.2.1 - - cmaes==0.9.1 - - colorlog==6.7.0 - - configspace==0.4.21 - - contourpy==1.0.7 - - cycler==0.11.0 - - cython==0.29.34 - - dask==2023.4.1 - - databricks-cli==0.17.6 - - defusedxml==0.7.1 - - dill==0.3.6 - - distributed==2023.4.1 - - distro==1.8.0 - - docker==6.0.1 - - emcee==3.1.4 - - entrypoints==0.4 - - evidently==0.3.1 - - fastjsonschema==2.16.3 - - filelock==3.12.0 - - flask==2.3.1 - - flatbuffers==23.3.3 - - fonttools==4.39.3 - - fqdn==1.5.1 - - fsspec==2023.4.0 - - future==0.18.3 - - gast==0.4.0 - - gitdb==4.0.10 - - gitpython==3.1.31 - - google-auth==2.17.3 - - 
google-auth-oauthlib==1.0.0 - - google-pasta==0.2.0 - - grpcio==1.54.0 - - gunicorn==20.1.0 - - h5py==3.8.0 - - huggingface-hub==0.14.1 - - hyperopt==0.2.7 - - idna==3.4 - - imageio==2.28.0 - - ipython-genutils==0.2.0 - - isoduration==20.11.0 - - itsdangerous==2.1.2 - - jax==0.4.8 - - jinja2==3.1.2 - - joblib==1.2.0 - - jsonpointer==2.3 - - jsonschema==4.17.3 - - jupyter-events==0.6.3 - - jupyter-server==2.5.0 - - jupyter-server-terminals==0.4.4 - - jupyterlab-pygments==0.2.2 - - keras==2.12.0 - - kiwisolver==1.4.4 - - lazy-loader==0.2 - - liac-arff==2.5.0 - - libclang==16.0.0 - - llvmlite==0.39.1 - - locket==1.0.0 - - mako==1.2.4 - - markdown==3.4.3 - - markupsafe==2.1.2 - - matplotlib==3.7.1 - - mistune==2.0.5 - - ml-dtypes==0.1.0 - - mlflow==2.3.1 - - msgpack==1.0.5 - - nbclassic==0.5.6 - - nbclient==0.7.4 - - nbconvert==7.3.1 - - nbformat==5.8.0 - - networkx==3.1 - - nltk==3.8.1 - - notebook==6.5.4 - - notebook-shim==0.2.3 - - numba==0.56.4 - - numpy==1.23.5 - - oauthlib==3.2.2 - - opencv-python==4.7.0.72 - - opt-einsum==3.3.0 - - optuna==3.1.1 - - pandas==2.0.1 - - pandocfilters==1.5.0 - - partd==1.4.0 - - patsy==0.5.3 - - pillow==9.5.0 - - plotly==5.14.1 - - prometheus-client==0.16.0 - - protobuf==4.22.3 - - py4j==0.10.9.7 - - pyarrow==11.0.0 - - pyasn1==0.5.0 - - pyasn1-modules==0.3.0 - - pycparser==2.21 - - pydantic==1.10.7 - - pyjwt==2.6.0 - - pynisher==0.6.4 - - pynndescent==0.5.10 - - pyparsing==3.0.9 - - pyrfr==0.8.3 - - pyrsistent==0.19.3 - - python-json-logger==2.0.7 - - pytz==2023.3 - - pywavelets==1.4.1 - - pyyaml==6.0 - - querystring-parser==1.2.4 - - regex==2023.3.23 - - requests==2.29.0 - - requests-oauthlib==1.3.1 - - rfc3339-validator==0.1.4 - - rfc3986-validator==0.1.1 - - rsa==4.9 - - scikit-image==0.20.0 - - scikit-learn==0.24.2 - - scipy==1.10.1 - - send2trash==1.8.2 - - smac==1.2 - - smmap==5.0.0 - - sniffio==1.3.0 - - sortedcontainers==2.4.0 - - soupsieve==2.4.1 - - sqlalchemy==2.0.11 - - sqlparse==0.4.4 - - statsmodels==0.13.5 - - 
tabulate==0.9.0 - - tblib==1.7.0 - - tenacity==8.2.2 - - tensorboard==2.12.2 - - tensorboard-data-server==0.7.0 - - tensorboard-plugin-wit==1.8.1 - - tensorflow-estimator==2.12.0 - - tensorflow-macos==2.12.0 - - tensorflow-metal==0.8.0 - - termcolor==2.3.0 - - terminado==0.17.1 - - threadpoolctl==3.1.0 - - tifffile==2023.4.12 - - tinycss2==1.2.1 - - tokenizers==0.13.3 - - toml==0.10.2 - - toolz==0.12.0 - - tornado==6.3.1 - - tqdm==4.65.0 - - transformers==4.28.1 - - tzdata==2023.3 - - umap-learn==0.5.3 - - uri-template==1.2.0 - - urllib3==1.26.15 - - webcolors==1.13 - - webencodings==0.5.1 - - websocket-client==1.5.1 - - werkzeug==2.3.2 - - wrapt==1.14.1 - - zict==3.0.0 -prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03 + - requests=2.32.3 + - rsa=4.9 + - scikit-learn=1.6.1 + - scipy=1.15.1 + - setuptools=75.8.0 + - six=1.17.0 + - smmap=5.0.0 + - snappy=1.2.1 + - sqlalchemy=2.0.37 + - sqlparse=0.5.3 + - threadpoolctl=3.5.0 + - tk=8.6.13 + - tqdm=4.67.1 + - typing-extensions=4.12.2 + - typing_extensions=4.12.2 + - tzdata=2025a + - unicodedata2=16.0.0 + - urllib3=2.3.0 + - websocket-client=1.8.0 + - werkzeug=3.1.3 + - wheel=0.45.1 + - wrapt=1.17.2 + - xorg-libxau=1.0.12 + - xorg-libxdmcp=1.1.5 + - xz=5.6.4 + - xz-gpl-tools=5.6.4 + - xz-tools=5.6.4 + - yaml=0.2.5 + - yarl=1.18.3 + - zipp=3.21.0 + - zstandard=0.23.0 + - zstd=1.5.6 +prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03-hotfix From 6e60c8a71fcc91a20d8cb528e9c993844941fac1 Mon Sep 17 00:00:00 2001 From: andymc629 Date: Fri, 7 Feb 2025 22:16:02 +0000 Subject: [PATCH 5/6] Now have the autosklearn example work. 
--- Chapter03/automl/Dockerfile | 39 +++++++++++++++++++++ Chapter03/automl/README.md | 10 ++++++ Chapter03/automl/autosklearn_example.py | 7 +++- Chapter03/automl/run_autosklearn_example.sh | 2 ++ Chapter03/mlewp-chapter03.yml | 2 +- 5 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 Chapter03/automl/Dockerfile create mode 100644 Chapter03/automl/README.md create mode 100644 Chapter03/automl/run_autosklearn_example.sh diff --git a/Chapter03/automl/Dockerfile b/Chapter03/automl/Dockerfile new file mode 100644 index 0000000..741bca3 --- /dev/null +++ b/Chapter03/automl/Dockerfile @@ -0,0 +1,39 @@ +FROM ubuntu:20.04 + +# install linux packages +RUN apt-get update + +# Set the locale +# workaround for https://github.com/automl/auto-sklearn/issues/867 +RUN apt-get -y install locales +RUN touch /usr/share/locale/locale.alias +RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && locale-gen +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US:en +ENV LC_ALL=en_US.UTF-8 + +# limit numeric libraries to one thread each; ENV (unlike RUN export) persists into later layers and the running container +ENV OPENBLAS_NUM_THREADS=1 +ENV MKL_NUM_THREADS=1 +ENV BLAS_NUM_THREADS=1 +ENV OMP_NUM_THREADS=1 + +# install build requirements +RUN apt-get install -y python3-dev python3-pip +RUN pip3 install --upgrade setuptools +RUN apt-get install -y build-essential + +RUN apt-get install -y swig + +# Copy the checkout autosklearn version for installation +#ADD .
/auto-sklearn/ + +# Upgrade pip then install dependencies +RUN pip3 install --upgrade pip + +# Install +RUN pip3 install "auto-sklearn[test, examples]" + +COPY autosklearn_example.py autosklearn_example.py + +CMD ["python3", "autosklearn_example.py"] \ No newline at end of file diff --git a/Chapter03/automl/README.md b/Chapter03/automl/README.md new file mode 100644 index 0000000..ff54441 --- /dev/null +++ b/Chapter03/automl/README.md @@ -0,0 +1,10 @@ +# Autosklearn example +There are known issues around installing auto-sklearn on MacOS and Windows systems so I have set this up to run in a docker container. + +To run this example just run the following: + +```bash +docker build -t autosklearn . +docker run autosklearn +``` + diff --git a/Chapter03/automl/autosklearn_example.py b/Chapter03/automl/autosklearn_example.py index 2e7c577..8a00fe3 100644 --- a/Chapter03/automl/autosklearn_example.py +++ b/Chapter03/automl/autosklearn_example.py @@ -2,15 +2,20 @@ import sklearn.datasets import sklearn.metrics import autosklearn.classification +from sklearn.datasets import load_wine +from sklearn.model_selection import train_test_split automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=60, per_run_time_limit=30 ) +X, y = load_wine(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42) + automl.fit(X_train, y_train, dataset_name='wine') print(automl.show_models()) print(automl.sprint_statistics()) predictions = automl.predict(X_test) -sklearn.metrics.accuracy_score(y_test, predictions) +print(sklearn.metrics.accuracy_score(y_test, predictions)) \ No newline at end of file diff --git a/Chapter03/automl/run_autosklearn_example.sh b/Chapter03/automl/run_autosklearn_example.sh new file mode 100644 index 0000000..5509468 --- /dev/null +++ b/Chapter03/automl/run_autosklearn_example.sh @@ -0,0 +1,2 @@ +docker build -t autosklearn_image .
+docker run -it autosklearn_image \ No newline at end of file diff --git a/Chapter03/mlewp-chapter03.yml b/Chapter03/mlewp-chapter03.yml index d9541a3..3f5602f 100644 --- a/Chapter03/mlewp-chapter03.yml +++ b/Chapter03/mlewp-chapter03.yml @@ -1,4 +1,4 @@ -name: mlewp-chapter03-hotfix +name: mlewp-chapter03 channels: - conda-forge dependencies: From 8c8ab830aea0f88374af4d277bdfadb7402ad895 Mon Sep 17 00:00:00 2001 From: andymc629 Date: Fri, 7 Feb 2025 22:18:18 +0000 Subject: [PATCH 6/6] Readme updates. --- Chapter03/automl/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Chapter03/automl/README.md b/Chapter03/automl/README.md index ff54441..c9337a3 100644 --- a/Chapter03/automl/README.md +++ b/Chapter03/automl/README.md @@ -1,7 +1,7 @@ # Autosklearn example There are known issues around installing auto-sklearn on MacOS and Windows systems so I have set this up to run in a docker container. -To run this example just run the following: +To run this example just run the following (this assumes you have already run ```conda env create -f mlewp-chapter03.yml```): ```bash docker build -t autosklearn .