diff --git a/.binder/postBuild b/.binder/postBuild index 93c70aac..fae5b164 100644 --- a/.binder/postBuild +++ b/.binder/postBuild @@ -14,4 +14,4 @@ mv docs/build/html/jupyter_notebooks . shopt -s extglob rm -rf .[!.]* rm -rf !(jupyter_notebooks|docs) -(cd docs && rm -rf !(src)) \ No newline at end of file +(cd docs && rm -rf !(src)) diff --git a/.binder/requirements.txt b/.binder/requirements.txt index 9149df9c..3c8d7e78 100644 --- a/.binder/requirements.txt +++ b/.binder/requirements.txt @@ -1 +1 @@ --r ../requirements.txt \ No newline at end of file +-r ../requirements.txt diff --git a/.binder/runtime.txt b/.binder/runtime.txt index 032aea2d..8fdd9071 100644 --- a/.binder/runtime.txt +++ b/.binder/runtime.txt @@ -1 +1 @@ -python-3.9 \ No newline at end of file +python-3.9 diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index c4fbd977..522266b1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -12,7 +12,7 @@ A clear and concise description of what the bug is. *NB:* for questions about pydeseq2 that are not related to a bug, please open a topic on the [scverse ecosystem Discourse forum](https://discourse.scverse.org/c/ecosystem/38). **To Reproduce** -Provide snippets of code and steps on how to reproduce the behavior. +Provide snippets of code and steps on how to reproduce the behavior. Please also specify the version you are using. **Expected behavior** diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 03faba52..6222fe20 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -14,4 +14,3 @@ https://github.com/blog/1506-closing-issues-via-pull-requests #### What does your PR implement? Be specific. 
- diff --git a/.github/workflows/pr_validation.yml b/.github/workflows/pr_validation.yml index 79f2ef8b..178fa458 100644 --- a/.github/workflows/pr_validation.yml +++ b/.github/workflows/pr_validation.yml @@ -2,6 +2,7 @@ name: pr-validation # Controls when the workflow will run on: + workflow_dispatch: push: branches: - '**' @@ -18,31 +19,41 @@ jobs: matrix: include: - os: ubuntu-latest - python: "3.10" + python: "3.11" - os: ubuntu-latest - python: "3.10" - pip-flags: "--pre" + python: "3.12" - os: ubuntu-latest - python: "3.11" + python: "3.13" - os: ubuntu-latest - python: "3.11" + python: "3.13" pip-flags: "--pre" steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} + - name: Install the latest version of uv + uses: astral-sh/setup-uv@v6 + with: + version: "latest" - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install -e ."[dev]" - pip install -r docs/requirements.txt + uv sync --extra doc --extra dev + - name: Lint with ruff + run: | + uv run ruff check --fix --exit-non-zero-on-fix pydeseq2 + - name: Format with ruff + run: | + uv run ruff format --check pydeseq2 + - name: Type check with mypy + run: | + uv run mypy -p pydeseq2 - name: Test with pytest run: | - coverage run -m pytest + uv run coverage run -m pytest - name: Generate code coverage report run: | - coverage html + uv run coverage html - name: Upload coverage artifacts uses: actions/upload-artifact@v4 with: @@ -51,5 +62,9 @@ jobs: retention-days: 20 - name: Compile docs run: | + uv pip freeze > requirements.txt cd docs - make clean html + uv run make clean html + - name: Build package + run: | + uv build diff --git a/.gitignore b/.gitignore index c3b28032..c6f9823a 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ +.ruff_cache/ # Translations *.mo @@ -133,3 +134,12 @@ dmypy.json # IDEs .vscode + +# Docs files +output_files/ 
+docs/source/auto_examples/ +docs/source/sg_execution_times.rst + +# Requirement files +uv.lock +requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 64895ad0..1af241d5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,18 +1,35 @@ +fail_fast: false +default_language_version: + python: python3 +default_stages: + - pre-commit + - pre-push +minimum_pre_commit_version: 2.16.0 repos: -- repo: https://github.com/psf/black-pre-commit-mirror - rev: 25.9.0 - hooks: - - id: black - additional_dependencies: ['click==8.0.4'] - args: # arguments to configure black - - --line-length=89 -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.0 - hooks: - - id: ruff - args: [ --fix, --exit-non-zero-on-fix] +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: detect-private-key + - id: check-ast + - id: end-of-file-fixer + - id: mixed-line-ending + args: [--fix=lf] + - id: check-added-large-files + stages: [pre-commit] + args: ["--maxkb=50000"] + - id: trailing-whitespace + - id: check-case-conflict +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.0 + hooks: + - id: ruff-check + types_or: [python, pyi, jupyter] + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + types_or: [python, pyi, jupyter] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.18.2 hooks: - id: mypy - exclude: ^(tests/|docs/source/conf.py) \ No newline at end of file + pass_filenames: false + args: [-p, pydeseq2, --ignore-missing-imports] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ebd2fbcf..20504921 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,4 +11,4 @@ Code of Conduct --------------- `PyDeseq2` follows the code of conduct defined by -the Python Software Foundation: https://www.python.org/psf/codeofconduct/. \ No newline at end of file +the Python Software Foundation: https://www.python.org/psf/codeofconduct/. 
diff --git a/README.md b/README.md index 9f1b45b3..c487c7e9 100644 --- a/README.md +++ b/README.md @@ -6,14 +6,14 @@ [![condaDownloads](https://img.shields.io/conda/dn/bioconda/pydeseq2?logo=Anaconda)](https://anaconda.org/bioconda/pydeseq2) [![license](https://img.shields.io/pypi/l/pydeseq2)](LICENSE) -PyDESeq2 is a python implementation of the [DESeq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) +PyDESeq2 is a python implementation of the [DESeq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) method [1] for differential expression analysis (DEA) with bulk RNA-seq data, originally in R. It aims to facilitate DEA experiments for python users. -As PyDESeq2 is a re-implementation of [DESeq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) from +As PyDESeq2 is a re-implementation of [DESeq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) from scratch, you may experience some differences in terms of retrieved values or available features. -Currently, available features broadly correspond to the default settings of DESeq2 (v1.34.0) for single-factor and +Currently, available features broadly correspond to the default settings of DESeq2 (v1.34.0) for single-factor and multi-factor analysis (with categorical or continuous factors) using Wald tests. We plan to implement more in the future. In case there is a feature you would particularly like to be implemented, feel free to open an issue. 
@@ -40,17 +40,25 @@ In case there is a feature you would particularly like to be implemented, feel f `PyDESeq2` can be installed from PyPI using `pip`: -`pip install pydeseq2` +```bash +pip install pydeseq2 +``` We recommend installing within a conda environment: -``` +```bash conda create -n pydeseq2 conda activate pydeseq2 conda install pip pip install pydeseq2 ``` +You can also add it to your projects through `uv`: + +```bash +uv add pydeseq2 +``` + ### Bioconda `PyDESeq2` can also be installed from Bioconda with `conda`: @@ -61,16 +69,20 @@ If you're interested in contributing or want access to the development version, ### Requirements -The list of package version requirements is available in `setup.py`. +The list of package version requirements is available in `pyproject.toml`. For reference, the code is being tested in a github workflow (CI) with python -3.10 to 3.11 and the following package versions: +3.11 to 3.13 and the latest versions of the following packages: + ``` -- anndata 0.8.0 -- numpy 1.23.0 -- pandas 1.4.3 -- scikit-learn 1.1.1 -- scipy 1.11.0 +- anndata +- formulaic +- numpy +- pandas +- scikit-learn +- scipy +- formulaic-contrasts +- matplotlib ``` Please don't hesitate to open an issue in case you encounter any issue due to possible deprecations. @@ -84,7 +96,7 @@ contains downloadable examples on how to use PyDESeq2. ### Documentation -The documentation is hosted [here on ReadTheDocs](https://pydeseq2.readthedocs.io/en/latest/). +The documentation is hosted [here on ReadTheDocs](https://pydeseq2.readthedocs.io/en/latest/). If you want to have the latest version of the documentation, you can build it from source. Please go to the dedicated [README.md](https://github.com/owkin/PyDESeq2/blob/main/docs/README.md) for information on how to do so. @@ -105,12 +117,12 @@ documentation to see how you can contribute to PyDESeq2. 
`git clone https://github.com/owkin/PyDESeq2.git` -### 2 - Create a conda environment +### 2 - Create a uv environment -Run `conda create -n pydeseq2 python=3.10` (or higher python version) to create the `pydeseq2` environment and then activate it: -`conda activate pydeseq2`. +Run `uv venv --python 3.13` (or higher python version) to create a virtual environment in `.venv` and then activate it: +`source .venv/bin/activate`. -`cd` to the root of the repo and run `pip install -e ."[dev]"` to install in developer mode. +`cd` to the root of the repo and run `uv sync --extra dev --extra doc` to install in developer mode. Then, run `pre-commit install`. @@ -145,4 +157,3 @@ PyDESeq2 is a living project and any contributions are welcome! Feel free to ope ## License PyDESeq2 is released under an [MIT license](https://github.com/owkin/PyDESeq2/blob/main/LICENSE). - diff --git a/datasets/README.md b/datasets/README.md index bbff6fb8..a4afee51 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -3,9 +3,9 @@ This directory stores example data which can be retrieved using the `load_example_data` function from `pydeseq2.utils`. Only synthetic data is provided for now, but new datasets might be available in the future. -The `tests_clinical.csv` and `test_counts.csv` files were generated using a custom modification of the -`makeExampleDESeqDataSet` method of [DESeq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html), -to handle several factors. +The `test_clinical.csv` and `test_counts.csv` files were generated using a custom modification of the +`makeExampleDESeqDataSet` method of [DESeq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html), +to handle several factors.
## Folder organisation @@ -15,7 +15,7 @@ PyDESeq2 │ └───datasets │ - └───synthetic + └───synthetic │ test_clinical.csv │ test_counts.csv ``` diff --git a/docs/Makefile b/docs/Makefile index 113942a4..284fa60b 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -23,4 +23,3 @@ help: livehtml: sphinx-autobuild -b html $(SOURCEDIR) $(BUILDDIR)/html - diff --git a/docs/README.md b/docs/README.md index 67eb4932..86ec182f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ # Documentation -The documentation of PyDESeq2 is generated using Sphinx and hosted on ReadTheDoc. +The documentation of PyDESeq2 is generated using Sphinx and hosted on ReadTheDocs. If you want to build the documentation from source, start by cloning the repository, if you have not done it yet. ```bash @@ -8,22 +8,18 @@ git clone https://github.com/owkin/PyDESeq2.git cd PyDESeq2 ``` -We recommend installing within a conda environment, using the `environment.yaml` provided with the sources.: +We recommend installing with `uv`: ```bash -conda env create -f environment.yml -conda activate pydeseq2 +uv venv +source .venv/bin/activate +uv sync --extra dev --extra doc +uv pip freeze > requirements.txt # Required for Binder to work ``` -Now go to the doc folder, and install the extraneeded dependencies. +You can now go to the doc folder and trigger the build using `make` command line. ```bash cd docs -pip install -r requirements.txt -``` - -You can now trigger the build using `make` command line. - -```bash make clean html -``` \ No newline at end of file +``` diff --git a/docs/make.bat b/docs/make.bat index 747ffb7b..dc1312ab 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,35 +1,35 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100755 index 521c6ba5..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -jupyter -sphinx~=6.0.0 -sphinx_rtd_theme~=2.0.0 -sphinx-autobuild==2020.9.1 -texttable==1.6.3 -myst-parser~=2.0.0 -# Sphinx 3.3.1 does not require a specific version of docutils -# but docutils 0.17 changed the output html markup, breaking the RTD theme -# original issue: https://github.com/sphinx-doc/sphinx/issues/9051 -docutils~=0.18.0 -sphinx_click==3.1.0 -sphinx_gallery==0.11.1 -sphinx_autodoc_typehints -sphinxcontrib-bibtex==2.5.0 -gitpython>=3.1.27 -ipython diff --git a/docs/source/api/docstrings/pydeseq2.dds.DeseqDataSet.rst b/docs/source/api/docstrings/pydeseq2.dds.DeseqDataSet.rst index c2df993c..d4fd1069 100644 --- a/docs/source/api/docstrings/pydeseq2.dds.DeseqDataSet.rst +++ b/docs/source/api/docstrings/pydeseq2.dds.DeseqDataSet.rst @@ -8,7 +8,7 @@ .. rubric:: Methods .. autosummary:: - + ~DeseqDataSet.calculate_cooks ~DeseqDataSet.cond ~DeseqDataSet.deseq2 @@ -22,4 +22,3 @@ ~DeseqDataSet.refit ~DeseqDataSet.to_picklable_anndata ~DeseqDataSet.vst - diff --git a/docs/source/api/docstrings/pydeseq2.ds.DeseqStats.rst b/docs/source/api/docstrings/pydeseq2.ds.DeseqStats.rst index 22275d83..f9564a98 100644 --- a/docs/source/api/docstrings/pydeseq2.ds.DeseqStats.rst +++ b/docs/source/api/docstrings/pydeseq2.ds.DeseqStats.rst @@ -8,13 +8,7 @@ .. rubric:: Methods .. 
autosummary:: - + ~DeseqStats.lfc_shrink ~DeseqStats.run_wald_test ~DeseqStats.summary - - - - - - \ No newline at end of file diff --git a/docs/source/api/docstrings/pydeseq2.grid_search.rst b/docs/source/api/docstrings/pydeseq2.grid_search.rst index 57f79882..5feec552 100644 --- a/docs/source/api/docstrings/pydeseq2.grid_search.rst +++ b/docs/source/api/docstrings/pydeseq2.grid_search.rst @@ -3,30 +3,17 @@ .. automodule:: pydeseq2.grid_search - - - - - + + + + + .. rubric:: Functions .. autosummary:: - + grid_fit_alpha grid_fit_beta grid_fit_shrink_beta vec_nb_nll - - - - - - - - - - - - - diff --git a/docs/source/api/docstrings/pydeseq2.preprocessing.rst b/docs/source/api/docstrings/pydeseq2.preprocessing.rst index ffade48f..c42387ae 100644 --- a/docs/source/api/docstrings/pydeseq2.preprocessing.rst +++ b/docs/source/api/docstrings/pydeseq2.preprocessing.rst @@ -3,27 +3,14 @@ .. automodule:: pydeseq2.preprocessing - - - - - - .. rubric:: Functions - .. autosummary:: - - deseq2_norm - - - - - - - - + .. rubric:: Functions + .. autosummary:: + + deseq2_norm diff --git a/docs/source/api/docstrings/pydeseq2.utils.rst b/docs/source/api/docstrings/pydeseq2.utils.rst index 0016b550..7e1fac29 100644 --- a/docs/source/api/docstrings/pydeseq2.utils.rst +++ b/docs/source/api/docstrings/pydeseq2.utils.rst @@ -3,16 +3,16 @@ .. automodule:: pydeseq2.utils - - - - - + + + + + .. rubric:: Functions .. 
autosummary:: - + dispersion_trend dnb_nll fit_alpha_mle @@ -32,16 +32,3 @@ trimmed_mean trimmed_variance wald_test - - - - - - - - - - - - - diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index edbd320b..cd309aab 100644 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -16,4 +16,3 @@ PyDESeq2 ~utils ~grid_search ~preprocessing - diff --git a/docs/source/conf.py b/docs/source/conf.py index d74ce929..f31fb9af 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,7 +18,6 @@ import pydeseq2 - # -- Project information ----------------------------------------------------- project = "PyDESeq2" @@ -50,6 +49,7 @@ "sphinx_gallery.gen_gallery", "sphinx_autodoc_typehints", "sphinxcontrib.bibtex", + "sphinxcontrib.googleanalytics", ] @@ -62,6 +62,9 @@ autosectionlabel_prefix_document = True +## Google Analytics +googleanalytics_id = "UA-83738774-2" + # autodoc settings autodoc_default_options = { "show-inheritance": False, @@ -155,9 +158,7 @@ # documentation. # html_theme_options = { - "analytics_id": "UA-83738774-2", "logo_only": True, - "display_version": True, } # Add any paths that contain custom static files (such as style sheets) here, @@ -262,7 +263,7 @@ "branch": current_commit, # Can be any branch, tag, or commit hash. # Use a branch that hosts your docs. "binderhub_url": "https://mybinder.org", # public binderhub url - "dependencies": str(Path(__file__).parents[2] / "environment.yml"), + "dependencies": str(Path(__file__).parents[2] / "requirements.txt"), "notebooks_dir": "jupyter_notebooks", "use_jupyter_lab": True, }, diff --git a/docs/source/index.rst b/docs/source/index.rst index 4de749ec..0bfeb4d3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -63,9 +63,3 @@ PyDESeq2 is released under an `MIT license /pydeseq2.git cd pydeseq2 -We recommend using conda environment, run: +We recommend using `uv` with a virtual environment, run: .. 
code-block:: bash - conda env create -n pydeseq2-dev python=3.10 - conda activate pydeseq2-dev + uv venv --python 3.13 # or higher + source .venv/bin/activate and then install ``pydeseq2`` in the development mode. This will also install all the required dependencies. .. code-block:: bash - pip install -e ."[dev]" - pre-commit install + uv sync --extra dev --extra doc -The pre-commit tool will automatically run ``black`` and ``isort``, and check ``flake8`` -compatibility +The pre-commit tool will automatically run ``ruff`` and ``isort``, and check ``mypy`` +type compatibility 3. Add the upstream remote: @@ -147,19 +146,22 @@ Improving the documentation If you wish to contribute to the documentation you need to follow the same guidelines as for the code PR (:ref:`code_contrib`) and additionally install the dependencies required for building the documentation. -Once you have your environment for development ready, navigate to docs directory -and install required libraries +Once you have your environment for development ready, make sure that you installed +the ``dev`` and ``doc`` extra dependencies (see step 2 of :ref:`code_contrib`). +For the documentation to work, you need to export the requirements to +``requirements.txt`` by running: .. code-block:: bash - cd docs - pip install -r requirements.txt + uv sync --extra dev --extra doc + uv pip freeze > requirements.txt After you make the changes in the documentation you can check if it builds correctly by running (in the docs directory): .. code-block:: bash + cd docs make clean html If the build was correct you can now view the new document in the diff --git a/docs/source/usage/references.rst b/docs/source/usage/references.rst index 0bd8d24b..d3144ec0 100644 --- a/docs/source/usage/references.rst +++ b/docs/source/usage/references.rst @@ -2,4 +2,4 @@ References ---------- ..
bibliography:: - :all: \ No newline at end of file + :all: diff --git a/docs/source/usage/requirements.rst b/docs/source/usage/requirements.rst index 94a8a7a0..b4bbe984 100644 --- a/docs/source/usage/requirements.rst +++ b/docs/source/usage/requirements.rst @@ -2,16 +2,16 @@ Requirements ------------ The list of package version requirements is available in ``setup.py``, and will be automatically installed along PyDESeq2 when using PyPI. -For reference, the code was tested with python 3.10 and the following package versions:: +For reference, the code was tested with python 3.11 and the following package versions:: - - anndata >= 0.8.0 - - formulaic >= 1.0.2 - - numpy >= 1.23.0 - - pandas >= 1.4.0 - - scikit-learn >= 1.1.0 - - scipy >= 1.11.0 - - formulaic-contrasts >= 0.2.0 - - matplotlib >= 3.6.2 + - anndata>=0.11.0 + - formulaic>=1.0.2 + - numpy>=2.0.0 + - pandas>=2.2.0 + - scikit-learn>=1.4.0 + - scipy>=1.12.0 + - formulaic-contrasts>=0.2.0 + - matplotlib>=3.9.0 -Note that starting from pydeseq2 0.5.0, python 3.9 is no longer supported. -Please don't hesitate to open an issue in case you encounter any problems due to possible deprecations. \ No newline at end of file +Note that starting from pydeseq2 0.5.3, python 3.10 is no longer supported. +Please don't hesitate to open an issue in case you encounter any problems due to possible deprecations. diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 7d879ad8..00000000 --- a/environment.yml +++ /dev/null @@ -1,6 +0,0 @@ -name: pydeseq2 -dependencies: - - python=3.10 - - pip - - pip: - - -e . 
diff --git a/pydeseq2/__version__.py b/pydeseq2/__version__.py index 72251527..467b7113 100644 --- a/pydeseq2/__version__.py +++ b/pydeseq2/__version__.py @@ -1 +1,3 @@ -__version__ = "0.5.2" +from importlib.metadata import version + +__version__ = version("pydeseq2") diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index cfff35a3..58d1f1c1 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -2,11 +2,12 @@ import time import warnings from typing import Literal +from typing import cast import anndata as ad # type: ignore import numpy as np import pandas as pd -from formulaic_contrasts import FormulaicContrasts +from formulaic_contrasts import FormulaicContrasts # type: ignore[import-untyped] from scipy.optimize import minimize from scipy.special import polygamma # type: ignore from scipy.stats import f # type: ignore @@ -285,7 +286,7 @@ def __init__( self.design = "~" + " + ".join(design_factors) if not ( - isinstance(self.design, (str, pd.DataFrame)) or isinstance(self.design, str) + isinstance(self.design, (str | pd.DataFrame)) or isinstance(self.design, str) ): raise ValueError( "design must be a string representing a formulaic formula," @@ -316,8 +317,8 @@ def __init__( self.low_memory = low_memory self.size_factors_fit_type = size_factors_fit_type self.control_genes = control_genes - self.logmeans = None - self.filtered_genes = None + self.logmeans: np.ndarray | None = None + self.filtered_genes: np.ndarray | None = None if inference: if n_cpus: @@ -475,8 +476,12 @@ def vst_transform(self, counts: np.ndarray | None = None) -> np.ndarray: stacklevel=2, ) logmeans, filtered_genes = deseq2_norm_fit(counts) - else: + elif self.filtered_genes is not None: logmeans, filtered_genes = self.logmeans, self.filtered_genes + else: + raise RuntimeError( + "Logmeans is set but filtered_genes is None. This should not happen." 
+ ) normed_counts, _ = deseq2_norm_transform(counts, logmeans, filtered_genes) @@ -684,14 +689,20 @@ def sizeFactor(x): ) self._fit_iterate_size_factors() - else: - self.logmeans, self.filtered_genes = deseq2_norm_fit(self.X) + elif self.X is not None: + self.logmeans, self.filtered_genes = deseq2_norm_fit( + self.X.toarray() if not isinstance(self.X, np.ndarray) else self.X + ) _control_mask &= self.filtered_genes ( self.layers["normed_counts"], self.obs["size_factors"], - ) = deseq2_norm_transform(self.X, self.logmeans, _control_mask) + ) = deseq2_norm_transform( + self.X, cast(np.ndarray, self.logmeans), _control_mask + ) + else: + raise ValueError("Counts matrix 'X' is None, cannot fit size factors.") end = time.time() self.var["_normed_means"] = self.layers["normed_counts"].mean(0) @@ -1220,8 +1231,8 @@ def _fit_parametric_dispersion_trend(self, vst: bool = False): covariates.drop(labels=[gene], inplace=True) # Initialize coefficients - old_coeffs = pd.Series([0.1, 0.1]) - coeffs = pd.Series([1.0, 1.0]) + old_coeffs: np.ndarray | pd.Series = pd.Series([0.1, 0.1]) + coeffs: np.ndarray | pd.Series = pd.Series([1.0, 1.0]) while (coeffs > 1e-10).all() and ( np.log(np.abs(coeffs / old_coeffs)) ** 2 ).sum() >= 1e-6: @@ -1229,7 +1240,6 @@ def _fit_parametric_dispersion_trend(self, vst: bool = False): coeffs, predictions, converged = self.inference.dispersion_trend_gamma_glm( covariates, targets ) - if not converged or (coeffs <= 1e-10).any(): warnings.warn( "The dispersion trend curve fitting did not converge. 
" @@ -1320,10 +1330,13 @@ def _replace_outliers(self) -> None: self.counts_to_refit = self[:, self.var["replaced"]].copy() trim_base_mean = pd.DataFrame( - trimmed_mean( - self.counts_to_refit.X / self.obs["size_factors"].values[:, None], - trim=0.2, - axis=0, + np.asarray( + trimmed_mean( + self.counts_to_refit.X + / self.obs["size_factors"].values[:, None], + trim=0.2, + axis=0, + ) ), index=self.counts_to_refit.var_names, ) @@ -1348,9 +1361,9 @@ def _refit_without_outliers( self, ) -> None: """Re-run the whole DESeq2 pipeline with replaced outliers.""" - assert ( - self.refit_cooks - ), "Trying to refit Cooks outliers but the 'refit_cooks' flag is set to False" + assert self.refit_cooks, ( + "Trying to refit Cooks outliers but the 'refit_cooks' flag is set to False" + ) # Check that _replace_outliers() was previously run. if "replaced" not in self.var: diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py index 3419607f..14491567 100644 --- a/pydeseq2/ds.py +++ b/pydeseq2/ds.py @@ -144,9 +144,9 @@ def __init__( quiet: bool = False, n_cpus: int | None = None, ) -> None: - assert ( - "LFC" in dds.varm - ), "Please provide a fitted DeseqDataSet by first running the `deseq2` method." + assert "LFC" in dds.varm, ( + "Please provide a fitted DeseqDataSet by first running the `deseq2` method." + ) self.dds = dds @@ -170,6 +170,7 @@ def __init__( self.LFC = self.dds.varm["LFC"].copy() # Check the validity of the contrast (if provided) or build it. + self.contrast: list[str] | np.ndarray if contrast is None: raise ValueError( """Default contrasts are no longer supported. 
@@ -510,7 +511,7 @@ def _independent_filtering(self) -> None: U2 = self.p_values[use] if not U2.empty: result.loc[use, i] = false_discovery_control(U2, method="bh") - num_rej = (result < self.alpha).sum(0).values + num_rej = (result < self.alpha).sum(0).to_numpy().astype(int) lowess_res = lowess(theta, num_rej, frac=1 / 5) if num_rej.max() <= 10: @@ -584,7 +585,7 @@ def objective(a: float) -> float: if objective(min_var) < 0: return min_var else: - return root_scalar(objective, bracket=[min_var, max_var]).root + return root_scalar(objective, bracket=(min_var, max_var)).root def _build_contrast_vector(self) -> None: """ diff --git a/pydeseq2/grid_search.py b/pydeseq2/grid_search.py index a8f1e77b..a172ce96 100644 --- a/pydeseq2/grid_search.py +++ b/pydeseq2/grid_search.py @@ -4,7 +4,9 @@ import pydeseq2.utils -def vec_nb_nll(counts: np.ndarray, mu: np.ndarray, alpha: np.ndarray) -> np.ndarray: +def vec_nb_nll( + counts: np.ndarray, mu: np.ndarray, alpha: np.ndarray | float +) -> np.ndarray: r"""Return the negative log-likelihood of a negative binomial. Vectorized version. @@ -17,7 +19,7 @@ def vec_nb_nll(counts: np.ndarray, mu: np.ndarray, alpha: np.ndarray) -> np.ndar mu : ndarray Mean of the distribution. - alpha : ndarray + alpha : ndarray or float Dispersion of the distribution, s.t. the variance is :math:`\mu + \alpha \mu^2`. diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index 4ba81352..1e3bb23f 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -61,9 +61,9 @@ def load_example_data( "metadata", ], "The modality argument must be one of the following: raw_counts, metadata" - assert dataset in [ - "synthetic" - ], "The dataset argument must be one of the following: synthetic." + assert dataset in ["synthetic"], ( + "The dataset argument must be one of the following: synthetic." 
+ ) # Load data datasets_path = Path(pydeseq2.__file__).parent.parent / "datasets" @@ -120,9 +120,7 @@ def test_valid_counts(counts: pd.DataFrame | np.ndarray) -> None: if isinstance(counts, pd.DataFrame): if counts.isna().any().any(): raise ValueError("NaNs are not allowed in the count matrix.") - if ~counts.apply( - lambda s: pd.to_numeric(s, errors="coerce").notnull().all() - ).all(): + if not np.issubdtype(counts.to_numpy().dtype, np.number): raise ValueError("The count matrix should only contain numbers.") else: if np.isnan(counts).any().any(): @@ -419,7 +417,7 @@ def df(beta: np.ndarray) -> np.ndarray: # Compute deviation old_dev = dev # Replaced deviation with -2 * nll, as in the R code - dev = -2 * nb_nll(counts, mu, disp) + dev = -2 * cast(float, nb_nll(counts, mu, disp)) dev_ratio = np.abs(dev - old_dev) / (np.abs(dev) + 0.1) # Compute H diagonal (useful for Cook distance outlier filtering) @@ -502,9 +500,9 @@ def fit_alpha_mle( if prior_reg: # Note: assertion is not working when using numpy - assert ( - prior_disp_var is not None - ), "Sigma_prior is required for prior regularization" + assert prior_disp_var is not None, ( + "Sigma_prior is required for prior regularization" + ) log_alpha_hat = np.log(alpha_hat) @@ -519,7 +517,7 @@ def loss(log_alpha: float) -> float: if prior_disp_var is None: raise ValueError("Sigma_prior is required for prior regularization") reg += (log_alpha - log_alpha_hat) ** 2 / (2 * prior_disp_var) - return nb_nll(counts, mu, alpha) + reg + return cast(float, nb_nll(counts, mu, alpha)) + reg def dloss(log_alpha: float) -> float: # gradient closure @@ -547,8 +545,8 @@ def dloss(log_alpha: float) -> float: res = minimize( lambda x: loss(x[0]), - x0=np.log(alpha_hat), - jac=lambda x: dloss(x[0]), + x0=np.asarray([np.log(alpha_hat)]), + jac=lambda x: np.asarray([dloss(x[0])]), method=optimizer, bounds=( [(np.log(min_disp), np.log(max_disp))] if optimizer == "L-BFGS-B" else None @@ -589,7 +587,7 @@ def trimmed_mean(x: np.ndarray, 
trim: float = 0.1, **kwargs) -> float | np.ndarr """ assert trim <= 0.5 if "axis" in kwargs: - axis = kwargs.get("axis") + axis = kwargs["axis"] s = np.sort(x, axis=axis) n = x.shape[axis] ntrim = floor(n * trim) @@ -799,7 +797,7 @@ def less_abs(lfc_null): wald_statistic: float wald_p_value: float - if alt_hypothesis: + if alt_hypothesis is not None: wald_statistic, wald_p_value = { "greaterAbs": greater_abs(lfc_null), "lessAbs": less_abs(lfc_null), @@ -807,7 +805,7 @@ def less_abs(lfc_null): "less": less(lfc_null), }[alt_hypothesis] else: - wald_statistic = contrast @ (lfc - lfc_null) / wald_se + wald_statistic = float(contrast @ (lfc - lfc_null) / wald_se) wald_p_value = 2 * norm.sf(np.abs(wald_statistic)) return wald_p_value, wald_statistic, wald_se @@ -941,7 +939,7 @@ def robust_method_of_moments_disp( # 1 - group rows by unique combinations of design factors # 2 - keep only groups with 3 or more replicates # 3 - filter the counts matrix to only keep rows in those groups - filtered_counts = normed_counts[three_or_more.values, :] + filtered_counts = normed_counts[three_or_more.to_numpy(), :] filtered_design = design_matrix.loc[three_or_more, :] cell_id = pd.Series( filtered_design.groupby(filtered_design.columns.values.tolist()).ngroup(), @@ -949,7 +947,9 @@ def robust_method_of_moments_disp( ) v = trimmed_cell_variance(filtered_counts, cell_id) else: - v = trimmed_variance(normed_counts) + v = cast( + np.ndarray, trimmed_variance(normed_counts) + ) # Since normed_counts is always 2D, trimmed_variance returns ndarray m = normed_counts.mean(0) alpha = (v - m) / m**2 @@ -1080,7 +1080,7 @@ def df(beta: np.ndarray, cnst: float = scale_cnst) -> np.ndarray: # Gradient of the function to optimize xbeta = design_matrix @ beta d_neg_prior = ( - beta * no_shrink_mask / prior_no_shrink_scale**2 + beta * no_shrink_mask / prior_no_shrink_scale** 2 + 2 * beta * shrink_mask / (prior_scale**2 + beta[shrink_index] ** 2) ) @@ -1230,7 +1230,7 @@ def mean_absolute_deviation(x: 
np.ndarray) -> float: def make_scatter( disps: list, legend_labels: list, - x_val: np.array, + x_val: np.ndarray, log: bool = True, save_path: str | None = None, **kwargs, diff --git a/pyproject.toml b/pyproject.toml index eecde408..3828a74c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,73 @@ -[tool.black] -line-length = 89 +[build-system] +build-backend = "hatchling.build" +requires = [ "hatchling" ] + +[project] +name = "pydeseq2" +version = "0.5.3" +description = "A python implementation of DESeq2." +readme = "README.md" +license = { file = "LICENSE" } +maintainers = [ + { name = "Boris Muzellec", email = "boris.muzellec@owkin.com" }, +] +authors = [ + { name = "Boris Muzellec" }, + { name = "Maria Telenczuk" }, + { name = "Vincent Cabelli" }, + { name = "Mathieu Andreux" }, +] +requires-python = ">=3.11" +classifiers = [ + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "anndata>=0.11.0", + "formulaic>=1.0.2", + "numpy>=2.0.0", + "pandas>=2.2.0", + "scikit-learn>=1.4.0", + "scipy>=1.12.0", + "formulaic-contrasts>=0.2.0", + "matplotlib>=3.9.0", +] +optional-dependencies.dev = [ + "pytest>=8.4.0", + "pre-commit>=2.16.0", + "numpydoc", + "coverage", + "mypy==1.18.2", + "pandas-stubs", + "ruff==0.14.0", + "scipy-stubs", +] +optional-dependencies.doc = [ + "jupyter", + "sphinx>=8.0.0", + "sphinx_rtd_theme", + "sphinx-autobuild", + "texttable", + "myst-parser", + "docutils", + "sphinx_click", + "sphinx_gallery", + "sphinx_autodoc_typehints", + "sphinxcontrib-bibtex", + "ipython", + "gitpython>=3.1.42", + "sphinxcontrib-googleanalytics>=0.5", +] +urls.Documentation = "https://pydeseq2.readthedocs.io/" +urls.Homepage = "https://github.com/owkin/PyDESeq2" +urls.Source = "https://github.com/owkin/PyDESeq2" [tool.ruff] line-length = 89 + +[tool.ruff.lint] select = [ "F", # Errors detected by Pyflakes "E", # 
Error detected by Pycodestyle @@ -29,13 +94,13 @@ ignore = [ "D213", ] -[tool.ruff.isort] +[tool.ruff.lint.isort] force-single-line = true -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "docs/*" = ["I"] "tests/*" = ["D"] # Ignore errors linked to sphinx gallery syntax @@ -51,4 +116,4 @@ filterwarnings = [ # ignore Pyarrow deprecation warnings '''ignore:\s*A value is trying to be set on a copy of a DataFrame:FutureWarning''', '''ignore:\s*Setting an item of incompatible dtype:FutureWarning''', -] \ No newline at end of file +] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d523cd77..00000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ --r docs/requirements.txt -. \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 1cee3f1f..00000000 --- a/setup.py +++ /dev/null @@ -1,50 +0,0 @@ -"""Packaging settings.""" - -import os -from codecs import open - -from setuptools import find_packages -from setuptools import setup - -here = os.path.abspath(os.path.dirname(__file__)) - - -with open(os.path.join(here, "README.md"), "r", "utf-8") as fp: - readme = fp.read() - -about: dict = {} -with open(os.path.join(here, "pydeseq2", "__version__.py"), "r", "utf-8") as fp: - exec(fp.read(), about) - -setup( - name="pydeseq2", - version=about["__version__"], - python_requires=">=3.10.0", - license="MIT", - description="A python implementation of DESeq2.", - long_description=readme, - long_description_content_type="text/markdown", - author="Boris Muzellec, Maria Telenczuk, Vincent Cabelli and Mathieu Andreux", - author_email="boris.muzellec@owkin.com", - packages=find_packages(exclude=["tests*"]), - install_requires=[ - "anndata>=0.11.0", - "formulaic>=1.0.2", - "numpy>=1.23.0", - "pandas>=1.4.0", - "scikit-learn>=1.1.0", - "scipy>=1.11.0", - "formulaic-contrasts>=0.2.0", - "matplotlib>=3.6.2", # not sure why 
sphinx_gallery does not work without it - ], # external packages as dependencies - extras_require={ - "dev": [ - "pytest>=6.2.4", - "pre-commit>=2.13.0", - "numpydoc", - "coverage", - "mypy", - "pandas-stubs", - ], - }, -) diff --git a/tests/data/README.md b/tests/data/README.md index 0df31657..b496a61c 100644 --- a/tests/data/README.md +++ b/tests/data/README.md @@ -11,4 +11,4 @@ in `/datasets/synthetic/`, respectively using `~condition` and `~condition + gro - `r_test_res.csv` contains DESeq2's `results` output, - `r_test_size_factors.csv` contains DESeq2's `estimateSizeFactors` output, - `r_vst.csv` contains DESeq2's `varianceStabilizingTransformation` output with `blind=TRUE` and `fitType="parametric"`, -- `r_vst_with_design.csv` contains DESeq2's `varianceStabilizingTransformation` output with `blind=FALSE` and `fitType="parametric"`. \ No newline at end of file +- `r_vst_with_design.csv` contains DESeq2's `varianceStabilizingTransformation` output with `blind=FALSE` and `fitType="parametric"`.