From 229412c2359a20496761fab3035edce554663d52 Mon Sep 17 00:00:00 2001
From: naik-aakash <aakash.naik@bam.de>
Date: Sat, 20 Dec 2025 15:18:33 +0100
Subject: [PATCH 1/4] include charge stats in FeaturizeCharges

---
 src/lobsterpy/featurize/core.py | 46 +++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py
index 6c55f5e2..e8ed1279 100644
--- a/src/lobsterpy/featurize/core.py
+++ b/src/lobsterpy/featurize/core.py
@@ -954,7 +954,7 @@ def get_summarized_coxx_df(
 )
 class FeaturizeCharges:
     """
-    Class to compute Ionicity from CHARGE.lobster data.
+    Class to compute Ionicity and statistics from CHARGE.lobster data.
 
     :param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`)
     :param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`)
@@ -969,7 +969,7 @@ def __init__(
         charge_type: Literal["mulliken", "loewdin"],
     ):
         """
-        Compute the Ionicity of the structure from CHARGE.lobster data.
+        Compute the Ionicity of the structure and charge statistics from CHARGE.lobster data.
 
         :param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`)
         :param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`)
@@ -980,6 +980,9 @@ def __init__(
         self.path_to_charge = path_to_charge
         self.charge_type = charge_type
 
+        if self.charge_type.lower() not in ["mulliken", "loewdin"]:
+            raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`")
+
     def _calc_ionicity(self) -> float:
         r"""
         Calculate the ionicity of the crystal structure based on quantum chemical charges.
@@ -994,9 +997,6 @@ def _calc_ionicity(self) -> float:
         chargeobj = Charge(filename=self.path_to_charge)
         structure = Structure.from_file(self.path_to_structure)
 
-        if self.charge_type.lower() not in ["mulliken", "loewdin"]:
-            raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`")
-
         ch_veff = []
         tol = 1e-6
         for i, j in enumerate(getattr(chargeobj, self.charge_type.capitalize())):
@@ -1046,16 +1046,37 @@ def _calc_ionicity(self) -> float:
                 ch_veff.append(val)
 
         return sum(ch_veff) / structure.num_sites
+    
+    def _calc_stats(self, ids: str | None = None) -> dict[str, float]:
+        """
+        Calculate standard statistics of the atomic-charges in CHARGE.lobster.
+
+        :param ids: set index name in the pandas dataframe. Default is None.
+            When None, LOBSTER calc directory name is used as index name.
+
+        Returns:
+            A dictionary with charge statistics
+        """
+        
+        chargeobj = Charge(filename=self.path_to_charge)
+        charges = getattr(chargeobj, self.charge_type.capitalize())
+        stats = {
+            f"{self.charge_type.capitalize()}_mean": np.mean(charges),
+            f"{self.charge_type.capitalize()}_min": np.min(charges),
+            f"{self.charge_type.capitalize()}_max": np.max(charges),
+            f"{self.charge_type.capitalize()}_std": np.std(charges),
+        }
+        return stats
 
     def get_df(self, ids: str | None = None) -> pd.DataFrame:
         """
-        Return a pandas dataframe with computed ionicity as columns.
+        Return a pandas dataframe with computed ionicity and charge statistics as columns.
 
         :param ids: set index name in the pandas dataframe. Default is None.
             When None, LOBSTER calc directory name is used as index name.
 
         Returns:
-            Returns a pandas dataframe with ionicity
+            Returns a pandas dataframe with ionicity and charge statistics as columns.
 
         """
         if ids:
@@ -1064,12 +1085,15 @@ def get_df(self, ids: str | None = None) -> pd.DataFrame:
             ids = Path(self.path_to_charge).parent.name
             df = pd.DataFrame(index=[ids])
 
-        if self.charge_type.lower() == "mulliken":
-            df.loc[ids, "Ionicity_Mull"] = self._calc_ionicity()
+        data = self._calc_stats(ids=ids)
+
+        if self.charge_type.lower() == "mulliken":            
+            data["Ionicity_Mull"] = self._calc_ionicity()
         else:
-            df.loc[ids, "Ionicity_Loew"] = self._calc_ionicity()
+            data["Ionicity_Loew"] = self._calc_ionicity()
+        
 
-        return df
+        return pd.DataFrame(index=[ids], data=data)
 
 
 class FeaturizeDoscar:

From b4a5ec0e23512a5d7b35cf79847d0b20c228328b Mon Sep 17 00:00:00 2001
From: naik-aakash <aakash.naik@bam.de>
Date: Sat, 20 Dec 2025 15:18:48 +0100
Subject: [PATCH 2/4] update tests

---
 tests/featurize/test_batch.py | 48 +++++++++++++++++++++++++++++++++++
 tests/featurize/test_core.py  | 26 +++++++++++++++++--
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/tests/featurize/test_batch.py b/tests/featurize/test_batch.py
index 28a30320..03dc1845 100644
--- a/tests/featurize/test_batch.py
+++ b/tests/featurize/test_batch.py
@@ -66,6 +66,14 @@ def test_summary_featurize_with_json(self):
             "edge_COHP",
             "Ionicity_Mull",
             "Ionicity_Loew",
+            "Loewdin_mean",
+            "Loewdin_min",
+            "Loewdin_max",
+            "Loewdin_std",
+            "Mulliken_mean",
+            "Mulliken_min",
+            "Mulliken_max",
+            "Mulliken_std",
         ]
 
         assert sorted(df.columns) == sorted(expected_cols)
@@ -120,6 +128,14 @@ def test_summary_featurize_with_no_bonds(self):
             "edge_COHP",
             "Ionicity_Mull",
             "Ionicity_Loew",
+            "Loewdin_mean",
+            "Loewdin_min",
+            "Loewdin_max",
+            "Loewdin_std",
+            "Mulliken_mean",
+            "Mulliken_min",
+            "Mulliken_max",
+            "Mulliken_std",
         ]
 
         assert sorted(df.columns) == sorted(expected_cols)
@@ -197,6 +213,14 @@ def test_summary_featurize_orbitalwise(self):
             "edge_COHP",
             "Ionicity_Mull",
             "Ionicity_Loew",
+            "Mulliken_mean", 
+            "Mulliken_min", 
+            "Mulliken_max",
+            "Mulliken_std",
+            "Loewdin_mean",
+            "Loewdin_min",
+            "Loewdin_max",
+            "Loewdin_std",
         ]
 
         assert sorted(df.columns) == sorted(expected_cols)
@@ -249,6 +273,14 @@ def test_summary_featurize_without_json(self):
             "edge_COHP",
             "Ionicity_Mull",
             "Ionicity_Loew",
+            "Mulliken_mean",
+            "Mulliken_min",
+            "Mulliken_max",
+            "Mulliken_std",
+            "Loewdin_mean",
+            "Loewdin_min",
+            "Loewdin_max",
+            "Loewdin_std",
         ]
 
         assert sorted(df.columns) == sorted(expected_cols)
@@ -321,6 +353,14 @@ def test_summary_featurize_with_json_overall(self):
             "edge_COOP",
             "Ionicity_Mull",
             "Ionicity_Loew",
+            "Loewdin_mean", 
+            "Loewdin_min", 
+            "Loewdin_max",
+            "Loewdin_std",
+            "Mulliken_mean", 
+            "Mulliken_min", 
+            "Mulliken_max",
+            "Mulliken_std",
         ]
 
         assert sorted(df.columns) == sorted(expected_cols)
@@ -375,6 +415,10 @@ def test_summary_featurize_with_json_bonding(self):
             "kurtosis_COHP",
             "edge_COHP",
             "Ionicity_Mull",
+            "Mulliken_mean", 
+            "Mulliken_min", 
+            "Mulliken_max",
+            "Mulliken_std",
         ]
 
         assert sorted(df.columns) == sorted(expected_cols)
@@ -425,6 +469,10 @@ def test_summary_featurize_with_json_antibonding(self):
             "kurtosis_COHP",
             "edge_COHP",
             "Ionicity_Loew",
+            "Loewdin_mean", 
+            "Loewdin_min", 
+            "Loewdin_max",
+            "Loewdin_std",
         ]
 
         assert sorted(df.columns) == sorted(expected_cols)
diff --git a/tests/featurize/test_core.py b/tests/featurize/test_core.py
index c7aaf4d9..02299d14 100644
--- a/tests/featurize/test_core.py
+++ b/tests/featurize/test_core.py
@@ -739,6 +739,10 @@ def test_featurize_c_charge(self):
 
         # Test that the DataFrame has the expected columns
         expected_cols = [
+            "Mulliken_mean", 
+            "Mulliken_min", 
+            "Mulliken_max",
+            "Mulliken_std",
             "Ionicity_Mull",
         ]
         assert sorted(df.columns) == sorted(expected_cols)
@@ -748,6 +752,10 @@ def test_featurize_c_charge(self):
 
         # Test that all the values in the DataFrame
         assert df.loc["C", "Ionicity_Mull"] == pytest.approx(0.0, abs=1e-05)
+        assert df.loc["C", "Mulliken_mean"] == pytest.approx(0.0, abs=1e-05)
+        assert df.loc["C", "Mulliken_min"] == pytest.approx(0.0, abs=1e-05)
+        assert df.loc["C", "Mulliken_max"] == pytest.approx(0.0, abs=1e-05)
+        assert df.loc["C", "Mulliken_std"] == pytest.approx(0.0, abs=1e-05)
 
     def test_featurize_cdf_charge(self):
         featurize_cdf_charge = FeaturizeCharges(
@@ -762,6 +770,10 @@ def test_featurize_cdf_charge(self):
 
         # Test that the DataFrame has the expected columns
         expected_cols = [
+            "Mulliken_mean", 
+            "Mulliken_min", 
+            "Mulliken_max",
+            "Mulliken_std",
             "Ionicity_Mull",
         ]
         assert sorted(df.columns) == sorted(expected_cols)
@@ -771,6 +783,10 @@ def test_featurize_cdf_charge(self):
 
         # Test that all the values in the DataFrame
         assert df.loc["CdF", "Ionicity_Mull"] == pytest.approx(0.788333, abs=1e-05)
+        assert df.loc["CdF", "Mulliken_mean"] == pytest.approx(-0.003333, abs=1e-05)
+        assert df.loc["CdF", "Mulliken_min"] == pytest.approx(-0.79, abs=1e-05)
+        assert df.loc["CdF", "Mulliken_max"] == pytest.approx(1.57, abs=1e-05)
+        assert df.loc["CdF", "Mulliken_std"] == pytest.approx(1.112515, abs=1e-05)
 
     def test_featurize_k3sb_charge(self):
         featurize_k3sb_charge = FeaturizeCharges(
@@ -785,6 +801,10 @@ def test_featurize_k3sb_charge(self):
 
         # Test that the DataFrame has the expected columns
         expected_cols = [
+            "Loewdin_mean", 
+            "Loewdin_min", 
+            "Loewdin_max",
+            "Loewdin_std",
             "Ionicity_Loew",
         ]
         assert sorted(df.columns) == sorted(expected_cols)
@@ -794,6 +814,10 @@ def test_featurize_k3sb_charge(self):
 
         # Test that all the values in the DataFrame
         assert df.loc["K3Sb", "Ionicity_Loew"] == pytest.approx(0.563333, abs=1e-05)
+        assert df.loc["K3Sb", "Loewdin_mean"] == pytest.approx(5.551115e-17, abs=1e-05)
+        assert df.loc["K3Sb", "Loewdin_min"] == pytest.approx(-1.69, abs=1e-05)
+        assert df.loc["K3Sb", "Loewdin_max"] == pytest.approx(0.63, abs=1e-05)
+        assert df.loc["K3Sb", "Loewdin_std"] == pytest.approx(0.976576, abs=1e-05)
 
 
 class TestExceptions:
@@ -843,8 +867,6 @@ def test_featurize_charges(self):
                 charge_type="Mull",
             )
 
-            _ = self.featurize_cdf_charge.get_df()
-
         assert str(err.value) == "Please check the requested charge_type. Possible options are `mulliken` or `loewdin`"
 
     def test_featurize_coxx(self):

From 46b221b0fa19868ff0470500d78f22caa06fd744 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 20 Dec 2025 14:19:58 +0000
Subject: [PATCH 3/4] pre-commit auto-fixes

---
 src/lobsterpy/featurize/core.py |  6 ++----
 tests/featurize/test_batch.py   | 20 ++++++++++----------
 tests/featurize/test_core.py    | 14 +++++++-------
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py
index e8ed1279..dc4a4086 100644
--- a/src/lobsterpy/featurize/core.py
+++ b/src/lobsterpy/featurize/core.py
@@ -1046,7 +1046,7 @@ def _calc_ionicity(self) -> float:
                 ch_veff.append(val)
 
         return sum(ch_veff) / structure.num_sites
-    
+
     def _calc_stats(self, ids: str | None = None) -> dict[str, float]:
         """
         Calculate standard statistics of the atomic-charges in CHARGE.lobster.
@@ -1057,7 +1057,6 @@ def _calc_stats(self, ids: str | None = None) -> dict[str, float]:
         Returns:
             A dictionary with charge statistics
         """
-        
         chargeobj = Charge(filename=self.path_to_charge)
         charges = getattr(chargeobj, self.charge_type.capitalize())
         stats = {
@@ -1087,11 +1086,10 @@ def get_df(self, ids: str | None = None) -> pd.DataFrame:
 
         data = self._calc_stats(ids=ids)
 
-        if self.charge_type.lower() == "mulliken":            
+        if self.charge_type.lower() == "mulliken":
             data["Ionicity_Mull"] = self._calc_ionicity()
         else:
             data["Ionicity_Loew"] = self._calc_ionicity()
-        
 
         return pd.DataFrame(index=[ids], data=data)
 
diff --git a/tests/featurize/test_batch.py b/tests/featurize/test_batch.py
index 03dc1845..c6d9b0e5 100644
--- a/tests/featurize/test_batch.py
+++ b/tests/featurize/test_batch.py
@@ -213,8 +213,8 @@ def test_summary_featurize_orbitalwise(self):
             "edge_COHP",
             "Ionicity_Mull",
             "Ionicity_Loew",
-            "Mulliken_mean", 
-            "Mulliken_min", 
+            "Mulliken_mean",
+            "Mulliken_min",
             "Mulliken_max",
             "Mulliken_std",
             "Loewdin_mean",
@@ -353,12 +353,12 @@ def test_summary_featurize_with_json_overall(self):
             "edge_COOP",
             "Ionicity_Mull",
             "Ionicity_Loew",
-            "Loewdin_mean", 
-            "Loewdin_min", 
+            "Loewdin_mean",
+            "Loewdin_min",
             "Loewdin_max",
             "Loewdin_std",
-            "Mulliken_mean", 
-            "Mulliken_min", 
+            "Mulliken_mean",
+            "Mulliken_min",
             "Mulliken_max",
             "Mulliken_std",
         ]
@@ -415,8 +415,8 @@ def test_summary_featurize_with_json_bonding(self):
             "kurtosis_COHP",
             "edge_COHP",
             "Ionicity_Mull",
-            "Mulliken_mean", 
-            "Mulliken_min", 
+            "Mulliken_mean",
+            "Mulliken_min",
             "Mulliken_max",
             "Mulliken_std",
         ]
@@ -469,8 +469,8 @@ def test_summary_featurize_with_json_antibonding(self):
             "kurtosis_COHP",
             "edge_COHP",
             "Ionicity_Loew",
-            "Loewdin_mean", 
-            "Loewdin_min", 
+            "Loewdin_mean",
+            "Loewdin_min",
             "Loewdin_max",
             "Loewdin_std",
         ]
diff --git a/tests/featurize/test_core.py b/tests/featurize/test_core.py
index 02299d14..090fd033 100644
--- a/tests/featurize/test_core.py
+++ b/tests/featurize/test_core.py
@@ -739,8 +739,8 @@ def test_featurize_c_charge(self):
 
         # Test that the DataFrame has the expected columns
         expected_cols = [
-            "Mulliken_mean", 
-            "Mulliken_min", 
+            "Mulliken_mean",
+            "Mulliken_min",
             "Mulliken_max",
             "Mulliken_std",
             "Ionicity_Mull",
@@ -770,8 +770,8 @@ def test_featurize_cdf_charge(self):
 
         # Test that the DataFrame has the expected columns
         expected_cols = [
-            "Mulliken_mean", 
-            "Mulliken_min", 
+            "Mulliken_mean",
+            "Mulliken_min",
             "Mulliken_max",
             "Mulliken_std",
             "Ionicity_Mull",
@@ -801,8 +801,8 @@ def test_featurize_k3sb_charge(self):
 
         # Test that the DataFrame has the expected columns
         expected_cols = [
-            "Loewdin_mean", 
-            "Loewdin_min", 
+            "Loewdin_mean",
+            "Loewdin_min",
             "Loewdin_max",
             "Loewdin_std",
             "Ionicity_Loew",
@@ -860,7 +860,7 @@ def test_lobsterpy_featurize_exception(self):
         assert str(err.value) == "No cation-anion bonds detected for C structure. Please switch to `all` bonds mode"
 
     def test_featurize_charges(self):
-        with pytest.raises(Exception) as err:  # noqa: PT012, PT011
+        with pytest.raises(Exception) as err:  # noqa: PT011
             self.featurize_cdf_charge = FeaturizeCharges(
                 path_to_structure=TestDir / "test_data/CdF/CONTCAR.gz",
                 path_to_charge=TestDir / "test_data/CdF/CHARGE.lobster.gz",

From 0b005101b9958c8049b5f16fa1318c2c1b8643ae Mon Sep 17 00:00:00 2001
From: naik-aakash <aakash.naik@bam.de>
Date: Sat, 20 Dec 2025 15:29:14 +0100
Subject: [PATCH 4/4] remove unused variables and fix lint

---
 src/lobsterpy/featurize/core.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py
index dc4a4086..9efc8adb 100644
--- a/src/lobsterpy/featurize/core.py
+++ b/src/lobsterpy/featurize/core.py
@@ -1047,25 +1047,21 @@ def _calc_ionicity(self) -> float:
 
         return sum(ch_veff) / structure.num_sites
 
-    def _calc_stats(self, ids: str | None = None) -> dict[str, float]:
+    def _calc_stats(self) -> dict[str, float]:
         """
         Calculate standard statistics of the atomic-charges in CHARGE.lobster.
 
-        :param ids: set index name in the pandas dataframe. Default is None.
-            When None, LOBSTER calc directory name is used as index name.
-
         Returns:
             A dictionary with charge statistics
         """
         chargeobj = Charge(filename=self.path_to_charge)
         charges = getattr(chargeobj, self.charge_type.capitalize())
-        stats = {
+        return {
             f"{self.charge_type.capitalize()}_mean": np.mean(charges),
             f"{self.charge_type.capitalize()}_min": np.min(charges),
             f"{self.charge_type.capitalize()}_max": np.max(charges),
             f"{self.charge_type.capitalize()}_std": np.std(charges),
         }
-        return stats
 
     def get_df(self, ids: str | None = None) -> pd.DataFrame:
         """
@@ -1078,13 +1074,10 @@ def get_df(self, ids: str | None = None) -> pd.DataFrame:
             Returns a pandas dataframe with ionicity and charge statistics as columns.
 
         """
-        if ids:
-            df = pd.DataFrame(index=[ids])
-        else:
+        if not ids:
             ids = Path(self.path_to_charge).parent.name
-            df = pd.DataFrame(index=[ids])
 
-        data = self._calc_stats(ids=ids)
+        data = self._calc_stats()
 
         if self.charge_type.lower() == "mulliken":
             data["Ionicity_Mull"] = self._calc_ionicity()