From 229412c2359a20496761fab3035edce554663d52 Mon Sep 17 00:00:00 2001 From: naik-aakash Date: Sat, 20 Dec 2025 15:18:33 +0100 Subject: [PATCH 1/4] include charge stats in ChargeFeaturizer --- src/lobsterpy/featurize/core.py | 46 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py index 6c55f5e2..e8ed1279 100644 --- a/src/lobsterpy/featurize/core.py +++ b/src/lobsterpy/featurize/core.py @@ -954,7 +954,7 @@ def get_summarized_coxx_df( ) class FeaturizeCharges: """ - Class to compute Ionicity from CHARGE.lobster data. + Class to compute Ionicity and statistics from CHARGE.lobster data. :param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`) :param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`) @@ -969,7 +969,7 @@ def __init__( charge_type: Literal["mulliken", "loewdin"], ): """ - Compute the Ionicity of the structure from CHARGE.lobster data. + Compute the Ionicity of the structure and charge statistics from CHARGE.lobster data. :param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`) :param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`) @@ -980,6 +980,9 @@ def __init__( self.path_to_charge = path_to_charge self.charge_type = charge_type + if self.charge_type.lower() not in ["mulliken", "loewdin"]: + raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`") + def _calc_ionicity(self) -> float: r""" Calculate the ionicity of the crystal structure based on quantum chemical charges. @@ -994,9 +997,6 @@ def _calc_ionicity(self) -> float: chargeobj = Charge(filename=self.path_to_charge) structure = Structure.from_file(self.path_to_structure) - if self.charge_type.lower() not in ["mulliken", "loewdin"]: - raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`") - ch_veff = [] tol = 1e-6 for i, j in enumerate(getattr(chargeobj, self.charge_type.capitalize())): @@ -1046,16 +1046,37 @@ def _calc_ionicity(self) -> float: ch_veff.append(val) return sum(ch_veff) / structure.num_sites + + def _calc_stats(self, ids: str | None = None) -> dict[str, float]: + """ + Calculate standard statistics of the atomic-charges in CHARGE.lobster. + + :param ids: set index name in the pandas dataframe. Default is None. + When None, LOBSTER calc directory name is used as index name. + + Returns: + A dictionary with charge statistics + """ + + chargeobj = Charge(filename=self.path_to_charge) + charges = getattr(chargeobj, self.charge_type.capitalize()) + stats = { + f"{self.charge_type.capitalize()}_mean": np.mean(charges), + f"{self.charge_type.capitalize()}_min": np.min(charges), + f"{self.charge_type.capitalize()}_max": np.max(charges), + f"{self.charge_type.capitalize()}_std": np.std(charges), + } + return stats def get_df(self, ids: str | None = None) -> pd.DataFrame: """ - Return a pandas dataframe with computed ionicity as columns. + Return a pandas dataframe with computed ionicity and charge statistics as columns. :param ids: set index name in the pandas dataframe. Default is None. When None, LOBSTER calc directory name is used as index name. Returns: - Returns a pandas dataframe with ionicity + Returns a pandas dataframe with ionicity and charge statistics as columns. """ if ids: @@ -1064,12 +1085,15 @@ def get_df(self, ids: str | None = None) -> pd.DataFrame: ids = Path(self.path_to_charge).parent.name df = pd.DataFrame(index=[ids]) - if self.charge_type.lower() == "mulliken": - df.loc[ids, "Ionicity_Mull"] = self._calc_ionicity() + data = self._calc_stats(ids=ids) + + if self.charge_type.lower() == "mulliken": + data["Ionicity_Mull"] = self._calc_ionicity() else: - df.loc[ids, "Ionicity_Loew"] = self._calc_ionicity() + data["Ionicity_Loew"] = self._calc_ionicity() + - return df + return pd.DataFrame(index=[ids], data=data) class FeaturizeDoscar: From b4a5ec0e23512a5d7b35cf79847d0b20c228328b Mon Sep 17 00:00:00 2001 From: naik-aakash Date: Sat, 20 Dec 2025 15:18:48 +0100 Subject: [PATCH 2/4] update tests --- tests/featurize/test_batch.py | 48 +++++++++++++++++++++++++++++++++++ tests/featurize/test_core.py | 26 +++++++++++++++++-- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tests/featurize/test_batch.py b/tests/featurize/test_batch.py index 28a30320..03dc1845 100644 --- a/tests/featurize/test_batch.py +++ b/tests/featurize/test_batch.py @@ -66,6 +66,14 @@ def test_summary_featurize_with_json(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -120,6 +128,14 @@ def test_summary_featurize_with_no_bonds(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -197,6 +213,14 @@ def test_summary_featurize_orbitalwise(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -249,6 +273,14 @@ def test_summary_featurize_without_json(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -321,6 +353,14 @@ def test_summary_featurize_with_json_overall(self): "edge_COOP", "Ionicity_Mull", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -375,6 +415,10 @@ def test_summary_featurize_with_json_bonding(self): "kurtosis_COHP", "edge_COHP", "Ionicity_Mull", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -425,6 +469,10 @@ def test_summary_featurize_with_json_antibonding(self): "kurtosis_COHP", "edge_COHP", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", ] assert sorted(df.columns) == sorted(expected_cols) diff --git a/tests/featurize/test_core.py b/tests/featurize/test_core.py index c7aaf4d9..02299d14 100644 --- a/tests/featurize/test_core.py +++ b/tests/featurize/test_core.py @@ -739,6 +739,10 @@ def test_featurize_c_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", "Ionicity_Mull", ] assert sorted(df.columns) == sorted(expected_cols) @@ -748,6 +752,10 @@ def test_featurize_c_charge(self): # Test that all the values in the DataFrame assert df.loc["C", "Ionicity_Mull"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_mean"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_min"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_max"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_std"] == pytest.approx(0.0, abs=1e-05) def test_featurize_cdf_charge(self): featurize_cdf_charge = FeaturizeCharges( @@ -762,6 +770,10 @@ def test_featurize_cdf_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", "Ionicity_Mull", ] assert sorted(df.columns) == sorted(expected_cols) @@ -771,6 +783,10 @@ def test_featurize_cdf_charge(self): # Test that all the values in the DataFrame assert df.loc["CdF", "Ionicity_Mull"] == pytest.approx(0.788333, abs=1e-05) + assert df.loc["CdF", "Mulliken_mean"] == pytest.approx(-0.003333, abs=1e-05) + assert df.loc["CdF", "Mulliken_min"] == pytest.approx(-0.79, abs=1e-05) + assert df.loc["CdF", "Mulliken_max"] == pytest.approx(1.57, abs=1e-05) + assert df.loc["CdF", "Mulliken_std"] == pytest.approx(1.112515, abs=1e-05) def test_featurize_k3sb_charge(self): featurize_k3sb_charge = FeaturizeCharges( @@ -785,6 +801,10 @@ def test_featurize_k3sb_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", "Ionicity_Loew", ] assert sorted(df.columns) == sorted(expected_cols) @@ -794,6 +814,10 @@ def test_featurize_k3sb_charge(self): # Test that all the values in the DataFrame assert df.loc["K3Sb", "Ionicity_Loew"] == pytest.approx(0.563333, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_mean"] == pytest.approx(5.551115e-17, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_min"] == pytest.approx(-1.69, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_max"] == pytest.approx(0.63, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_std"] == pytest.approx(0.976576, abs=1e-05) class TestExceptions: @@ -843,8 +867,6 @@ def test_featurize_charges(self): charge_type="Mull", ) - _ = self.featurize_cdf_charge.get_df() - assert str(err.value) == "Please check the requested charge_type. Possible options are `mulliken` or `loewdin`" def test_featurize_coxx(self): From 46b221b0fa19868ff0470500d78f22caa06fd744 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Dec 2025 14:19:58 +0000 Subject: [PATCH 3/4] pre-commit auto-fixes --- src/lobsterpy/featurize/core.py | 6 ++---- tests/featurize/test_batch.py | 20 ++++++++++---------- tests/featurize/test_core.py | 14 +++++++------- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py index e8ed1279..dc4a4086 100644 --- a/src/lobsterpy/featurize/core.py +++ b/src/lobsterpy/featurize/core.py @@ -1046,7 +1046,7 @@ def _calc_ionicity(self) -> float: ch_veff.append(val) return sum(ch_veff) / structure.num_sites - + def _calc_stats(self, ids: str | None = None) -> dict[str, float]: """ Calculate standard statistics of the atomic-charges in CHARGE.lobster. @@ -1057,7 +1057,6 @@ def _calc_stats(self, ids: str | None = None) -> dict[str, float]: Returns: A dictionary with charge statistics """ - chargeobj = Charge(filename=self.path_to_charge) charges = getattr(chargeobj, self.charge_type.capitalize()) stats = { @@ -1087,11 +1086,10 @@ def get_df(self, ids: str | None = None) -> pd.DataFrame: data = self._calc_stats(ids=ids) - if self.charge_type.lower() == "mulliken": + if self.charge_type.lower() == "mulliken": data["Ionicity_Mull"] = self._calc_ionicity() else: data["Ionicity_Loew"] = self._calc_ionicity() - return pd.DataFrame(index=[ids], data=data) diff --git a/tests/featurize/test_batch.py b/tests/featurize/test_batch.py index 03dc1845..c6d9b0e5 100644 --- a/tests/featurize/test_batch.py +++ b/tests/featurize/test_batch.py @@ -213,8 +213,8 @@ def test_summary_featurize_orbitalwise(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", - "Mulliken_mean", - "Mulliken_min", + "Mulliken_mean", + "Mulliken_min", "Mulliken_max", "Mulliken_std", "Loewdin_mean", @@ -353,12 +353,12 @@ def test_summary_featurize_with_json_overall(self): "edge_COOP", "Ionicity_Mull", "Ionicity_Loew", - "Loewdin_mean", - "Loewdin_min", + "Loewdin_mean", + "Loewdin_min", "Loewdin_max", "Loewdin_std", - "Mulliken_mean", - "Mulliken_min", + "Mulliken_mean", + "Mulliken_min", "Mulliken_max", "Mulliken_std", ] @@ -415,8 +415,8 @@ def test_summary_featurize_with_json_bonding(self): "kurtosis_COHP", "edge_COHP", "Ionicity_Mull", - "Mulliken_mean", - "Mulliken_min", + "Mulliken_mean", + "Mulliken_min", "Mulliken_max", "Mulliken_std", ] @@ -469,8 +469,8 @@ def test_summary_featurize_with_json_antibonding(self): "kurtosis_COHP", "edge_COHP", "Ionicity_Loew", - "Loewdin_mean", - "Loewdin_min", + "Loewdin_mean", + "Loewdin_min", "Loewdin_max", "Loewdin_std", ] diff --git a/tests/featurize/test_core.py b/tests/featurize/test_core.py index 02299d14..090fd033 100644 --- a/tests/featurize/test_core.py +++ b/tests/featurize/test_core.py @@ -739,8 +739,8 @@ def test_featurize_c_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ - "Mulliken_mean", - "Mulliken_min", + "Mulliken_mean", + "Mulliken_min", "Mulliken_max", "Mulliken_std", "Ionicity_Mull", @@ -770,8 +770,8 @@ def test_featurize_cdf_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ - "Mulliken_mean", - "Mulliken_min", + "Mulliken_mean", + "Mulliken_min", "Mulliken_max", "Mulliken_std", "Ionicity_Mull", @@ -801,8 +801,8 @@ def test_featurize_k3sb_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ - "Loewdin_mean", - "Loewdin_min", + "Loewdin_mean", + "Loewdin_min", "Loewdin_max", "Loewdin_std", "Ionicity_Loew", @@ -860,7 +860,7 @@ def test_lobsterpy_featurize_exception(self): assert str(err.value) == "No cation-anion bonds detected for C structure. Please switch to `all` bonds mode" def test_featurize_charges(self): - with pytest.raises(Exception) as err: # noqa: PT012, PT011 + with pytest.raises(Exception) as err: # noqa: PT011 self.featurize_cdf_charge = FeaturizeCharges( path_to_structure=TestDir / "test_data/CdF/CONTCAR.gz", path_to_charge=TestDir / "test_data/CdF/CHARGE.lobster.gz", From 0b005101b9958c8049b5f16fa1318c2c1b8643ae Mon Sep 17 00:00:00 2001 From: naik-aakash Date: Sat, 20 Dec 2025 15:29:14 +0100 Subject: [PATCH 4/4] remove unused variables and fix lint --- src/lobsterpy/featurize/core.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py index dc4a4086..9efc8adb 100644 --- a/src/lobsterpy/featurize/core.py +++ b/src/lobsterpy/featurize/core.py @@ -1047,25 +1047,21 @@ def _calc_ionicity(self) -> float: return sum(ch_veff) / structure.num_sites - def _calc_stats(self, ids: str | None = None) -> dict[str, float]: + def _calc_stats(self) -> dict[str, float]: """ Calculate standard statistics of the atomic-charges in CHARGE.lobster. - :param ids: set index name in the pandas dataframe. Default is None. - When None, LOBSTER calc directory name is used as index name. - Returns: A dictionary with charge statistics """ chargeobj = Charge(filename=self.path_to_charge) charges = getattr(chargeobj, self.charge_type.capitalize()) - stats = { + return { f"{self.charge_type.capitalize()}_mean": np.mean(charges), f"{self.charge_type.capitalize()}_min": np.min(charges), f"{self.charge_type.capitalize()}_max": np.max(charges), f"{self.charge_type.capitalize()}_std": np.std(charges), } - return stats def get_df(self, ids: str | None = None) -> pd.DataFrame: """ @@ -1078,13 +1074,10 @@ def get_df(self, ids: str | None = None) -> pd.DataFrame: Returns a pandas dataframe with ionicity and charge statistics as columns. """ - if ids: - df = pd.DataFrame(index=[ids]) - else: + if not ids: ids = Path(self.path_to_charge).parent.name - df = pd.DataFrame(index=[ids]) - data = self._calc_stats(ids=ids) + data = self._calc_stats() if self.charge_type.lower() == "mulliken": data["Ionicity_Mull"] = self._calc_ionicity()