diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py index 6c55f5e2..9efc8adb 100644 --- a/src/lobsterpy/featurize/core.py +++ b/src/lobsterpy/featurize/core.py @@ -954,7 +954,7 @@ def get_summarized_coxx_df( ) class FeaturizeCharges: """ - Class to compute Ionicity from CHARGE.lobster data. + Class to compute Ionicity and statistics from CHARGE.lobster data. :param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`) :param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`) @@ -969,7 +969,7 @@ def __init__( charge_type: Literal["mulliken", "loewdin"], ): """ - Compute the Ionicity of the structure from CHARGE.lobster data. + Compute the Ionicity of the structure and charge statistics from CHARGE.lobster data. :param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`) :param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`) @@ -980,6 +980,9 @@ def __init__( self.path_to_charge = path_to_charge self.charge_type = charge_type + if self.charge_type.lower() not in ["mulliken", "loewdin"]: + raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`") + def _calc_ionicity(self) -> float: r""" Calculate the ionicity of the crystal structure based on quantum chemical charges. @@ -994,9 +997,6 @@ def _calc_ionicity(self) -> float: chargeobj = Charge(filename=self.path_to_charge) structure = Structure.from_file(self.path_to_structure) - if self.charge_type.lower() not in ["mulliken", "loewdin"]: - raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`") - ch_veff = [] tol = 1e-6 for i, j in enumerate(getattr(chargeobj, self.charge_type.capitalize())): @@ -1047,29 +1047,44 @@ def _calc_ionicity(self) -> float: return sum(ch_veff) / structure.num_sites + def _calc_stats(self) -> dict[str, float]: + """ + Calculate standard statistics of the atomic-charges in CHARGE.lobster. + + Returns: + A dictionary with charge statistics + """ + chargeobj = Charge(filename=self.path_to_charge) + charges = getattr(chargeobj, self.charge_type.capitalize()) + return { + f"{self.charge_type.capitalize()}_mean": np.mean(charges), + f"{self.charge_type.capitalize()}_min": np.min(charges), + f"{self.charge_type.capitalize()}_max": np.max(charges), + f"{self.charge_type.capitalize()}_std": np.std(charges), + } + def get_df(self, ids: str | None = None) -> pd.DataFrame: """ - Return a pandas dataframe with computed ionicity as columns. + Return a pandas dataframe with computed ionicity and charge statistics as columns. :param ids: set index name in the pandas dataframe. Default is None. When None, LOBSTER calc directory name is used as index name. Returns: - Returns a pandas dataframe with ionicity + Returns a pandas dataframe with ionicity and charge statistics as columns. """ - if ids: - df = pd.DataFrame(index=[ids]) - else: + if not ids: ids = Path(self.path_to_charge).parent.name - df = pd.DataFrame(index=[ids]) + + data = self._calc_stats() if self.charge_type.lower() == "mulliken": - df.loc[ids, "Ionicity_Mull"] = self._calc_ionicity() + data["Ionicity_Mull"] = self._calc_ionicity() else: - df.loc[ids, "Ionicity_Loew"] = self._calc_ionicity() + data["Ionicity_Loew"] = self._calc_ionicity() - return df + return pd.DataFrame(index=[ids], data=data) class FeaturizeDoscar: diff --git a/tests/featurize/test_batch.py b/tests/featurize/test_batch.py index 28a30320..c6d9b0e5 100644 --- a/tests/featurize/test_batch.py +++ b/tests/featurize/test_batch.py @@ -66,6 +66,14 @@ def test_summary_featurize_with_json(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -120,6 +128,14 @@ def test_summary_featurize_with_no_bonds(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -197,6 +213,14 @@ def test_summary_featurize_orbitalwise(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -249,6 +273,14 @@ def test_summary_featurize_without_json(self): "edge_COHP", "Ionicity_Mull", "Ionicity_Loew", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -321,6 +353,14 @@ def test_summary_featurize_with_json_overall(self): "edge_COOP", "Ionicity_Mull", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -375,6 +415,10 @@ def test_summary_featurize_with_json_bonding(self): "kurtosis_COHP", "edge_COHP", "Ionicity_Mull", + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", ] assert sorted(df.columns) == sorted(expected_cols) @@ -425,6 +469,10 @@ def test_summary_featurize_with_json_antibonding(self): "kurtosis_COHP", "edge_COHP", "Ionicity_Loew", + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", ] assert sorted(df.columns) == sorted(expected_cols) diff --git a/tests/featurize/test_core.py b/tests/featurize/test_core.py index c7aaf4d9..090fd033 100644 --- a/tests/featurize/test_core.py +++ b/tests/featurize/test_core.py @@ -739,6 +739,10 @@ def test_featurize_c_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", "Ionicity_Mull", ] assert sorted(df.columns) == sorted(expected_cols) @@ -748,6 +752,10 @@ def test_featurize_c_charge(self): # Test that all the values in the DataFrame assert df.loc["C", "Ionicity_Mull"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_mean"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_min"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_max"] == pytest.approx(0.0, abs=1e-05) + assert df.loc["C", "Mulliken_std"] == pytest.approx(0.0, abs=1e-05) def test_featurize_cdf_charge(self): featurize_cdf_charge = FeaturizeCharges( @@ -762,6 +770,10 @@ def test_featurize_cdf_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ + "Mulliken_mean", + "Mulliken_min", + "Mulliken_max", + "Mulliken_std", "Ionicity_Mull", ] assert sorted(df.columns) == sorted(expected_cols) @@ -771,6 +783,10 @@ def test_featurize_cdf_charge(self): # Test that all the values in the DataFrame assert df.loc["CdF", "Ionicity_Mull"] == pytest.approx(0.788333, abs=1e-05) + assert df.loc["CdF", "Mulliken_mean"] == pytest.approx(-0.003333, abs=1e-05) + assert df.loc["CdF", "Mulliken_min"] == pytest.approx(-0.79, abs=1e-05) + assert df.loc["CdF", "Mulliken_max"] == pytest.approx(1.57, abs=1e-05) + assert df.loc["CdF", "Mulliken_std"] == pytest.approx(1.112515, abs=1e-05) def test_featurize_k3sb_charge(self): featurize_k3sb_charge = FeaturizeCharges( @@ -785,6 +801,10 @@ def test_featurize_k3sb_charge(self): # Test that the DataFrame has the expected columns expected_cols = [ + "Loewdin_mean", + "Loewdin_min", + "Loewdin_max", + "Loewdin_std", "Ionicity_Loew", ] assert sorted(df.columns) == sorted(expected_cols) @@ -794,6 +814,10 @@ def test_featurize_k3sb_charge(self): # Test that all the values in the DataFrame assert df.loc["K3Sb", "Ionicity_Loew"] == pytest.approx(0.563333, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_mean"] == pytest.approx(5.551115e-17, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_min"] == pytest.approx(-1.69, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_max"] == pytest.approx(0.63, abs=1e-05) + assert df.loc["K3Sb", "Loewdin_std"] == pytest.approx(0.976576, abs=1e-05) class TestExceptions: @@ -836,15 +860,13 @@ def test_lobsterpy_featurize_exception(self): assert str(err.value) == "No cation-anion bonds detected for C structure. Please switch to `all` bonds mode" def test_featurize_charges(self): - with pytest.raises(Exception) as err: # noqa: PT012, PT011 + with pytest.raises(Exception) as err: # noqa: PT011 self.featurize_cdf_charge = FeaturizeCharges( path_to_structure=TestDir / "test_data/CdF/CONTCAR.gz", path_to_charge=TestDir / "test_data/CdF/CHARGE.lobster.gz", charge_type="Mull", ) - _ = self.featurize_cdf_charge.get_df() - assert str(err.value) == "Please check the requested charge_type. Possible options are `mulliken` or `loewdin`" def test_featurize_coxx(self):