Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 29 additions & 14 deletions src/lobsterpy/featurize/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ def get_summarized_coxx_df(
)
class FeaturizeCharges:
"""
Class to compute Ionicity from CHARGE.lobster data.
Class to compute Ionicity and statistics from CHARGE.lobster data.

:param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`)
:param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`)
Expand All @@ -969,7 +969,7 @@ def __init__(
charge_type: Literal["mulliken", "loewdin"],
):
"""
Compute the Ionicity of the structure from CHARGE.lobster data.
Compute the Ionicity of the structure and charge statistics from CHARGE.lobster data.

:param path_to_structure: path to structure file (e.g., `CONTCAR` (preferred), `POSCAR`)
:param path_to_charge: path to CHARGE.lobster (e.g., `CHARGE.lobster`)
Expand All @@ -980,6 +980,9 @@ def __init__(
self.path_to_charge = path_to_charge
self.charge_type = charge_type

if self.charge_type.lower() not in ["mulliken", "loewdin"]:
raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`")

def _calc_ionicity(self) -> float:
r"""
Calculate the ionicity of the crystal structure based on quantum chemical charges.
Expand All @@ -994,9 +997,6 @@ def _calc_ionicity(self) -> float:
chargeobj = Charge(filename=self.path_to_charge)
structure = Structure.from_file(self.path_to_structure)

if self.charge_type.lower() not in ["mulliken", "loewdin"]:
raise ValueError("Please check the requested charge_type. Possible options are `mulliken` or `loewdin`")

ch_veff = []
tol = 1e-6
for i, j in enumerate(getattr(chargeobj, self.charge_type.capitalize())):
Expand Down Expand Up @@ -1047,29 +1047,44 @@ def _calc_ionicity(self) -> float:

return sum(ch_veff) / structure.num_sites

def _calc_stats(self) -> dict[str, float]:
    """
    Compute summary statistics of the atomic charges stored in CHARGE.lobster.

    The charges are read from the ``Charge`` object attribute whose name is the
    capitalized ``self.charge_type``; that same capitalized name is used as the
    prefix of every key in the result.

    Returns:
        A dictionary with the keys ``<prefix>_mean``, ``<prefix>_min``,
        ``<prefix>_max`` and ``<prefix>_std`` (in that order), each holding the
        corresponding numpy statistic of the charges.
    """
    # Parse the charge file first, then resolve the attribute name from charge_type.
    charge_data = Charge(filename=self.path_to_charge)
    prefix = self.charge_type.capitalize()
    atomic_charges = getattr(charge_data, prefix)

    # (key suffix, numpy reducer) pairs; the tuple order fixes the key order.
    reducers = (
        ("mean", np.mean),
        ("min", np.min),
        ("max", np.max),
        ("std", np.std),
    )
    return {f"{prefix}_{label}": reducer(atomic_charges) for label, reducer in reducers}

def get_df(self, ids: str | None = None) -> pd.DataFrame:
"""
Return a pandas dataframe with computed ionicity as columns.
Return a pandas dataframe with computed ionicity and charge statistics as columns.

:param ids: set index name in the pandas dataframe. Default is None.
When None, LOBSTER calc directory name is used as index name.

Returns:
Returns a pandas dataframe with ionicity
Returns a pandas dataframe with ionicity and charge statistics as columns.

"""
if ids:
df = pd.DataFrame(index=[ids])
else:
if not ids:
ids = Path(self.path_to_charge).parent.name
df = pd.DataFrame(index=[ids])

data = self._calc_stats()

if self.charge_type.lower() == "mulliken":
df.loc[ids, "Ionicity_Mull"] = self._calc_ionicity()
data["Ionicity_Mull"] = self._calc_ionicity()
else:
df.loc[ids, "Ionicity_Loew"] = self._calc_ionicity()
data["Ionicity_Loew"] = self._calc_ionicity()

return df
return pd.DataFrame(index=[ids], data=data)


class FeaturizeDoscar:
Expand Down
48 changes: 48 additions & 0 deletions tests/featurize/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ def test_summary_featurize_with_json(self):
"edge_COHP",
"Ionicity_Mull",
"Ionicity_Loew",
"Loewdin_mean",
"Loewdin_min",
"Loewdin_max",
"Loewdin_std",
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
]

assert sorted(df.columns) == sorted(expected_cols)
Expand Down Expand Up @@ -120,6 +128,14 @@ def test_summary_featurize_with_no_bonds(self):
"edge_COHP",
"Ionicity_Mull",
"Ionicity_Loew",
"Loewdin_mean",
"Loewdin_min",
"Loewdin_max",
"Loewdin_std",
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
]

assert sorted(df.columns) == sorted(expected_cols)
Expand Down Expand Up @@ -197,6 +213,14 @@ def test_summary_featurize_orbitalwise(self):
"edge_COHP",
"Ionicity_Mull",
"Ionicity_Loew",
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
"Loewdin_mean",
"Loewdin_min",
"Loewdin_max",
"Loewdin_std",
]

assert sorted(df.columns) == sorted(expected_cols)
Expand Down Expand Up @@ -249,6 +273,14 @@ def test_summary_featurize_without_json(self):
"edge_COHP",
"Ionicity_Mull",
"Ionicity_Loew",
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
"Loewdin_mean",
"Loewdin_min",
"Loewdin_max",
"Loewdin_std",
]

assert sorted(df.columns) == sorted(expected_cols)
Expand Down Expand Up @@ -321,6 +353,14 @@ def test_summary_featurize_with_json_overall(self):
"edge_COOP",
"Ionicity_Mull",
"Ionicity_Loew",
"Loewdin_mean",
"Loewdin_min",
"Loewdin_max",
"Loewdin_std",
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
]

assert sorted(df.columns) == sorted(expected_cols)
Expand Down Expand Up @@ -375,6 +415,10 @@ def test_summary_featurize_with_json_bonding(self):
"kurtosis_COHP",
"edge_COHP",
"Ionicity_Mull",
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
]

assert sorted(df.columns) == sorted(expected_cols)
Expand Down Expand Up @@ -425,6 +469,10 @@ def test_summary_featurize_with_json_antibonding(self):
"kurtosis_COHP",
"edge_COHP",
"Ionicity_Loew",
"Loewdin_mean",
"Loewdin_min",
"Loewdin_max",
"Loewdin_std",
]

assert sorted(df.columns) == sorted(expected_cols)
Expand Down
28 changes: 25 additions & 3 deletions tests/featurize/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,10 @@ def test_featurize_c_charge(self):

# Test that the DataFrame has the expected columns
expected_cols = [
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
"Ionicity_Mull",
]
assert sorted(df.columns) == sorted(expected_cols)
Expand All @@ -748,6 +752,10 @@ def test_featurize_c_charge(self):

# Test that all the values in the DataFrame
assert df.loc["C", "Ionicity_Mull"] == pytest.approx(0.0, abs=1e-05)
assert df.loc["C", "Mulliken_mean"] == pytest.approx(0.0, abs=1e-05)
assert df.loc["C", "Mulliken_min"] == pytest.approx(0.0, abs=1e-05)
assert df.loc["C", "Mulliken_max"] == pytest.approx(0.0, abs=1e-05)
assert df.loc["C", "Mulliken_std"] == pytest.approx(0.0, abs=1e-05)

def test_featurize_cdf_charge(self):
featurize_cdf_charge = FeaturizeCharges(
Expand All @@ -762,6 +770,10 @@ def test_featurize_cdf_charge(self):

# Test that the DataFrame has the expected columns
expected_cols = [
"Mulliken_mean",
"Mulliken_min",
"Mulliken_max",
"Mulliken_std",
"Ionicity_Mull",
]
assert sorted(df.columns) == sorted(expected_cols)
Expand All @@ -771,6 +783,10 @@ def test_featurize_cdf_charge(self):

# Test that all the values in the DataFrame
assert df.loc["CdF", "Ionicity_Mull"] == pytest.approx(0.788333, abs=1e-05)
assert df.loc["CdF", "Mulliken_mean"] == pytest.approx(-0.003333, abs=1e-05)
assert df.loc["CdF", "Mulliken_min"] == pytest.approx(-0.79, abs=1e-05)
assert df.loc["CdF", "Mulliken_max"] == pytest.approx(1.57, abs=1e-05)
assert df.loc["CdF", "Mulliken_std"] == pytest.approx(1.112515, abs=1e-05)

def test_featurize_k3sb_charge(self):
featurize_k3sb_charge = FeaturizeCharges(
Expand All @@ -785,6 +801,10 @@ def test_featurize_k3sb_charge(self):

# Test that the DataFrame has the expected columns
expected_cols = [
"Loewdin_mean",
"Loewdin_min",
"Loewdin_max",
"Loewdin_std",
"Ionicity_Loew",
]
assert sorted(df.columns) == sorted(expected_cols)
Expand All @@ -794,6 +814,10 @@ def test_featurize_k3sb_charge(self):

# Test that all the values in the DataFrame
assert df.loc["K3Sb", "Ionicity_Loew"] == pytest.approx(0.563333, abs=1e-05)
assert df.loc["K3Sb", "Loewdin_mean"] == pytest.approx(5.551115e-17, abs=1e-05)
assert df.loc["K3Sb", "Loewdin_min"] == pytest.approx(-1.69, abs=1e-05)
assert df.loc["K3Sb", "Loewdin_max"] == pytest.approx(0.63, abs=1e-05)
assert df.loc["K3Sb", "Loewdin_std"] == pytest.approx(0.976576, abs=1e-05)


class TestExceptions:
Expand Down Expand Up @@ -836,15 +860,13 @@ def test_lobsterpy_featurize_exception(self):
assert str(err.value) == "No cation-anion bonds detected for C structure. Please switch to `all` bonds mode"

def test_featurize_charges(self):
with pytest.raises(Exception) as err: # noqa: PT012, PT011
with pytest.raises(Exception) as err: # noqa: PT011
self.featurize_cdf_charge = FeaturizeCharges(
path_to_structure=TestDir / "test_data/CdF/CONTCAR.gz",
path_to_charge=TestDir / "test_data/CdF/CHARGE.lobster.gz",
charge_type="Mull",
)

_ = self.featurize_cdf_charge.get_df()

assert str(err.value) == "Please check the requested charge_type. Possible options are `mulliken` or `loewdin`"

def test_featurize_coxx(self):
Expand Down
Loading