-
Notifications
You must be signed in to change notification settings - Fork 21
Closed
Labels
bugSomething isn't workingSomething isn't working
Description
Describe the bug
A clear and concise description of what the bug is.
Indexing error when I use merge='same' or merge='unique' when calling mudata.concat on MuData objects that have different embeddings stored in mdata.obsm slot. For example, if mdata1 has 'X_totavi' and mdata2 does not.
The error disappears if I delete the embeddings that are not shared before using concat.
for mdata in [mdata1, mdata2]:
keys = list(mdata.obsm.keys())
for x in keys:
if x not in ['rna', 'prot', 'airr']:
del mdata.obsm[x]
To Reproduce
# Here mdata1 and mdata2 both have 10,000 observations
mdata = mudata.concat([mdata1, mdata2],
join='outer', merge='same',
label='source', keys=['pb', 'tumor'])
---> [12](vscode-notebook-cell:?execution_count=65&line=12)
mdata = mudata.concat([mdata1, mdata2],
13 join='outer', merge='same',
14 label='source', keys=['pb', 'tumor'])
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/merge.py:287, in concat(mdatas, join, merge, uns_merge, label, keys, index_unique, fill_value, pairwise)
283 uns = uns_merge([m.uns for m in mdatas])
285 # TODO: attrmap
--> [287](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/merge.py:287) return MuData(
288 **{
289 "data": modalities,
290 "axis": axis,
291 dim: concat_annot,
292 alt_dim: alt_annot,
293 f"{dim}m": concat_mapping,
294 f"{alt_dim}m": alt_mapping,
295 f"{dim}p": concat_pairwise,
296 f"{alt_dim}p": alt_pairwise,
297 "uns": uns,
298 }
299 )
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:232, in MuData.__init__(self, data, feature_types_names, as_view, index, **kwargs)
229 self._axis = kwargs.get("axis") or 0
231 # Restore proper .obs and .var
--> [232](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:232) self.update()
234 self._uns = kwargs.get("uns") or {}
236 return
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:1829, in MuData.update(self)
1827 if len(self.mod) > 0:
1828 self.update_var()
-> [1829](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:1829) self.update_obs()
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:1461, in MuData.update_obs(self)
1457 """
1458 Update global .obs_names according to the .obs_names of all the modalities.
1459 """
1460 join_common = self.axis == 1
-> [1461](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:1461) self._update_attr("obs", axis=1, join_common=join_common)
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:602, in MuData._update_attr(self, attr, axis, **kwargs)
600 if "join_common" in kwargs:
601 join_common = kwargs.pop("join_common")
--> [602](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:602) self._update_attr_legacy(attr, axis, join_common, **kwargs)
603 return
605 prev_index = getattr(self, attr).index
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:1309, in MuData._update_attr_legacy(self, attr, axis, join_common, **kwargs)
1307 for mx_key, mx in attrm.items():
1308 if mx_key not in self.mod.keys(): # not a modality name
-> [1309](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/mudata/_core/mudata.py:1309) attrm[mx_key] = attrm[mx_key][index_order]
1310 attrm[mx_key][index_order == -1] = np.nan
1312 # Update .obsp/.varp (size might have changed)
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/pandas/core/frame.py:4113, in DataFrame.__getitem__(self, key)
4111 if is_iterator(key):
4112 key = list(key)
-> [4113](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/pandas/core/frame.py:4113) indexer = self.columns._get_indexer_strict(key, "columns")[1]
4115 # take() does not accept boolean indexers
4116 if getattr(indexer, "dtype", None) == bool:
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/pandas/core/indexes/base.py:6212, in Index._get_indexer_strict(self, key, axis_name)
6209 else:
6210 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> [6212](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/pandas/core/indexes/base.py:6212) self._raise_if_missing(keyarr, indexer, axis_name)
6214 keyarr = self.take(indexer)
6215 if isinstance(key, Index):
6216 # GH 42790 - Preserve name from an Index
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/pandas/core/indexes/base.py:6261, in Index._raise_if_missing(self, key, indexer, axis_name)
6259 if nmissing:
6260 if nmissing == len(indexer):
-> [6261](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/pandas/core/indexes/base.py:6261) raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6263 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
6264 raise KeyError(f"{not_found} not in index")
KeyError: "None of [Index([ 3, 4, 7, 8, 11, 12, 15, 17, 18, 20,\n ...\n 19906, 19907, 19934, 19941, 19948, 19963, 19967, 19976, 19986, 19998],\n dtype='int64', length=20000)] are in the [columns]"Expected behaviour
I expected concat to fill missing .obsm embeddings values with null if they are not shared.
System
- OS: linux
- Python version Python 3.11.13
- Versions of libraries involved: mudata v0.3.2
Additional context
None
gtca
Metadata
Metadata
Assignees
Labels
bugSomething isn't workingSomething isn't working