Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Kevin McDonnell, Nathan Wamsley, Jason Derks, Sarah Sipe, Maddy Yeh, Harrison Sp
- [Environment Setup](#environment-setup)
- [Linux/MacOS](#linuxmacos)
- [Windows](#windows)
- [Sample Search (Test Mode)](#sample-search-test-mode)
- [Outputs](#outputs)
- [Running a Search](#running-a-search)
- [File Conversion](#file-conversion)
Expand All @@ -37,13 +38,19 @@ To set up a Conda environment to run JMod in, please download the `data/jmod_env
In a dedicated terminal, type:
1. ```conda env create -f jmod_env.yml -n $env_name```
2. ```conda activate $env_name```
3. ```python run_jmod.py <args> ```

If run successfully, the inputted configurations alongside `"Loading library..."` should be printed.

#### Windows
If on a Windows machine, please use the `data/jmod_env_windows.yml` file to set up your environment. Set up will follow the same steps as Linux/MacOS.

#### Sample Search (Test Mode)
To ensure that all packages and the environment were set up correctly, JMod can be run in test mode with the `test_mode.json` file located in the `data/` directory. Running JMod in test mode performs a full search on a subset of a small file with a small spectral library, allowing for a quick search and check that all functions are functioning properly.

The search can be run with the following command:
```
python run_jmod.py --config data/test_mode.json
```
If the conda environment was configured properly, JMod should start by printing the run configurations and proceed to run to completion.

### Outputs

Expand Down
199,483 changes: 199,483 additions & 0 deletions data/filtered_library.tsv

Large diffs are not rendered by default.

Binary file added data/filtered_library.tsv_pythonlib
Binary file not shown.
30 changes: 30 additions & 0 deletions data/test_mode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"mzml": "data/test_mode_filtered.mzML",
"speclib": "data/filtered_library.tsv",
"use_emp_rt": true,
"use_features": true,
"atleast_m": 3,
"threads": 10,
"ppm": 20,
"output_folder": null,
"ms2_align":false,
"timeplex": false,
"num_timeplex": 0,
"iso": true,
"unmatched": "c",
"decoy": "rev",
"tag": "",
"num_iso": 5,
"lib_frac": 0.5,
"test_mode": true,
"additional_config": {
"mz_tol": 0.00001,
"rt_tol": 5,
"im_tol": 0.5,
"rt_width": 1.5,
"numProc": 10,
"top_n": 10,
"atleast_m": 3,
"FT_minimum": 1000
}
}
34,034 changes: 34,034 additions & 0 deletions data/test_mode_filtered.features.tsv

Large diffs are not rendered by default.

75,173 changes: 75,173 additions & 0 deletions data/test_mode_filtered.mzML

Large diffs are not rendered by default.

Binary file added data/test_mode_filtered.mzML_pythonspec
Binary file not shown.
49 changes: 44 additions & 5 deletions src/run_jmod.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
def main():
"""Main function to run JMod analysis."""

print(config.args)
# print(config.args)
# Check if a single argument is provided and it's a JSON file
if len(sys.argv) == 2 and sys.argv[1].endswith('.json'):
# Treat this as the config_json argument
Expand Down Expand Up @@ -61,12 +61,9 @@ def main():
mzml_file = config.args.mzml.replace("\\","/")
lib_file = config.args.speclib.replace("\\","/")



spec_file_name = mzml_file.split("/")[-1].rsplit(".",1)[0]
lib_file_name = lib_file.split("/")[-1].rsplit(".",1)[0]


use_rt = "RT" if config.args.use_rt else ""
iso = f"iso{config.num_iso_peaks}" if config.args.iso else ""
lib_frac = f"iso{config.args.lib_frac}"
Expand Down Expand Up @@ -124,8 +121,37 @@ def main():
DIAspectra=load_files.loadSpectra(mzml_file)

if config.args.test_mode:
print(f"Running in test mode with RT range: {config.args.test_rt_min}-{config.args.test_rt_max}, m/z range: {config.args.test_mz_min}-{config.args.test_mz_max}")

# MS2 scans
all_rts = [scan.RT for scan in DIAspectra.ms2scans]
all_mzs = [scan.prec_mz for scan in DIAspectra.ms2scans]

# Features
feat_rts = dino_features['rtApex']
feat_mzs = dino_features['mz']

# Combine ranges to get overall min/max
min_rt = min(min(all_rts), feat_rts.min())
max_rt = max(max(all_rts), feat_rts.max())

min_mz = min(min(all_mzs), feat_mzs.min())
max_mz = max(max(all_mzs), feat_mzs.max())
print(min_mz)
print(max_mz)

# Compute means
mid_rt = np.mean(all_rts)
mid_mz = np.mean(all_mzs)

# Set test ranges around midpoint
config.args.test_rt_min = max(0, mid_rt - 1.5) # ±5 min
config.args.test_rt_max = mid_rt + 1.5
config.args.test_mz_min = max(0, mid_mz - 100) # ±250 m/z
config.args.test_mz_max = mid_mz + 100

print(f"Auto-set test mode ranges: RT {config.args.test_rt_min}-{config.args.test_rt_max}, "rf"m/z {config.args.test_mz_min}-{config.args.test_mz_max}")
print(f"Running in test mode with RT range: {config.args.test_rt_min}-{config.args.test_rt_max}, m/z range: {config.args.test_mz_min}-{config.args.test_mz_max}")

# Filter MS2 scans based on retention time and precursor m/z
filtered_ms2_scans = []
for scan in DIAspectra.ms2scans:
Expand All @@ -146,11 +172,24 @@ def main():
for key, entry in spectrumLibrary.items():
#if (config.args.test_rt_min - rt_tolerance <= entry["iRT"] <= config.args.test_rt_max + rt_tolerance and
# config.args.test_mz_min - mz_tolerance*entry["prec_mz"] <= entry["prec_mz"] <= config.args.test_mz_max + mz_tolerance*entry["prec_mz"]):
# filtered_library[key] = entry
if (config.args.test_mz_min - mz_tolerance*entry["prec_mz"] <= entry["prec_mz"] <= config.args.test_mz_max + mz_tolerance*entry["prec_mz"]):
filtered_library[key] = entry

print(f"Pre-filtered library to {len(filtered_library)} out of {len(spectrumLibrary)} entries for test mode")
spectrumLibrary = filtered_library

# Filtering features file
filtered_features = dino_features[
(dino_features["mz"] >= config.args.test_mz_min - mz_tolerance * dino_features["mz"]) &
(dino_features["mz"] <= config.args.test_mz_max + mz_tolerance * dino_features["mz"]) &
(dino_features["rtApex"] >= config.args.test_rt_min - rt_tolerance) &
(dino_features["rtApex"] <= config.args.test_rt_max + rt_tolerance)
]

print(f"Selected {len(filtered_features)} out of {len(dino_features)} features for test mode")
dino_features = filtered_features

else:
spectra_to_fit = DIAspectra.ms2scans
######################################################
Expand Down
22 changes: 13 additions & 9 deletions src/utils/parse_peptides.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,9 @@ def split_frag_name(ion_type: str) -> tuple[str, int, str, str]:
fragment_seq : Uses split_frag_name to determine fragment sequences
convert_frags : Processes fragment information for decoy generation
"""
frag_name,frag_z = ion_type.split("_")
frag_name, frag_z = ion_type.split("_")
if not frag_z:
raise ValueError(f"Missing charge in fragment ion '{ion_type}'")
loss_check = frag_name.split("-")
loss = ""
if len(loss_check)>1:
Expand Down Expand Up @@ -302,10 +304,11 @@ def change_seq(seq: str, rules: str) -> str:
# seq = [re.sub("\(.*\)","",aa) for aa in seq]\

if config.tag:
tags = [re.findall(f"(\({config.tag.name}.*?\))",i) for i in seq]
seq = [re.sub(f"(\({config.tag.name}.*?\))","",i) for i in seq]
tags = [re.findall(rf"(\({config.tag.name}.*?\))",i) for i in seq]
seq = [re.sub(rf"(\({config.tag.name}.*?\))","",i) for i in seq]
else:
tags = [[] for i in seq]
seq = [re.sub(r"\(.*?\)", "", aa) for aa in seq] # if the sequence has a modification on it

#mods = [extract_mod(i) for i in seq]
## assume AA is the first
Expand Down Expand Up @@ -354,8 +357,8 @@ def convert_prec_mz(seq: str, z: int = 1, mass_dict: dict[str, float] = {}) -> f
"""

#Identifies modifications within the sequence
split_seq = re.findall("([A-Z](?:\(.*?\))?)",seq)
mods = [re.findall("\((.*?)\)",i) for i in split_seq]
split_seq = re.findall(r"([A-Z](?:\(.*?\))?)",seq)
mods = [re.findall(r"\((.*?)\)",i) for i in split_seq]

## assume AA is the first. P(+79.97) or K(mTRAQ-0) for example
unmod_seq = [i[0] for i in split_seq]
Expand Down Expand Up @@ -438,8 +441,8 @@ def convert_frags(seq: str,frags: dict[str, list[float]],rules: str = diann_rule
if config.tag:
#tags = [re.findall(f"(\({config.tag.name}.*?\))",i) for i in seq]
#seq = [re.sub(f"(\({config.tag.name}.*?\))","",i) for i in seq]
tags = [[t.strip("()") for t in re.findall(f"(\({config.tag.name}.*?\))",i)] for i in split_seq]
split_seq = [re.sub(f"(\({config.tag.name}.*?\))","",i) for i in split_seq]
tags = [[t.strip("()") for t in re.findall(rf"(\({config.tag.name}.*?\))",i)] for i in split_seq]
split_seq = [re.sub(rf"(\({config.tag.name}.*?\))","",i) for i in split_seq]

else:
tags = [[] for i in seq]
Expand All @@ -451,9 +454,10 @@ def convert_frags(seq: str,frags: dict[str, list[float]],rules: str = diann_rule
unmod_seq = [i[0] for i in split_seq]

if config.tag:
tag_masses = [sum([config.tag.mass_dict[j] for j in i if j in config.tag.mass_dict]) for i in tags]
tag_masses = [sum([config.tag.mass_dict[j] for j in i if j in config.tag.mass_dict]) for i in tags]

else:
tag_masses = [0 for i in mods]
tag_masses = [0 for i in mods]

new_frags = {}

Expand Down
29 changes: 22 additions & 7 deletions tests/utils/test_parse_peptides.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

# Import the functions we want to test
from src.utils.parse_peptides import (
change_seq, convert_prec_mz, convert_frags, parse_peptide, extract_mod, split_frag_name
change_seq, convert_prec_mz, convert_frags, extract_mod, parse_peptide, split_frag_name
)

class TestChangeSeq:
Expand Down Expand Up @@ -51,7 +51,7 @@ def test_change_seq_with_modifications(self):
"""Test change_seq with modified peptides"""
# Test with single modification
result = change_seq("PEP(+79.97)TIDE", "diann")
assert result == "LDLTSED" # Modifications should be stripped for AA conversion
assert result == "LDLSVED" # Modifications should be stripped for AA conversion

# Test reverse with modifications
result = change_seq("PEP(+79.97)TIDE", "rev")
Expand Down Expand Up @@ -80,7 +80,7 @@ def test_change_seq_with_tags(self):
assert result == "L(mTRAQ)LDLSVED"

# Test reverse with tags
#Potential issue here
# Potential issue here
result = change_seq("K(mTRAQ)PEPTIDE", "rev")
assert result == "D(mTRAQ)ITPEPKE"

Expand Down Expand Up @@ -130,6 +130,10 @@ def test_convert_prec_mz(self):
#Now includes the mod mass
result = convert_prec_mz("PEPTIDE(mTRAQ-0)", 2, {"mTRAQ-0": 144.102063})
assert abs(result - 400.68725848012497 - 144.102063/2) < 1e-6

class TestConvertFrags:
"""Test cases for converting fragment ion m/z ratios function"""
def test_convert_frags(self):
seq = 'AAAEQAISVR'
frags = {
'b3_1': [214.1186178209, 0.48869178],
Expand Down Expand Up @@ -157,7 +161,17 @@ def test_convert_prec_mz(self):
'y8_1': [829.4526418832899, 0.8503218],
'y9_1': [916.4846702875599, 0.49269193]
}
assert convert_frags(seq, frags, "rev") == expected_new_frags

result = convert_frags(seq, frags, "rev")
assert all(
result[k][i] == pytest.approx(expected_new_frags[k][i], rel=1e-9)
for k in expected_new_frags
for i in (0, 1)
)

# assert convert_frags(seq, frags, "rev") == expected_new_frags



class TestParsePeptide:
"""Test cases for the parse_peptide function"""
Expand Down Expand Up @@ -544,12 +558,13 @@ def test_split_fragment_invalid_format(self):
with pytest.raises(ValueError):
split_frag_name("b5")

with pytest.raises(ValueError):
with pytest.raises(Exception): # changed to exception because it gives a values to unpack error
split_frag_name("y10-H2O")

# Missing charge after underscore
with pytest.raises(ValueError):
with pytest.raises(Exception):
split_frag_name("b5_")


def test_split_fragment_type_consistency(self):
"""Test that return types are consistent"""
Expand Down