Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@ $ ./mimic <path_to_fasta_file>
|replaceI | I | Count I as L for homolgy check |
|empiric | e | Infer amino acid frequency from fasta for mutations |
|no-digest | N | Do not digest the sequence before scrambling |
|retain-accession | A | Retains original accession (after prefix) in fasta header lines if set |


58 changes: 50 additions & 8 deletions src/Peptides.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ std::vector<size_t> getIdxs(const string& sequence){
return idxs;
}

Peptides::Peptides(unsigned int minLength, set<string> usedPeptides, AminoAcidDist background, std::mt19937 rGen, unsigned int maxTries, bool replaceI) :
Peptides::Peptides(unsigned int minLength, set<string> usedPeptides, AminoAcidDist background, std::mt19937 rGen, unsigned int maxTries, bool replaceI, unsigned int retainTermini) :
maxTries_{maxTries}, usedPeptides_{std::move(usedPeptides)}, minLen_(minLength), replaceI_(replaceI), seed_(1u),
multFactor_(1u), sharedPeptideRatio_(0.0),
proteinNamePrefix_("mimic|Random_"), prependOriginal_(false),
background_{std::move(background)}, inferAAFrequency_{false}, rGen_{rGen} {}
background_{std::move(background)}, inferAAFrequency_{false}, rGen_{rGen}, retainTermini_{ retainTermini } {}

void Peptides::printAll(const vector<string>& connectorStrings,
const std::string& suffix, std::ostream& os) {
Expand Down Expand Up @@ -151,14 +151,32 @@ void Peptides::readFasta(string& path, bool write, std::ostream& os) {
if (write) os << line << std::endl;

if (line[0] == '>') { // id line
string accession;
if (retainAccession_) {
// Extract accession from header, e.g. ">sp|P84243|H33_HUMAN ..."
size_t pipe1 = line.find('|');
if (pipe1 != string::npos) {
size_t pipe2 = line.find('|', pipe1 + 1);
if (pipe2 != string::npos) {
accession = line.substr(pipe1 + 1, pipe2 - pipe1 - 1);
}
}
accessions_.push_back(accession);
}

if (spillOver) {
assert(!seq.empty());
// add peptides and KR-stretches to connectorStrings_
cleaveProtein(seq, pepNo);
seq = "";
}
ostringstream newProteinName("");
newProteinName << ">" << proteinNamePrefix_ << ++proteinNo;
if (retainAccession_ && !accession.empty()) {
newProteinName << ">" << proteinNamePrefix_ << accession << "_" << (++proteinNo);
}
else {
newProteinName << ">" << proteinNamePrefix_ << (++proteinNo);
}
connectorStrings_.push_back(newProteinName.str());
assert(connectorStrings_.size() == pepNo+1);
} else { // amino acid sequence
Expand All @@ -175,9 +193,17 @@ void Peptides::readFasta(string& path, bool write, std::ostream& os) {
}

void Peptides::shuffle(const string& in,string& out) {
out=in;
std::shuffle(out.begin(), out.end(), rGen_);
if (retainTermini_ == 0 || in.length() <= 2 * retainTermini_) {
out = in;
std::shuffle(out.begin(), out.end(), rGen_);
} else {
out = in;
auto start = out.begin() + retainTermini_;
auto end = out.end() - retainTermini_;
std::shuffle(start, end, rGen_);
}
}

void Peptides::mutate(const string& in, string& out) {
out=in;
std::vector<size_t> okIds;
Expand Down Expand Up @@ -270,7 +296,8 @@ int Peptides::run() {
readFasta(inFile_, prependOriginal_, outStream);
background_.print(cerr);
for (unsigned int m = 0; m < multFactor_; ++m) {
Peptides entrapmentDB(minLen_, usedPeptides_, background_, rGen_, maxTries_, replaceI_);
Peptides entrapmentDB(minLen_, usedPeptides_, background_, rGen_, maxTries_, replaceI_, retainTermini_);


cerr << "Shuffling round: " << (m+1) << endl;
entrapmentDB.shuffle(pep2ixs_, logger);
Expand Down Expand Up @@ -333,6 +360,15 @@ bool Peptides::parseOptions(int argc, char **argv){
"Do not digest the sequence before scrambling",
"",
TRUE_IF_SET);
cmd.defineOption("A",
"retain-accession",
"Retain the original accession in the mimic output. \n Not set >mimic|Random_1|shuffle_1 \n If set >mimic|Random_P84243_1|shuffle_1",
"",
TRUE_IF_SET);
cmd.defineOption("T",
"retain-termini",
"Number of N- and C-terminal residues to retain during scrambling (NoDigest only)",
"int");

cmd.parseArgs(argc, argv);

Expand Down Expand Up @@ -363,6 +399,12 @@ bool Peptides::parseOptions(int argc, char **argv){
}
if (cmd.optionSet("N")) {
noDigest_ = true;
}
if (cmd.optionSet("A")) {
retainAccession_ = true;
}
if (cmd.optionSet("T")) {
retainTermini_ = cmd.getInt("T", 0, 100);
}

if (!cmd.arguments.empty()) {
Expand All @@ -374,8 +416,8 @@ bool Peptides::parseOptions(int argc, char **argv){
return true;
}

Peptides::Peptides(unsigned int minLen, set<string> usedPeptides, std::mt19937 rGen, unsigned int maxTries, bool replaceI):
Peptides(minLen, std::move(usedPeptides), AminoAcidDist{replaceI}, rGen, maxTries, replaceI) {
Peptides::Peptides(unsigned int minLen, set<string> usedPeptides, std::mt19937 rGen, unsigned int maxTries, bool replaceI, unsigned int retainTermini):
Peptides(minLen, std::move(usedPeptides), AminoAcidDist{replaceI}, rGen, maxTries, replaceI, retainTermini) {

}

Expand Down
10 changes: 7 additions & 3 deletions src/Peptides.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ using namespace std;
class Peptides
{
public:
Peptides(unsigned int minLen, set<string> usedPeptides, AminoAcidDist background, std::mt19937 rGen, unsigned int maxTries = 1000, bool replaceI=false);
Peptides(unsigned int minLen, set<string> usedPeptides, std::mt19937 rGen, unsigned int maxTries = 1000, bool replaceI=false);
Peptides();
Peptides(unsigned int minLen, set<string> usedPeptides, AminoAcidDist background, std::mt19937 rGen, unsigned int maxTries = 1000, bool replaceI = false, unsigned int retainTermini = 0);
Peptides(unsigned int minLen, set<string> usedPeptides, std::mt19937 rGen, unsigned int maxTries = 1000, bool replaceI = false, unsigned int retainTermini = 0);
Peptides();

bool parseOptions(int argc, char **argv);
int run();
Expand All @@ -53,6 +53,7 @@ class Peptides

map<string,set<unsigned int> > pep2ixs_;
vector<string> connectorStrings_;
vector<string> accessions_;
set<string> usedPeptides_;

// input arguments
Expand All @@ -73,6 +74,9 @@ class Peptides
AbsAminoAcidDist absBackground_;
bool isVerbose_ = false;
bool noDigest_ = false;
bool retainAccession_ = false;
unsigned int retainTermini_ = 0;

};

#endif /*PEPTIDES_H_*/