diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 1f23b782..9d28e777 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -39,7 +39,6 @@ jobs:
         hash -r
         conda config --set always_yes yes --set changeps1 no
         conda config --add channels conda-forge
-        conda config --add channels defaults                  
         conda config --add channels bioconda                  
         conda config --get                                    
 
@@ -48,8 +47,11 @@ jobs:
         conda info -a
 
         # Create test environment and install deps
-        conda create -q -n test-environment python=${{ matrix.python-version }} setuptools pip cython numpy pandas nose samtools pysam scipy pyyaml bioframe
+        conda create -q -n test-environment -c anaconda python=${{ matrix.python-version }} setuptools pip numpy scipy pandas nose
         source activate test-environment
+        conda install -c conda-forge cython pyyaml
+        conda install -c bioconda samtools
+        pip install pysam bioframe # workaround: incompatible with python3.10 if installed by conda
         pip install click
         python setup.py build_ext -i
         
diff --git a/README.md b/README.md
index e5d5a276..f9746f9e 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [![Documentation Status](https://readthedocs.org/projects/pairtools/badge/?version=latest)](http://pairtools.readthedocs.org/en/latest/)
 [![Build Status](https://travis-ci.org/mirnylab/pairtools.svg?branch=master)](https://travis-ci.org/mirnylab/pairtools)
-[![Join the chat at https://gitter.im/mirnylab/distiller](https://badges.gitter.im/mirnylab/distiller.svg)](https://gitter.im/mirnylab/distiller?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![Join the chat at Open2C Slack](https://join.slack.com/t/open2c/shared_invite/zt-1buzt2uyt-FLGp8~~MvbTCig5sQZTSNQ)](https://join.slack.com/t/open2c/shared_invite/zt-1buzt2uyt-FLGp8~~MvbTCig5sQZTSNQ)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1490831.svg)](https://doi.org/10.5281/zenodo.1490831)
 
 ## Process Hi-C pairs with pairtools
@@ -19,9 +19,12 @@ operations:
 - generate extensive statistics of Hi-C datasets
 - select Hi-C pairs given flexibly defined criteria
 - restore .sam alignments from Hi-C pairs
+- annotate restriction digestion sites
+- get the mutated positions in Hi-C pairs
 
 To get started:
-- Take a look at a [quick example](https://github.com/open2c/pairtools#quick-example)
+- Visit [pairtools tutorials](https://pairtools.readthedocs.io/en/latest/examples/pairtools_walkthrough.html),
+- Take a look at a [quick example](https://github.com/open2c/pairtools#quick-example),
 - Check out the detailed [documentation](http://pairtools.readthedocs.io).
 
 ## Data formats
@@ -32,18 +35,23 @@ format defined by the [4D Nucleome Consortium](https://www.4dnucleome.org/). All
 pairtools properly manage file headers and keep track of the data
 processing history.
 
-Additionally, `pairtools` define the .pairsam format, an extension of .pairs that includes the SAM alignments 
+Additionally, `pairtools` define the [.pairsam format](https://pairtools.readthedocs.io/en/latest/formats.html#pairsam), an extension of .pairs that includes the SAM alignments 
 of a sequenced Hi-C molecule. .pairsam complies with the .pairs format, and can be processed by any tool that
 operates on .pairs files.
 
+`pairtools` produces a set of additional extra columns, which describe properties of alignments, phase, mutations, restriction and complex walks.
+The full list of possible extra columns is provided in the [`pairtools` format specification](https://pairtools.readthedocs.io/en/latest/formats.html#extra-columns). 
+
 ## Installation
 
 Requirements:
 
 - Python 3.x
-- Python packages `cython`, `numpy` and `click`.
+- Python packages `cython`, `pysam`, `bioframe`, `pyyaml`, `numpy`, `scipy`, `pandas` and `click`.
 - Command-line utilities `sort` (the Unix version), `bgzip` (shipped with `samtools`)  and `samtools`. If available, `pairtools` can compress outputs with `pbgzip` and `lz4`.
 
+For the full list of recommended versions, see [requirements in the the GitHub repo](https://github.com/open2c/pairtools/blob/detect_mutations/requirements.txt). 
+
 We highly recommend using the `conda` package manager to install `pairtools` together with all its dependencies. To get it, you can either install the full [Anaconda](https://www.continuum.io/downloads) Python distribution or just the standalone [conda](http://conda.pydata.org/miniconda.html) package manager.
 
 With `conda`, you can install `pairtools` and all of its dependencies from the [bioconda](https://bioconda.github.io/index.html) channel.
@@ -91,21 +99,27 @@ Hi-C data analysis workflow, based on `pairtools` and [nextflow](https://www.nex
 
 ## Tools
 
-- `parse`: read .sam files produced by bwa and form Hi-C pairs
+- `parse`: read .sam/.bam files produced by bwa and form Hi-C pairs
     - form Hi-C pairs by reporting the outer-most mapped positions and the strand
     on the either side of each molecule;
     - report unmapped/multimapped (ambiguous alignments)/chimeric alignments as
     chromosome "!", position 0, strand "-";
-    - identify and rescue chrimeric alignments produced by singly-ligated Hi-C 
-    molecules with a sequenced ligation junction on one of the sides;
-    - annotate chimeric alignments by restriction fragments and report true junctions and hops (One-Read-Based Interactions Annotation, ORBITA);
     - perform upper-triangular flipping of the sides of Hi-C molecules 
     such that the first side has a lower sorting index than the second side;
     - form hybrid pairsam output, where each line contains all available data 
     for one Hi-C molecule (outer-most mapped positions on the either side, 
     read ID, pair type, and .sam entries for each alignment);
+    - report .sam tags or mutations of the alignments;
     - print the .sam header as #-comment lines at the start of the file.
 
+- `parse2`: read .sam/.bam files with long paired-and or single-end reads and form Hi-C pairs from complex walks 
+    - identify and rescue chrimeric alignments produced by singly-ligated Hi-C 
+    molecules with a sequenced ligation junction on one of the sides;
+    - annotate chimeric alignments by restriction fragments and report true junctions and hops (One-Read-Based Interactions Annotation, ORBITA);
+    - perform intra-molecule deduplication of paired-end data when one side reads through the DNA on the other side of the read;
+    - report index of the pair in the complex walk;
+    - make combinatorial expansion of pairs produced from the same walk; 
+
 - `sort`: sort pairs files (the lexicographic order for chromosomes, 
     the numeric order for the positions, the lexicographic order for pair types).
 
@@ -125,7 +139,10 @@ Hi-C data analysis workflow, based on `pairtools` and [nextflow](https://www.nex
 - `dedup`: remove PCR duplicates from a sorted triu-flipped .pairs file
     - remove PCR duplicates by finding pairs of entries with both sides mapped
     to similar genomic locations (+/- N bp);
-    - optionally output the PCR duplicate entries into a separate file.
+    - optionally output the PCR duplicate entries into a separate file;
+    - detect optical duplicates from the original Illumina read ids;
+    - apply filtering by various properties of pairs (MAPQ; orientation; distance) together with deduplication; 
+    - output yaml or convenient tsv deduplication stats into text file.
     - NOTE: in order to remove all PCR duplicates, the input must contain \*all\* 
       mapped read pairs from a single experimental replicate;
 
@@ -136,10 +153,20 @@ Hi-C data analysis workflow, based on `pairtools` and [nextflow](https://www.nex
 
 - `split`: split a .pairsam file into .pairs and .sam.
 
+- `flip`: flip pairs to get an upper-triangular matrix
+
+- `header`: manipulate the .pairs/.pairsam header
+    - generate new header for headerless .pairs file
+    - transfer header from one .pairs file to another
+    - set column names for the .pairs file
+    - validate that the header corresponds to the information stored in .pairs file
+
 - `stats`: calculate various statistics of .pairs files
 
 - `restrict`: identify the span of the restriction fragment forming a Hi-C junction
 
+- `phase`: phase pairs mapped to a diploid genome 
+
 ## Contributing
 
 [Pull requests](https://akrabat.com/the-beginners-guide-to-contributing-to-a-github-project/) are welcome.
diff --git a/doc/formats.rst b/doc/formats.rst
index 8ca364d4..a778c994 100644
--- a/doc/formats.rst
+++ b/doc/formats.rst
@@ -148,3 +148,37 @@ Finally, sam1 and sam2 can store multiple .sam alignments, separated by a string
 .. |check| unicode:: U+2714 .. check
 .. |cross| unicode:: U+274C .. cross
 
+Extra columns
+----------------
+
+`pairtools` can operate on `.pairs/.pairsam` with extra columns.
+Extra columns are specified in the order defined by the order their addition by various tools.
+Column names can be checked in the header of `.pairs/.pairsam` file.
+We provide `pairtools header` utilities for manipulating and verifying compatibility of headers and their columns.
+
+The list of additional columns used throughout `pairtools` modules:
+
+=================================== =================== ====================== ================================================== =================
+extra column                        generating module   format                 how to add it                                       description
+=================================== =================== ====================== ================================================== =================
+mapq1, mapq2                         `parse/parse2`      number from 0 to 255   `pairtools parse --add-columns mapq`              `Mapping quality <https://sequencing.qcfail.com/articles/mapq-values-are-really-useful-but-their-implementation-is-a-mess>`_, as reported in .sam/.bam, $-10 log_{10}(P_{error})$
+pos51, pos52                         `parse/parse2`      genomic coordinate     `pairtools parse --add-columns pos5`              5' position of alignment (closer to read start)
+pos31, pos32                         `parse/parse2`      genomic coordinate     `pairtools parse --add-columns pos3`              3' position of alignment (further from read start)
+cigar1, cigar2                       `parse/parse2`      string                 `pairtools parse --add-columns cigar`             `CIGAR, or Compact Idiosyncratic Gapped Alignment Report <https://en.wikipedia.org/wiki/Sequence_alignment#CIGAR_Format>`_ of alignment, as reported in .sam/.bam
+read_len1, read_len2                 `parse/parse2`      number                 `pairtools parse --add-columns read_len`          read length
+matched_bp1, matched_bp2             `parse/parse2`      number                 `pairtools parse --add-columns matched_bp`        number of matched alignment basepairs to the reference
+algn_ref_span1, algn_ref_span2       `parse/parse2`      number                 `pairtools parse --add-columns algn_ref_span`     basepairs of reference covered by alignment
+algn_read_span1, algn_read_span2     `parse/parse2`      number                 `pairtools parse --add-columns algn_read_span`    basepairs of read covered by alignment
+dist_to_51, dist_to_52               `parse/parse2`      number                 `pairtools parse --add-columns dist_to_5`         distance to 5'-end of read
+dist_to_31, dist_to_32               `parse/parse2`      number                 `pairtools parse --add-columns dist_to_3`         distance to 3'-end of read
+seq1, seq2                           `parse/parse2`      string                 `pairtools parse --add-columns seq`               sequence of alignment
+mismatches1, mismatches2             `parse/parse2`      string                 `pairtools parse --add-columns mismatches`        comma-separated list of mismatches relative to the reference, "{ref_letter}:{mut_letter}:{phred}:{ref_position}:{read_position}"
+XB1/2,AS1/2,XS1/2 or any sam tag     `parse/parse2`                             `pairtools parse --add-columns XA,XB,NM`          format depends on `tag specification <https://samtools.github.io/hts-specs/SAMv1.pdf>`_
+walk_pair_type                       `parse/parse2`      string                 `pairtools parse2 --add-pair-index`               Type of the pair relative to R1 and R2 reads of paired-end sequencing, see `pasring docs <https://pairtools.readthedocs.io/en/latest/parsing.html#rescuing-complex-walks>`_
+walk_pair_index                      `parse/parse2`      number                 `pairtools parse2 --add-pair-index`               Order of the pair in the complex walk, starting from 5'-end of left read, see `pasring docs <https://pairtools.readthedocs.io/en/latest/parsing.html#rescuing-complex-walks>`_
+phase                                `phase`             0, 1 or "."            `pairtools phase`                                 Phase of alignment (haplotype 1, 2, on unphased), see `phasing walkthrough <https://pairtools.readthedocs.io/en/latest/examples/pairtools_phase_walkthrough.html>`_
+rfrag1, rfrag2                       `restrict`          number                 `pairtools restrict`                              Unique index of the restriction fragment after annotating pairs positions, see `restriction walkthrough <https://pairtools.readthedocs.io/en/latest/examples/pairtools_restrict_walkthrough.html>`_
+rfrag_start1, rfrag_start2           `restrict`          number                 `pairtools restrict`                              Coordinate of the start of restriction fragment
+rfrag_end1, rfrag_end2               `restrict`          number                 `pairtools restrict`                              Coordinate of the end of restriction fragment
+=================================== =================== ====================== ================================================== =================
+
diff --git a/doc/parsing.rst b/doc/parsing.rst
index b38f04a9..d7d63935 100644
--- a/doc/parsing.rst
+++ b/doc/parsing.rst
@@ -265,7 +265,7 @@ position:
 Sometimes it is important to restore the sequence of ligation events (e.g., for MC-3C data). For that, you can add
 special columns ``walk_pair_index`` and ``walk_pair_type`` by setting ``--add-pair-index`` option of ``parse2``, that will keep the order and type of pair in the whole walk in the output .pairs file.
 
-- ``walk_pair_index`` contains information on the order of the pair in the recovered walk, starting from 5'-end of left read
+- ``walk_pair_index`` contains information on the order of the pair in the complex walk, starting from 5'-end of left read
 - ``walk_pair_type`` describes the type of the pair relative to R1 and R2 reads of paired-end sequencing:
 
   - "R1-2" - unconfirmed pair, right and left alignments in the pair originate from different reads (left or right). This might be indirect ligation (mediated by other DNA fragments).
diff --git a/pairtools/cli/parse.py b/pairtools/cli/parse.py
index 567e5e2f..fc34cc01 100644
--- a/pairtools/cli/parse.py
+++ b/pairtools/cli/parse.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+
 import click
 import sys
 
@@ -13,20 +14,6 @@
 
 UTIL_NAME = "pairtools_parse"
 
-EXTRA_COLUMNS = [
-    "mapq",
-    "pos5",
-    "pos3",
-    "cigar",
-    "read_len",
-    "matched_bp",
-    "algn_ref_span",
-    "algn_read_span",
-    "dist_to_5",
-    "dist_to_3",
-    "seq",
-]
-
 
 @cli.command()
 @click.argument("sam_path", type=str, required=False)
@@ -96,7 +83,7 @@
     help="Report extra columns describing alignments "
     "Possible values (can take multiple values as a comma-separated "
     "list): a SAM tag (any pair of uppercase letters) or {}.".format(
-        ", ".join(EXTRA_COLUMNS)
+        ", ".join(pairsam_format.EXTRA_COLUMNS)
     ),
 )
 @click.option(
@@ -228,7 +215,7 @@ def parse_py(
     add_columns = kwargs.get("add_columns", [])
     add_columns = [col for col in add_columns.split(",") if col]
     for col in add_columns:
-        if not ((col in EXTRA_COLUMNS) or (len(col) == 2 and col.isupper())):
+        if not ((col in pairsam_format.EXTRA_COLUMNS) or (len(col) == 2 and col.isupper())):
             raise Exception("{} is not a valid extra column".format(col))
 
     columns = pairsam_format.COLUMNS + (
diff --git a/pairtools/cli/parse2.py b/pairtools/cli/parse2.py
index 37fb6cdd..725a5bec 100644
--- a/pairtools/cli/parse2.py
+++ b/pairtools/cli/parse2.py
@@ -13,21 +13,6 @@
 
 UTIL_NAME = "pairtools_parse2"
 
-EXTRA_COLUMNS = [
-    "mapq",
-    "pos5",
-    "pos3",
-    "cigar",
-    "read_len",
-    "matched_bp",
-    "algn_ref_span",
-    "algn_read_span",
-    "dist_to_5",
-    "dist_to_3",
-    "seq",
-]
-
-
 @cli.command()
 @click.argument("sam_path", type=str, required=False)
 # Parsing options:
@@ -145,9 +130,11 @@
 @click.option(
     "--flip/--no-flip",
     is_flag=True,
-    default=True,
-    help="If specified, do not flip pairs in genomic order and instead preserve "
-    "the order in which they were sequenced.",
+    default=False,
+    help="If specified, flip pairs in genomic order and instead preserve "
+    "the order in which they were sequenced. Note that no flip is recommended for analysis of walks because it will "
+    "override the order of alignments in pairs. Flip is required for appropriate deduplication of sorted pairs. "
+    "Flip is not required for cooler cload, which runs flipping internally. ",
 )
 @click.option(
     "--drop-readid/--keep-readid",
@@ -192,7 +179,7 @@
     help="Report extra columns describing alignments "
     "Possible values (can take multiple values as a comma-separated "
     "list): a SAM tag (any pair of uppercase letters) or {}.".format(
-        ", ".join(EXTRA_COLUMNS)
+        ", ".join(pairsam_format.EXTRA_COLUMNS)
     ),
 )
 @click.option(
@@ -281,7 +268,7 @@ def parse2_py(
     add_columns = kwargs.get("add_columns", [])
     add_columns = [col for col in add_columns.split(",") if col]
     for col in add_columns:
-        if not ((col in EXTRA_COLUMNS) or (len(col) == 2 and col.isupper())):
+        if not ((col in pairsam_format.EXTRA_COLUMNS) or (len(col) == 2 and col.isupper())):
             raise Exception("{} is not a valid extra column".format(col))
 
     columns = pairsam_format.COLUMNS + (
diff --git a/pairtools/lib/pairsam_format.py b/pairtools/lib/pairsam_format.py
index dfbd95ec..96c13f01 100644
--- a/pairtools/lib/pairsam_format.py
+++ b/pairtools/lib/pairsam_format.py
@@ -62,3 +62,18 @@
 UNMAPPED_STRAND = "-"
 
 UNANNOTATED_RFRAG = -1
+
+EXTRA_COLUMNS = [
+    "mapq",
+    "pos5",
+    "pos3",
+    "cigar",
+    "read_len",
+    "matched_bp",
+    "algn_ref_span",
+    "algn_read_span",
+    "dist_to_5",
+    "dist_to_3",
+    "seq",
+    "mismatches" # Format: "{ref_letter}:{mut_letter}:{phred}:{ref_position}:{read_position}"
+]
\ No newline at end of file
diff --git a/pairtools/lib/parse.py b/pairtools/lib/parse.py
index ce0a1a27..55f5acbb 100644
--- a/pairtools/lib/parse.py
+++ b/pairtools/lib/parse.py
@@ -35,7 +35,7 @@
 
 """
 from . import pairsam_format
-
+from .parse_pysam import get_mismatches_c
 
 def streaming_classify(
     instream, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs
@@ -125,6 +125,7 @@ def streaming_classify(
                     walks_policy=kwargs["walks_policy"],
                     sam_tags=sam_tags,
                     store_seq=store_seq,
+                    report_mismatches=True if 'mismatches' in add_columns else False,
                 )
             else:  # parse2 parser:
                 pairstream, all_algns1, all_algns2 = parse2_read(
@@ -141,6 +142,7 @@ def streaming_classify(
                     store_seq=store_seq,
                     expand=kwargs["expand"],
                     max_expansion_depth=kwargs["max_expansion_depth"],
+                    report_mismatches=True if 'mismatches' in add_columns else False,
                 )
 
             ### Write:
@@ -240,18 +242,26 @@ def empty_alignment():
         "clip5_ref": 0,
         "read_len": 0,
         "type": "N",
+        "mismatches": ""
     }
 
 
 def parse_pysam_entry(
-    sam, min_mapq, sam_tags=None, store_seq=False, report_3_alignment_end=False
+    sam,
+    min_mapq,
+    sam_tags=None,
+    store_seq=False,
+    report_3_alignment_end=False,
+    report_mismatches=False
 ):
     """Parse alignments from pysam AlignedSegment entry
     :param sam: input pysam AlignedSegment entry
     :param min_mapq: minimal MAPQ to consider as a proper alignment
     :param sam_tags: list of sam tags to store
     :param store_seq: if True, the sequence will be parsed and stored in the output
-    :param report_3_alignment_end: if True, 3'-end of alignment will be reported as position (will be deprecated)
+    :param report_3_alignment_end: if True, 3'-end of alignment will be
+                                reported as position (will be deprecated)
+    :param report_mismatches: if True, mismatches will be parsed from MD field
     :return: parsed aligned entry (dictionary)
     """
 
@@ -282,11 +292,23 @@ def parse_pysam_entry(
                 # Note that pysam output is zero-based, thus add +1:
                 pos3 = sam.reference_start + 1
 
+            # Get number of matches:
+            if not sam.has_tag("MD") or not report_mismatches:
+                mismatches = ""
+            else:
+                seq = sam.query_sequence.upper()
+                quals = sam.query_qualities
+                aligned_pairs = sam.get_aligned_pairs(with_seq=True, matches_only=True)
+                mismatches = get_mismatches_c(seq, quals, aligned_pairs)
+                mismatches = ",".join([f"{original}:{mutated}:{phred}:{ref}:{read}" for original, mutated, phred, ref, read in mismatches])
+                #n_matches = len(aligned_pairs)
+
         else:
             chrom = pairsam_format.UNMAPPED_CHROM
             strand = pairsam_format.UNMAPPED_STRAND
             pos5 = pairsam_format.UNMAPPED_POS
             pos3 = pairsam_format.UNMAPPED_POS
+            mismatches = ""
     else:
         chrom = pairsam_format.UNMAPPED_CHROM
         strand = pairsam_format.UNMAPPED_STRAND
@@ -295,6 +317,7 @@ def parse_pysam_entry(
 
         dist_to_5 = 0
         dist_to_3 = 0
+        mismatches = ""
 
     algn = {
         "chrom": chrom,
@@ -308,6 +331,7 @@ def parse_pysam_entry(
         "dist_to_5": dist_to_5,
         "dist_to_3": dist_to_3,
         "type": ("N" if not is_mapped else ("M" if not is_unique else "U")),
+        "mismatches": mismatches
     }
 
     algn.update(cigar)
@@ -393,6 +417,7 @@ def parse_read(
     walks_policy,
     sam_tags,
     store_seq,
+    report_mismatches=False,
 ):
     """
     Parse sam entries corresponding to a single read (or Hi-C molecule)
@@ -426,8 +451,8 @@ def parse_read(
         return iter([(algns1[0], algns2[0], pair_index)]), algns1, algns2
 
     # Generate a sorted, gap-filled list of all alignments
-    algns1 = [parse_pysam_entry(sam, min_mapq, sam_tags, store_seq) for sam in sams1]
-    algns2 = [parse_pysam_entry(sam, min_mapq, sam_tags, store_seq) for sam in sams2]
+    algns1 = [parse_pysam_entry(sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches) for sam in sams1]
+    algns2 = [parse_pysam_entry(sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches) for sam in sams2]
 
     if len(algns1) > 0:
         algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"])
@@ -545,6 +570,7 @@ def parse2_read(
     sam_tags=[],
     dedup_max_mismatch=3,
     store_seq=False,
+    report_mismatches=False,
     expand=False,
     max_expansion_depth=None,
 ):
@@ -567,7 +593,7 @@ def parse2_read(
     if single_end:
         # Generate a sorted, gap-filled list of all alignments
         algns1 = [
-            parse_pysam_entry(sam, min_mapq, sam_tags, store_seq)
+            parse_pysam_entry(sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches)
             for sam in sams2  # note sams2, that's how these reads are typically parsed
         ]
         algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"])
@@ -617,10 +643,10 @@ def parse2_read(
 
         # Generate a sorted, gap-filled list of all alignments
         algns1 = [
-            parse_pysam_entry(sam, min_mapq, sam_tags, store_seq) for sam in sams1
+            parse_pysam_entry(sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches) for sam in sams1
         ]
         algns2 = [
-            parse_pysam_entry(sam, min_mapq, sam_tags, store_seq) for sam in sams2
+            parse_pysam_entry(sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches) for sam in sams2
         ]
 
         # Sort alignments by the distance to the 5'-end:
diff --git a/pairtools/lib/parse_pysam.pyx b/pairtools/lib/parse_pysam.pyx
index a65d3ba6..efdc6dfe 100644
--- a/pairtools/lib/parse_pysam.pyx
+++ b/pairtools/lib/parse_pysam.pyx
@@ -91,5 +91,35 @@ cdef class AlignedSegmentPairtoolized(AlignedSegment):
                 "algn_ref_span": algn_ref_span,
                 "algn_read_span": algn_read_span,
                 "read_len": read_len,
-                "matched_bp": matched_bp,
+                "matched_bp": matched_bp
             }
+
+
+from cpython cimport array
+import cython
+cimport cython
+
+cpdef list get_mismatches_c(str seq, array.array quals, list aligned_pairs):
+    '''
+    This function takes a SAM alignment and, for every mismatch between the read and reference sequences,
+    returns a tuple (the reference bp, the read bp, PHRED quality of the bp, reference position, read position).
+    
+    Reference: https://github.com/gerlichlab/scshic_pipeline/blob/master/bin/seq_mismatches.pyx 
+    '''
+
+    cdef cython.int read_pos, ref_pos
+    cdef str orig_bp, orig_bp_upper
+    cdef list mismatches = []
+
+    for read_pos, ref_pos, orig_bp in aligned_pairs:
+        orig_bp_upper = orig_bp.upper()
+        if (seq[read_pos] != orig_bp_upper):
+            mismatches.append(
+                (orig_bp_upper,
+                 seq[read_pos],
+                 quals[read_pos],
+                 ref_pos,
+                 read_pos)
+            )
+
+    return mismatches
\ No newline at end of file
diff --git a/tests/test_parse2.py b/tests/test_parse2.py
index cef2c912..37075ae1 100644
--- a/tests/test_parse2.py
+++ b/tests/test_parse2.py
@@ -22,6 +22,7 @@ def test_mock_pysam_parse2_read():
                 "-c",
                 mock_chroms_path,
                 "--add-pair-index",
+                "--flip",
                 "--report-position",
                 "junction",
                 "--report-orientation",
@@ -81,6 +82,7 @@ def test_mock_pysam_parse2_pair():
                 "-c",
                 mock_chroms_path,
                 "--add-pair-index",
+                "--flip",
                 "--report-position",
                 "outer",
                 "--report-orientation",