Module cstag.revcomp
Expand source code
from __future__ import annotations
import re
map_revcomp = {
"A": "T",
"C": "G",
"G": "C",
"T": "A",
"N": "N",
"a": "t",
"c": "g",
"g": "c",
"t": "a",
"n": "n",
}
def _extract_numbers(strings: str) -> list[str]:
# Using regular expression to find all numbers in the strings: st
numbers = re.findall(r"\d+", strings)
return numbers
def revcomp(cs_tag: str, prefix: bool = False) -> str:
"""Converts a cs tag into its reverse complement.
Args:
cs_tag (str): a cs tag
prefix (bool, optional): Whether to add the prefix 'cs:Z:' to the cs tag. Defaults to False
Return:
str: reverse complement of a cs tag
Example:
>>> import cstag
>>> cs = "=AAAA*ag=CTG"
>>> cstag.revcomp(cs)
'=CAG*tc=TTTT'
"""
pattern = r"(\=[ACGTN]+|:[0-9]+|\*[acgtn][acgtn]|\+[acgtn]+|\-[acgtn]+|\~[acgtn]{2}[0-9]+[acgtn]{2})"
cs_tag_revcomp = []
cs_tag = cs_tag.replace("cs:Z:", "")
for cs in re.split(pattern, cs_tag)[::-1]:
if cs == "":
continue
elif cs[0] == ":":
cs_tag_revcomp.append(cs)
elif cs[0] == "*":
cs_tag_revcomp.append(f"*{map_revcomp[cs[1]]}{map_revcomp[cs[2]]}")
elif cs[0] == "~":
numbers = _extract_numbers(cs)
cs_tag_revcomp.append(
f"~{map_revcomp[cs[-1]]}{map_revcomp[cs[-2]]}{numbers[0]}{map_revcomp[cs[2]]}{map_revcomp[cs[1]]}"
)
else:
op = cs[0]
cs_revcomp = "".join([map_revcomp[c] for c in cs[1:]])[::-1]
cs_tag_revcomp.append(f"{op}{cs_revcomp}")
cs_tag_revcomp = "".join(cs_tag_revcomp)
if prefix is True:
return "cs:Z:" + cs_tag_revcomp
else:
return cs_tag_revcomp
Functions
def revcomp(cs_tag: str, prefix: bool = False) ‑> str
-
Converts a cs tag into its reverse complement.
Args
cs_tag
:str
- a cs tag
prefix
:bool
, optional- Whether to add the prefix 'cs:Z:' to the cs tag. Defaults to False
Return
str: reverse complement of a cs tag
Example
>>> import cstag >>> cs = "=AAAA*ag=CTG" >>> cstag.revcomp(cs) '=CAG*tc=TTTT'
Expand source code
def revcomp(cs_tag: str, prefix: bool = False) -> str: """Converts a cs tag into its reverse complement. Args: cs_tag (str): a cs tag prefix (bool, optional): Whether to add the prefix 'cs:Z:' to the cs tag. Defaults to False Return: str: reverse complement of a cs tag Example: >>> import cstag >>> cs = "=AAAA*ag=CTG" >>> cstag.revcomp(cs) '=CAG*tc=TTTT' """ pattern = r"(\=[ACGTN]+|:[0-9]+|\*[acgtn][acgtn]|\+[acgtn]+|\-[acgtn]+|\~[acgtn]{2}[0-9]+[acgtn]{2})" cs_tag_revcomp = [] cs_tag = cs_tag.replace("cs:Z:", "") for cs in re.split(pattern, cs_tag)[::-1]: if cs == "": continue elif cs[0] == ":": cs_tag_revcomp.append(cs) elif cs[0] == "*": cs_tag_revcomp.append(f"*{map_revcomp[cs[1]]}{map_revcomp[cs[2]]}") elif cs[0] == "~": numbers = _extract_numbers(cs) cs_tag_revcomp.append( f"~{map_revcomp[cs[-1]]}{map_revcomp[cs[-2]]}{numbers[0]}{map_revcomp[cs[2]]}{map_revcomp[cs[1]]}" ) else: op = cs[0] cs_revcomp = "".join([map_revcomp[c] for c in cs[1:]])[::-1] cs_tag_revcomp.append(f"{op}{cs_revcomp}") cs_tag_revcomp = "".join(cs_tag_revcomp) if prefix is True: return "cs:Z:" + cs_tag_revcomp else: return cs_tag_revcomp