Module cstag.to_sequence
Expand source code
from __future__ import annotations
from cstag.split import split
from cstag.utils.validator import validate_cs_tag, validate_long_format
def to_sequence(cs_tag: str) -> str:
"""Reconstruct the reference subsequence in the alignment
Args:
cs_tag (str): cs tag in the **long** format
Returns:
str: The sequence string derived from the cs tag.
Example:
>>> import cstag
>>> cs_tag = "=AC*gt=T-gg=C+tt=A"
>>> cstag.to_sequence(cs_tag)
'ACTTCTTA'
"""
validate_cs_tag(cs_tag)
validate_long_format(cs_tag)
cs_tag = cs_tag.replace("cs:Z:", "")
sequence = []
for cs in split(cs_tag):
if cs.startswith("="):
sequence.append(cs[1:].upper())
elif cs.startswith("+"):
sequence.append(cs[1:].upper())
elif cs.startswith("*"):
sequence.append(cs[-1].upper())
return "".join(sequence)
Functions
def to_sequence(cs_tag: str) ‑> str
-
Reconstruct the reference subsequence in the alignment
Args
cs_tag
:str
- cs tag in the long format
Returns
str
- The sequence string derived from the cs tag.
Example
>>> import cstag >>> cs_tag = "=AC*gt=T-gg=C+tt=A" >>> cstag.to_sequence(cs_tag) 'ACTTCTTA'
Expand source code
def to_sequence(cs_tag: str) -> str: """Reconstruct the reference subsequence in the alignment Args: cs_tag (str): cs tag in the **long** format Returns: str: The sequence string derived from the cs tag. Example: >>> import cstag >>> cs_tag = "=AC*gt=T-gg=C+tt=A" >>> cstag.to_sequence(cs_tag) 'ACTTCTTA' """ validate_cs_tag(cs_tag) validate_long_format(cs_tag) cs_tag = cs_tag.replace("cs:Z:", "") sequence = [] for cs in split(cs_tag): if cs.startswith("="): sequence.append(cs[1:].upper()) elif cs.startswith("+"): sequence.append(cs[1:].upper()) elif cs.startswith("*"): sequence.append(cs[-1].upper()) return "".join(sequence)