Module cstag.to_vcf

Functions

def add_vcf_fields(variant_annotations: list[Vcf], chrom: str, reference_depth: dict[tuple[str, int], int]) ‑> list[Vcf]

Add the chromosome and VCF info (AD, RD, DP, and VAF) to the immutable Vcf dataclass objects.

def call_reference_depth(variant_annotations, cs_tags_list, positions_list) ‑> dict[tuple[str, int], int]
def chrom_sort_key(chrom: str) ‑> int

Convert a chromosome string to an integer for sorting.

def find_ref_for_deletion(cs_tag_split: list[str], idx: int) ‑> str
def find_ref_for_insertion(cs_tag_split: list[str], idx: int) ‑> str | None
def format_cs_tags(cs_tags: list[str], chroms: list[str] | list[int], positions: list[int]) ‑> list[CsInfo]

Format and filter cs_tags, and create a list of CsInfo objects.

This function takes lists of cs_tags, chromosomes, and positions. It filters out any cs_tags containing a tilde ("~") and creates a list of CsInfo objects.

Args

cs_tags : list[str]
List of cs_tags as strings.
chroms : list[str] | list[int]
List of chromosomes as strings or integers.
positions : list[int]
List of starting positions as integers.

Returns

list[CsInfo]
A list of CsInfo objects, each containing information about a cs_tag, its chromosome, and its start and end positions.

def get_pos_end(cs_tag: str, pos: int) ‑> int

Get the 1-indexed end position.

def get_variant_annotations(cs_tag_split: list[str], position: int) ‑> list[Vcf]
def group_by_chrom(cs_tags_formatted: list[tuple]) ‑> dict[str, tuple]

Group cs tags by chromosomes

def group_by_overlapping_intervals(cs_tags_grouped: CsInfo) ‑> list[CsInfo]
def process_cs_tag(cs_tag: str, chrom: str | int, pos: int) ‑> str
def process_cs_tags(cs_tags: list[str], chroms: list[str], positions: list[int]) ‑> str
def remove_spaces_around_newlines(text: str) ‑> str
def replace_mutation_to_atmark(cs_tags: str) ‑> str

Replaces mutations with '@'.

def to_vcf(cs_tags: str | list[str], chroms: str | int | list[str] | list[int], positions: int | list[int]) ‑> str

Convert cs tag(s) to VCF (Variant Call Format) string.

Args

cs_tags : str | list[str]
The cs tag(s) representing the sequence alignment(s).
chroms : str | int | list[str] | list[int]
The chromosome name(s).
positions : int | list[int]
The starting position(s) of the sequence(s).

Returns

str
The VCF-formatted string.

Example

>>> import cstag
>>> cs_tag = "=AC*gt=T-gg=C+tt=A"
>>> chrom = "chr1"
>>> pos = 1
>>> print(cstag.to_vcf(cs_tag, chrom, pos))
##fileformat=VCFv4.2
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO
chr1    3       .       G       T       .       .       .
chr1    4       .       TGG     T       .       .       .
chr1    5       .       C       CTT     .       .       .

Classes

class CsInfo (cs_tag: str, pos_start: int, pos_end: int, chrom: str | None = None)

CsInfo(cs_tag: 'str', pos_start: 'int', pos_end: 'int', chrom: 'str | None' = None)

Expand source code
@dataclass(frozen=True)
class CsInfo:
    """Immutable record of one cs tag, its chromosome, and its start and end positions."""

    # The cs tag string.
    cs_tag: str
    # Start position of the cs tag.
    pos_start: int
    # End position of the cs tag.
    # NOTE(review): presumably the 1-indexed value produced by get_pos_end — confirm.
    pos_end: int
    # Chromosome name; None when no chromosome has been assigned.
    chrom: str | None = None

Class variables

var chrom : str | None
var cs_tag : str
var pos_end : int
var pos_start : int
class Vcf (chrom: str | None = None, pos: int | None = None, ref: str | None = None, alt: str | None = None, info: VcfInfo = VcfInfo(dp=None, rd=None, ad=None, vaf=None))

Vcf(chrom: 'str | None' = None, pos: 'int | None' = None, ref: 'str | None' = None, alt: 'str | None' = None, info: 'VcfInfo' = VcfInfo(dp=None, rd=None, ad=None, vaf=None))

Expand source code
@dataclass(frozen=True)
class Vcf:
    """Immutable record describing one variant; every field defaults to an unset value."""

    # NOTE(review): these four fields presumably map to the CHROM, POS, REF and ALT
    # columns of the emitted VCF record — confirm against the writer.
    chrom: str | None = None
    pos: int | None = None
    ref: str | None = None
    alt: str | None = None
    # The default is a single VcfInfo instance shared by all Vcf objects; this is
    # safe because VcfInfo is itself a frozen dataclass and cannot be mutated.
    info: VcfInfo = VcfInfo()

Class variables

var alt : str | None
var chrom : str | None
var info : VcfInfo
var pos : int | None
var ref : str | None
class VcfInfo (dp: int | None = None, rd: int | None = None, ad: int | None = None, vaf: float | None = None)

VcfInfo(dp: 'int | None' = None, rd: 'int | None' = None, ad: 'int | None' = None, vaf: 'float | None' = None)

Expand source code
@dataclass(frozen=True)
class VcfInfo:
    """Immutable container for the VCF info values DP, RD, AD and VAF; each defaults to None."""

    # NOTE(review): presumably the total read depth — confirm.
    dp: int | None = None
    # NOTE(review): presumably the reference depth (cf. call_reference_depth) — confirm.
    rd: int | None = None
    # NOTE(review): presumably the depth of reads supporting the alternate allele — confirm.
    ad: int | None = None
    # NOTE(review): presumably the variant allele frequency — confirm how it is derived.
    vaf: float | None = None

Class variables

var ad : int | None
var dp : int | None
var rd : int | None
var vaf : float | None