Module cstag.to_vcf
Functions
def add_vcf_fields(variant_annotations: list[Vcf], chrom: str, reference_depth: dict[tuple[str, int], int]) ‑> list[Vcf]
-
Add CHROM and the VCF INFO values (AD, RD, DP, and VAF) to the immutable Vcf dataclasses.
def call_reference_depth(variant_annotations, cs_tags_list, positions_list) ‑> dict[tuple[str, int], int]
def chrom_sort_key(chrom: str) ‑> int
-
Convert a chromosome string to an integer for sorting.
def find_ref_for_deletion(cs_tag_split: list[str], idx: int) ‑> str
def find_ref_for_insertion(cs_tag_split: list[str], idx: int) ‑> str | None
-
Format and filter cs_tags, and create a list of CsInfo objects.
This function takes lists of cs_tags, chromosomes, and positions. It filters out any cs_tags containing a tilde ("~") and creates a list of CsInfo objects.
Args
cs_tags
:list[str]
- List of cs_tags as strings.
chroms
:list[str] | list[int]
- List of chromosomes as strings or integers.
positions
:list[int]
- List of starting positions as integers.
Returns
list[CsInfo]
- A list of CsInfo objects, each containing information about
a cs_tag, its chromosome, and its start and end positions.
def get_pos_end(cs_tag: str, pos: int) ‑> int
-
Get the 1-indexed end position.
def get_variant_annotations(cs_tag_split: list[str], position: int) ‑> list[Vcf]
def group_by_chrom(cs_tags_formatted: list[tuple]) ‑> dict[str, tuple]
-
Group cs tags by chromosome.
def group_by_overlapping_intervals(cs_tags_grouped: CsInfo) ‑> list[CsInfo]
def process_cs_tag(cs_tag: str, chrom: str | int, pos: int) ‑> str
def remove_spaces_around_newlines(text: str) ‑> str
def replace_mutation_to_atmark(cs_tags: str) ‑> str
-
Replaces mutations with '@'.
def to_vcf(cs_tags: str | list[str], chroms: str | int | list[str] | list[int], positions: int | list[int]) ‑> str
-
Convert cs tag(s) to VCF (Variant Call Format) string.
Args
cs_tags
:str | list[str]
- The cs tag(s) representing the sequence alignment(s).
chroms
:str | int | list[str] | list[int]
- The chromosome name(s).
positions
:int | list[int]
- The starting position(s) of the sequence(s).
Returns
str
- The VCF-formatted string.
Example
>>> import cstag
>>> cs_tag = "=AC*gt=T-gg=C+tt=A"
>>> chrom = "chr1"
>>> pos = 1
>>> print(cstag.to_vcf(cs_tag, chrom, pos))
##fileformat=VCFv4.2
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 3 . G T . . .
chr1 4 . TGG T . . .
chr1 5 . C CTT . . .
Classes
class CsInfo (cs_tag: str, pos_start: int, pos_end: int, chrom: str | None = None)
-
CsInfo(cs_tag: 'str', pos_start: 'int', pos_end: 'int', chrom: 'str | None' = None)
Expand source code
@dataclass(frozen=True) class CsInfo: cs_tag: str pos_start: int pos_end: int chrom: str | None = None
Class variables
var chrom : str | None
var cs_tag : str
var pos_end : int
var pos_start : int
class Vcf (chrom: str | None = None, pos: int | None = None, ref: str | None = None, alt: str | None = None, info: VcfInfo = VcfInfo(dp=None, rd=None, ad=None, vaf=None))
-
Vcf(chrom: 'str | None' = None, pos: 'int | None' = None, ref: 'str | None' = None, alt: 'str | None' = None, info: 'VcfInfo' = VcfInfo(dp=None, rd=None, ad=None, vaf=None))
Expand source code
@dataclass(frozen=True) class Vcf: chrom: str | None = None pos: int | None = None ref: str | None = None alt: str | None = None info: VcfInfo = VcfInfo()
Class variables
var alt : str | None
var chrom : str | None
var info : VcfInfo
var pos : int | None
var ref : str | None
class VcfInfo (dp: int | None = None, rd: int | None = None, ad: int | None = None, vaf: float | None = None)
-
VcfInfo(dp: 'int | None' = None, rd: 'int | None' = None, ad: 'int | None' = None, vaf: 'float | None' = None)
Expand source code
@dataclass(frozen=True) class VcfInfo: dp: int | None = None rd: int | None = None ad: int | None = None vaf: float | None = None
Class variables
var ad : int | None
var dp : int | None
var rd : int | None
var vaf : float | None