Coverage for lib/utils.py: 38%
21 statements
« prev ^ index » next coverage.py v7.11.1, created at 2025-11-09 22:29 +0100
« prev ^ index » next coverage.py v7.11.1, created at 2025-11-09 22:29 +0100
1"""
2Utility functions for optwps package.
4This module provides helper functions for BAM file processing and file I/O operations.
5"""
7_open = open
9import os
10import sys
11import pgzip
12from contextlib import nullcontext
def is_soft_clipped(cigar):
    """
    Check if a read has soft clipping in its CIGAR string.

    Soft clipping (op=4) indicates that some bases at the start or end of the
    read are not aligned to the reference but are present in the sequence.

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` (no CIGAR available) is treated as "no soft clipping".

    Returns:
        bool: True if any soft clipping operation is present, False otherwise
    """
    # A read without a CIGAR has nothing to inspect; report "not clipped"
    # instead of failing with a TypeError while iterating None.
    if cigar is None:
        return False
    return any(op == 4 for op, _ in cigar)
def ref_aln_length(cigar):
    """
    Calculate the length of alignment on the reference sequence from CIGAR.

    Computes the total length consumed on the reference by summing lengths of
    operations that consume reference bases: M(0), D(2), N(3), =(7), X(8).

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` (no CIGAR available) yields a length of 0.

    Returns:
        int: Total length on reference sequence
    """
    # A read without a CIGAR covers no reference bases; return 0 instead of
    # failing with a TypeError while iterating None.
    if cigar is None:
        return 0
    # Operation codes that advance the position on the reference.
    ref_consuming_ops = (0, 2, 3, 7, 8)
    return sum(length for op, length in cigar if op in ref_consuming_ops)
def exopen(fil: str, mode: str = "r", *args, njobs=-1, **kwargs):
    """
    Open a file with automatic gzip support and parallel compression.

    This function wraps the standard open() function with automatic detection
    and handling of gzipped files using parallel compression (pgzip) for better
    performance on multi-core systems. Also supports writing to stdout.

    Args:
        fil (str): Path to the file to open, or the literal string "stdout"
        mode (str, optional): File open mode ('r', 'w', 'rb', 'wb', etc.).
            Default: 'r'. For ".gz" paths a mode that names neither text
            ('t') nor binary ('b') is opened in text mode.
        *args: Additional positional arguments passed to open function
        njobs (int, optional): Number of parallel jobs for gzip compression.
            If -1, uses all available CPU cores. Default: -1
        **kwargs: Additional keyword arguments passed to open function

    Returns:
        file object: Opened file handle (standard or pgzip). For "stdout" a
        context manager that yields sys.stdout and does not close it on exit.

    Raises:
        AssertionError: If "stdout" is requested with a read mode.
    """
    if fil == "stdout":
        # Raised explicitly (rather than via `assert`) so the check still
        # runs when Python is started with -O.
        if "r" in mode:
            raise AssertionError("Cannot open stdout in read mode")
        return nullcontext(sys.stdout)

    if not fil.endswith(".gz"):
        return _open(fil, mode, *args, **kwargs)

    if njobs == -1:
        njobs = os.cpu_count()

    # Only default to text mode when the caller chose neither text nor
    # binary; appending "t" to e.g. "wt" would produce an invalid mode.
    gz_mode = mode if ("b" in mode or "t" in mode) else mode + "t"

    # Forward the requested parallelism to pgzip unless the caller already
    # supplied an explicit `thread` value.
    gz_kwargs = dict(kwargs)
    gz_kwargs.setdefault("thread", njobs)

    try:
        return pgzip.open(fil, gz_mode, *args, **gz_kwargs)
    except Exception:
        # Best-effort retry without the extra arguments, in case one of them
        # is not accepted by pgzip. Genuine I/O errors are raised again by
        # this second call. KeyboardInterrupt/SystemExit are not intercepted.
        return pgzip.open(fil, gz_mode)