Coverage for lib/utils.py: 38%

21 statements  

« prev     ^ index     » next       coverage.py v7.11.1, created at 2025-11-09 22:29 +0100

1""" 

2Utility functions for optwps package. 

3 

4This module provides helper functions for BAM file processing and file I/O operations. 

5""" 

6 

# Module-level alias for the built-in ``open``; ``exopen`` falls back to it
# for plain (non-gzip, non-stdout) paths.
# NOTE(review): presumably kept so the built-in stays reachable if ``open``
# is shadowed or re-exported elsewhere -- confirm against callers.
_open = open

8 

9import os 

10import sys 

11import pgzip 

12from contextlib import nullcontext 

13 

14 

def is_soft_clipped(cigar):
    """
    Check if a read has soft clipping in its CIGAR string.

    Soft clipping (op=4) indicates that some bases at the start or end of the
    read are not aligned to the reference but are present in the sequence.

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` or an empty list (read without an alignment) is treated
            as "no soft clipping".

    Returns:
        bool: True if any soft clipping operation is present, False otherwise
    """
    soft_clip_op = 4  # BAM CIGAR operation code for 'S'
    # ``cigartuples`` is None when the read carries no CIGAR; iterate over an
    # empty tuple in that case instead of raising TypeError.
    return any(op == soft_clip_op for op, _ in cigar or ())

30 

31 

def ref_aln_length(cigar):
    """
    Calculate the length of alignment on the reference sequence from CIGAR.

    Computes the total length consumed on the reference by summing lengths of
    operations that consume reference bases: M(0), D(2), N(3), =(7), X(8).

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` or an empty list (read without an alignment) yields 0.

    Returns:
        int: Total length on reference sequence
    """
    ref_consuming_ops = (0, 2, 3, 7, 8)
    # ``cigartuples`` is None when the read carries no CIGAR; treat that as a
    # zero-length alignment instead of raising TypeError.
    return sum(
        length for op, length in cigar or () if op in ref_consuming_ops
    )

47 

48 

def exopen(fil: str, mode: str = "r", *args, njobs=-1, **kwargs):
    """
    Open a file with automatic gzip support and parallel compression.

    This function wraps the standard open() function with automatic detection
    and handling of gzipped files using parallel compression (pgzip) for better
    performance on multi-core systems. Also supports writing to stdout.

    Args:
        fil (str): Path to the file to open. The literal string "stdout"
            selects standard output; a path ending in ".gz" is opened with
            pgzip.
        mode (str, optional): File open mode ('r', 'w', 'rb', 'wb', etc.).
            For ".gz" paths, text mode is selected unless the mode already
            contains 'b' or 't'. Default: 'r'
        *args: Additional positional arguments passed to open function
        njobs (int, optional): Number of parallel jobs for gzip compression,
            forwarded to pgzip as ``thread`` unless the caller passes
            ``thread`` explicitly. If -1, uses all available CPU cores.
            Default: -1
        **kwargs: Additional keyword arguments passed to open function

    Returns:
        file object: Opened file handle (standard or pgzip). For "stdout" the
        return value is ``nullcontext(sys.stdout)``, i.e. a context manager
        that yields ``sys.stdout`` and does not close it on exit.

    Raises:
        AssertionError: If "stdout" is requested with a read mode.
    """
    if njobs == -1:
        njobs = os.cpu_count()

    if fil == "stdout":
        if "r" in mode:
            # Explicit raise instead of ``assert`` so the check is not
            # stripped under ``python -O``; type and message are unchanged.
            raise AssertionError("Cannot open stdout in read mode")
        return nullcontext(sys.stdout)

    if fil.endswith(".gz"):
        # Default to text mode, but never append "t" to a mode that already
        # names a text/binary flag ("rt" -> "rtt" and "br" -> "brt" are
        # invalid modes).
        gz_mode = mode if ("b" in mode or "t" in mode) else mode + "t"
        # Actually hand the requested parallelism to pgzip; an explicit
        # ``thread=`` from the caller wins.
        kwargs.setdefault("thread", njobs)
        try:
            return pgzip.open(fil, gz_mode, *args, **kwargs)
        except Exception:
            # Best-effort retry without the caller's extra arguments, in case
            # pgzip rejected them. KeyboardInterrupt/SystemExit are no longer
            # swallowed here.
            return pgzip.open(fil, gz_mode, thread=njobs)

    return _open(fil, mode, *args, **kwargs)