Coverage for src/pullapprove/diff.py: 93%

75 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-11 10:20 -0500

1import re 

2from collections.abc import Iterator 

3 

4 

class DiffFile:
    """The pair of paths (before and after) recorded for one file in a diff."""

    def __init__(self, *, old_path: str, new_path: str):
        # Keyword-only arguments so call sites cannot silently swap the paths.
        self.old_path, self.new_path = old_path, new_path

    def __repr__(self) -> str:
        return f"<DiffFile old_path={self.old_path} new_path={self.new_path}>"

    def is_move(self) -> bool:
        """Return True when the old and new paths are not the same."""
        same_path = self.old_path == self.new_path
        return not same_path

15 

16 

17class DiffHunk: 

18 def __init__( 

19 self, 

20 *, 

21 old_line: int, 

22 old_length: int | None, 

23 new_line: int, 

24 new_length: int | None, 

25 ): 

26 self.old_line = old_line 

27 self.old_length = old_length 

28 self.new_line = new_line 

29 self.new_length = new_length 

30 

31 

class DiffCode:
    """A single line of diff content with its line number and change marker.

    ``change_type`` is the marker character for the line; an empty value is
    rendered as a single space by ``__str__`` and ``raw``.
    """

    def __init__(self, *, line_number: int, content: str, change_type: str):
        self.line_number = line_number
        self.content = content
        self.change_type = change_type

    def __str__(self) -> str:
        # An empty marker is shown as a space so the content stays aligned.
        marker = self.change_type or " "
        return f"{self.line_number}: {marker}{self.content}"

    def __repr__(self) -> str:
        return f"<DiffCode change_type={self.change_type} line_number={self.line_number} content={self.content}>"

    def raw(self) -> str:
        """Return the marker (or a space when empty) followed by the content."""
        marker = self.change_type or " "
        return f"{marker}{self.content}"

    # def is_change(self):
    #     return self.change_type in ("+", "-")

49 

50 

def parse_diff_file_line(line: str) -> DiffFile | None:
    """Parse a ``diff --git`` header line into a ``DiffFile``.

    Returns ``None`` when ``line`` is not a ``diff --git`` header whose two
    paths carry a single-character prefix (such as ``a/`` and ``b/``).

    The header separates the two paths with a plain space, so a path that
    itself contains a sequence like `` b/`` makes the split point ambiguous.
    Both paths are identical unless a rename/copy is involved, so that reading
    is tried first; the greedy split is kept as the fallback for renames.
    Surrounding whitespace is stripped from both paths.
    """
    # Same path on both sides: the back-reference pins the split point even
    # when the path contains spaces or a " x/" sequence.
    unchanged = re.match(r"diff --git \w/(.*) \w/\1\s*$", line)
    if unchanged:
        path = unchanged.group(1).strip()
        return DiffFile(old_path=path, new_path=path)

    # Different paths (rename/copy): split on the last " x/" in the line.
    match = re.match(r"^diff --git \w/(.*) \w/(.*)", line)
    if match:
        a_path, b_path = match.groups()
        return DiffFile(
            old_path=a_path.strip(),
            new_path=b_path.strip(),
        )
    return None

60 

61 

def parse_diff_hunk_line(line: str) -> DiffHunk | None:
    """Parse a ``@@ -a,b +c,d @@`` hunk header into a ``DiffHunk``.

    Returns ``None`` when ``line`` does not start with such a header. A length
    that is absent from the header is reported as ``None``.
    """
    header = re.match(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@", line)
    if header is None:
        return None

    old_start, old_count, new_start, new_count = header.groups()
    return DiffHunk(
        old_line=int(old_start),
        old_length=int(old_count) if old_count else None,
        new_line=int(new_start),
        new_length=int(new_count) if new_count else None,
    )

73 

74 

def iterate_diff_parts(diff: Iterator | str) -> Iterator[DiffFile | DiffCode]:
    """Walk a git diff and yield its files and code lines in order.

    ``diff`` is either the whole diff as one string (split with
    ``str.splitlines``) or an iterable that produces one line at a time.

    Yields a ``DiffFile`` for every ``diff --git`` header, followed by a
    ``DiffCode`` for every added (``+``), removed (``-``) and context (space)
    line of that file's hunks. Removed lines are numbered against the old
    file; added and context lines are numbered against the new file. The
    content is the line without its one-character prefix; nothing else is
    stripped from it.

    Lines before the first file header, the meta lines between a file header
    and its first hunk, and hunk lines with any other prefix are skipped.
    """
    current_file, current_hunk = None, None

    # Next line number on the old ("-") and new ("+") side of the current hunk.
    hunk_minus_line_number, hunk_plus_line_number = 0, 0

    diff_iterator = diff.splitlines() if isinstance(diff, str) else diff

    for raw in diff_iterator:
        if new_file := parse_diff_file_line(raw):
            current_file = new_file
            current_hunk = None  # A new file always starts outside any hunk.
            yield new_file
            continue

        if current_file is None:
            continue

        if new_hunk := parse_diff_hunk_line(raw):
            # Any text after the closing "@@" is the section heading (for
            # example the enclosing function signature). It is a copy of a
            # line located *before* the hunk, not the hunk's first line, so it
            # must not be yielded as content and must not advance the
            # counters: the line following the header is exactly
            # old_line / new_line.
            current_hunk = new_hunk
            hunk_minus_line_number = new_hunk.old_line
            hunk_plus_line_number = new_hunk.new_line
            continue

        if current_hunk is None:
            # Header/meta lines between the file header and its first hunk.
            continue

        if raw.startswith("+"):
            yield DiffCode(
                line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="+",
            )
            hunk_plus_line_number += 1
        elif raw.startswith("-"):
            yield DiffCode(
                line_number=hunk_minus_line_number,
                content=raw[1:],
                change_type="-",
            )
            hunk_minus_line_number += 1
        elif raw.startswith(" "):
            # Context lines exist on both sides; only the new-side number is
            # reported, but both counters advance.
            yield DiffCode(
                line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="",
            )
            hunk_plus_line_number += 1
            hunk_minus_line_number += 1
        # Any other prefix is not a code line and is skipped.