Coverage for src/pullapprove/diff.py: 93%
75 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-11 10:20 -0500
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-11 10:20 -0500
1import re
2from collections.abc import Iterator
class DiffFile:
    """The pair of paths named by one file entry of a diff.

    Both paths are keyword-only and stored unchanged on the instance.
    """

    def __init__(self, *, old_path: str, new_path: str):
        self.old_path, self.new_path = old_path, new_path

    def __repr__(self) -> str:
        return f"<DiffFile old_path={self.old_path} new_path={self.new_path}>"

    def is_move(self) -> bool:
        """Return True when the old and new paths are not equal."""
        return self.old_path != self.new_path
17class DiffHunk:
18 def __init__(
19 self,
20 *,
21 old_line: int,
22 old_length: int | None,
23 new_line: int,
24 new_length: int | None,
25 ):
26 self.old_line = old_line
27 self.old_length = old_length
28 self.new_line = new_line
29 self.new_length = new_length
class DiffCode:
    """One line of diff content together with its line number.

    ``change_type`` is the marker stored for the line; when it is empty the
    textual renderings below substitute a single space for it.
    """

    def __init__(self, *, line_number: int, content: str, change_type: str):
        self.line_number, self.content = line_number, content
        self.change_type = change_type

    def __str__(self) -> str:
        # Line number, then the marker (or a space) and the content.
        return f"{self.line_number}: {self.change_type or ' '}{self.content}"

    def __repr__(self) -> str:
        return f"<DiffCode change_type={self.change_type} line_number={self.line_number} content={self.content}>"

    def raw(self) -> str:
        """Return the marker (or a space when it is empty) followed by the content."""
        return f"{self.change_type or ' '}{self.content}"
def parse_diff_file_line(line: str) -> DiffFile | None:
    """Parse a ``diff --git <x>/<old> <y>/<new>`` header line.

    Returns a ``DiffFile`` built from the two captured paths with surrounding
    whitespace stripped, or None when ``line`` is not such a header.
    """
    header = re.match(r"^diff --git \w/(.*) \w/(.*)", line)
    if header is None:
        return None

    old_path, new_path = (captured.strip() for captured in header.groups())
    return DiffFile(old_path=old_path, new_path=new_path)
def parse_diff_hunk_line(line: str) -> DiffHunk | None:
    """Parse a ``@@ -a,b +c,d @@`` hunk header line.

    Returns a ``DiffHunk`` with the start lines as ints. A length that is
    absent from the header (``@@ -a +c @@``) is stored as None. Returns None
    when ``line`` does not start with a hunk header.
    """
    header = re.match(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@", line)
    if header is None:
        return None

    old_start, old_count, new_start, new_count = header.groups()
    return DiffHunk(
        old_line=int(old_start),
        old_length=int(old_count) if old_count else None,
        new_line=int(new_start),
        new_length=int(new_count) if new_count else None,
    )
def iterate_diff_parts(diff: Iterator | str):
    """Walk a unified diff and yield its files and hunk lines in order.

    ``diff`` is either the whole diff as a single string (split with
    ``str.splitlines``) or an iterable producing one line at a time.

    Yields a ``DiffFile`` for every ``diff --git`` header line and a
    ``DiffCode`` for every added ("+"), removed ("-") or unchanged (" ") line
    found inside a hunk. Added and unchanged lines carry the line number on
    the new side of the diff; removed lines carry the line number on the old
    side. Every other line (lines before the first file header, metadata
    between a file header and its first hunk, hunk headers themselves, and
    lines inside a hunk with any other prefix) is skipped.
    """
    current_file, current_hunk = None, None

    # Next line number on the old ("-") and new ("+") side of the current hunk.
    hunk_minus_line_number, hunk_plus_line_number = 0, 0

    diff_iterator = diff.splitlines() if isinstance(diff, str) else diff

    for raw in diff_iterator:
        if new_file := parse_diff_file_line(raw):
            current_file = new_file
            current_hunk = None  # A new file always starts outside any hunk.
            yield new_file
            continue

        if current_file is None:
            # Nothing is reported until the first file header has been seen.
            continue

        if new_hunk := parse_diff_hunk_line(raw):
            current_hunk = new_hunk
            hunk_minus_line_number = new_hunk.old_line
            hunk_plus_line_number = new_hunk.new_line
            # Any text after the closing "@@" (for example
            # "@@ -6,7 +6,7 @@ def foo():") is the section heading git adds
            # for orientation: a copy of an earlier line of the file that
            # precedes the hunk. It is not one of the hunk's lines, so it is
            # deliberately not yielded and the counters are not advanced.
            # Treating it as the first context line would shift every line
            # number in the hunk by one.
            continue

        if current_hunk is None:
            # Header/meta lines between the file header and its first hunk.
            continue

        if raw.startswith("+"):
            yield DiffCode(
                line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="+",
            )
            hunk_plus_line_number += 1
        elif raw.startswith("-"):
            yield DiffCode(
                line_number=hunk_minus_line_number,
                content=raw[1:],
                change_type="-",
            )
            hunk_minus_line_number += 1
        elif raw.startswith(" "):
            # Unchanged lines exist on both sides; they are reported with the
            # new-side number and advance both counters.
            yield DiffCode(
                line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="",
            )
            hunk_plus_line_number += 1
            hunk_minus_line_number += 1
        # Lines with any other prefix are ignored.