Coverage for src/pullapprove/pullrequests.py: 42%

344 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-11 16:46 -0500

1import re 

2from collections.abc import Generator 

3from enum import Enum 

4from random import Random 

5from typing import Optional 

6 

7from pydantic import BaseModel, ConfigDict, Field 

8 

9from .config import ( 

10 ConfigModel, 

11 ConfigModels, 

12 LargeScaleChangeModel, 

13 OwnershipChoices, 

14 ReviewedForChoices, 

15 ScopeModel, 

16) 

17from .matches import ChangeMatches, ScopeCodeMatch, ScopePathMatch, match_diff 

18 

19 

# Not a bool: a status is one of PASS, FAIL, ERROR, PENDING, or the empty string.
class Status(str, Enum):
    """Evaluation status used by the result models in this module.

    The enum mixes in ``str``, so each member compares equal to its raw
    string value. ``EMPTY`` is the empty string and is therefore falsy.
    """

    PASS = "PASS"
    FAIL = "FAIL"
    ERROR = "ERROR"
    PENDING = "PENDING"
    EMPTY = ""

27 

28 

class User(BaseModel):
    """A user identified by a host id and a username.

    Equality is deliberately loose: two ``User`` objects are equal when their
    ``host_id`` matches, and a ``User`` equals a plain string that matches
    either its ``host_id`` or its ``username``.
    """

    model_config = ConfigDict(extra="forbid")

    host_id: str = Field(min_length=1)
    username: str = Field(min_length=1)
    avatar_url: str

    def __str__(self) -> str:
        """Render the user as their username."""
        return self.username

    def __eq__(self, value):
        """Compare against another ``User`` (by host id) or a string identifier."""
        if isinstance(value, str):
            # A bare string may be either identifier.
            return self.host_id == value or self.username == value
        if isinstance(value, User):
            return self.host_id == value.host_id
        # Anything else is never considered equal.
        return False

45 

46 

class ReviewStates(str, Enum):
    """State of a single review.

    The enum mixes in ``str``, so members compare equal to their raw string
    values. ``EMPTY`` is the empty string (falsy); ``Reviewer.latest_review``
    skips reviews in that state.
    """

    APPROVED = "APPROVED"
    PENDING = "PENDING"
    CHANGES_REQUESTED = "CHANGES_REQUESTED"
    EMPTY = ""

52 

53 

class Review(BaseModel):
    """A single review left on a pull request by a user."""

    model_config = ConfigDict(extra="forbid")

    host_id: str = Field(min_length=1)
    host_url: str = Field(min_length=1)
    body: str
    state: ReviewStates
    # Reviews are ordered by comparing this string (see Reviewer.latest_review);
    # presumably an ISO-8601 timestamp -- TODO confirm against the host adapters.
    submitted_at: str
    user: User

    def get_reviewed_for_scopes(self):
        """Return the scope names listed after the first ``Reviewed-for:`` marker.

        The marker is matched case-insensitively. Scope names are separated by
        commas, and spaces or tabs around a comma are tolerated, so
        ``Reviewed-for: a, b`` yields ``["a", "b"]``. Empty entries (for
        example from a trailing comma) are dropped.

        Returns:
            A list of scope names, or an empty list when the body is empty or
            contains no marker.
        """
        if not self.body:
            return []

        # Capture a comma-separated list on a single line. Only spaces/tabs
        # are allowed around commas so the list cannot run onto the next line.
        marker = re.search(
            r"Reviewed-for:\s*([^\s,]*(?:[ \t]*,[ \t]*[^\s,]*)*)",
            self.body,
            re.IGNORECASE,
        )
        if marker is None:
            return []

        names = (name.strip() for name in marker.group(1).split(","))
        return [name for name in names if name]

73 

74 

class Reviewer(BaseModel):
    """A user attached to a pull request together with the reviews they left."""

    model_config = ConfigDict(extra="forbid")

    reviews: list[Review]
    user: User

    def __str__(self) -> str:
        """Render the reviewer as their user's string form."""
        return str(self.user)

    def latest_review(self, scope=None) -> Review | None:
        """Return the most recent review that counts for ``scope``, if any.

        Reviews are examined newest first (by ``submitted_at``). When a scope
        is given and its ``reviewed_for`` setting is not ``IGNORED``, a
        review's ``Reviewed-for`` scopes are honored: a review naming other
        scopes is skipped, and a review naming no scopes is skipped when the
        scope makes ``Reviewed-for`` ``REQUIRED``. Reviews with an empty state
        are always skipped.
        """
        if not self.reviews:
            return None

        # Newest first: the most recent valid review is the one we want.
        newest_first = list(self.reviews)
        newest_first.sort(key=lambda item: item.submitted_at, reverse=True)

        for candidate in newest_first:
            if scope and scope.reviewed_for != ReviewedForChoices.IGNORED:
                named_scopes = candidate.get_reviewed_for_scopes()

                if named_scopes:
                    # The review targets specific scopes; ours must be one of them.
                    if scope.name not in named_scopes:
                        continue
                elif scope.reviewed_for == ReviewedForChoices.REQUIRED:
                    # Some scopes are required, so the list can't be empty.
                    continue

                # Otherwise no scopes were named and that is ok for everything.

            # If a review has no known state, we skip it (commented on GitHub).
            if candidate.state:
                return candidate

        return None

    def get_review_state(self) -> ReviewStates:
        """Return the state of the latest review, or ``PENDING`` when there is none."""
        latest = self.latest_review()
        # They are pending if they are a reviewer with no specific state.
        return latest.state if latest else ReviewStates.PENDING

121 

122 

class Branch(BaseModel):
    """A branch referenced by a pull request, identified by a non-empty name."""

    model_config = ConfigDict(extra="forbid")

    name: str = Field(min_length=1)
    # could be fork, other repo...

128 

129 

class Commit(BaseModel):
    """A commit on a pull request: its sha, message, author, and co-authors."""

    model_config = ConfigDict(extra="forbid")

    sha: str = Field(min_length=1)
    message: str
    author: User
    coauthors: list[User]

137 

138 

class PullRequest(BaseModel):
    """A pull request and the logic that evaluates it against review configs."""

    model_config = ConfigDict(extra="forbid")

    base_branch: Branch
    head_branch: Branch
    commits: list[Commit]
    reviewers: list[Reviewer]  # Includes requested and previous reviewers
    author: User
    # Excluded from serialization; passed straight to match_diff().
    diff: str | Generator = Field(exclude=True, default="")
    number: int
    draft: bool

    # Configs actually come from outside the PR, so we don't attach it here

    def get_reviewer(self, identifier):
        """Return the reviewer whose host id or username equals ``identifier``, else None."""
        for reviewer in self.reviewers:
            user = reviewer.user
            if user.host_id == identifier or user.username == identifier:
                return reviewer

        return None

    def _collect_reviews(self, eligible_usernames, scope=None):
        """Tally the latest reviews from reviewers who are eligible.

        Args:
            eligible_usernames: An iterable of username collections; a reviewer
                is considered when their username is in any of them.
            scope: Optional scope forwarded to ``Reviewer.latest_review``.

        Returns:
            A ``(reviews, approved_points, pending_points)`` tuple, where
            ``reviews`` holds the latest review of each eligible reviewer that
            has one, in ``self.reviewers`` order.
        """
        latest_reviews = []
        approved_points = 0
        pending_points = 0

        for reviewer in self.reviewers:
            # Could maybe enable host id, or email too
            username = reviewer.user.username
            if not any(username in group for group in eligible_usernames):
                continue

            review = reviewer.latest_review(scope=scope)
            if not review:
                # They exist on the PR but with no review yet
                pending_points += 1
                continue

            latest_reviews.append(review)
            if review.state == ReviewStates.APPROVED:
                approved_points += 1
            elif review.state in (
                ReviewStates.PENDING,
                ReviewStates.CHANGES_REQUESTED,
            ):
                pending_points += 1

        return latest_reviews, approved_points, pending_points

    def process_configs(self, configs: ConfigModels) -> Optional["PullRequestResults"]:
        """Evaluate this pull request against ``configs``.

        Returns:
            ``None`` when ``configs`` is empty or nothing remains after
            ``filter_for_pullrequest``; otherwise the computed
            ``PullRequestResults``. A large-scale change is delegated to
            ``process_large_scale_change``.
        """
        if not configs:
            return None

        filtered_configs = configs.filter_for_pullrequest(self)

        # If there are no configs, or they are disabled, then we can return early
        if not filtered_configs:
            return None

        change_matches, config_paths_modified = match_diff(filtered_configs, self.diff)

        # If it's a large scale change, that's the only thing we need to consider (after branches)
        if change_matches.large_scale_change:
            return self.process_large_scale_change(change_matches)

        results = PullRequestResults(
            pullrequest=self,
            status=Status.PENDING,
            description="",
            labels=[],
            large_scale_change_results=None,
            scope_results={},
            path_results={},
            code_results={},
            review_results={},
            config_results={
                path: ConfigResult.from_config_model(config)
                for path, config in change_matches.configs.items()
            },
            config_paths_modified=config_paths_modified,
        )

        # Iterate the active scopes and get their results
        for scope_name, scope_model in change_matches.scopes.items():
            scope_reviews, review_points, pending_points = self._collect_reviews(
                (scope_model.reviewers, scope_model.alternates),
                scope=scope_model,
            )

            for review in scope_reviews:
                results.review_results[review.host_id] = ReviewResult(
                    review=review,
                    scopes=review.get_reviewed_for_scopes(),
                )
            reviews = [review.host_id for review in scope_reviews]

            if self.author.username in scope_model.reviewers:
                author_points = scope_model.author_value
            else:
                author_points = 0

            # TODO commit points
            commit_points = 0

            points = review_points + author_points + commit_points

            # A single "changes requested" fails the scope regardless of points.
            if any(
                review.state == ReviewStates.CHANGES_REQUESTED
                for review in scope_reviews
            ):
                status = Status.FAIL
            elif points >= scope_model.require:
                status = Status.PASS
            else:
                status = Status.PENDING

            matched_paths = [
                path
                for path, path_match in change_matches.paths.items()
                if scope_name in path_match.scopes
            ]
            matched_code = [
                code
                for code, code_match in change_matches.code.items()
                if scope_name in code_match.scopes
            ]

            results.scope_results[scope_name] = ScopeResult(
                scope=scope_model,
                status=status,
                points=points,
                # separate commit points, review points, author points etc?
                points_pending=pending_points,  # Not using this anywhere? would tell us how many to request...
                reviews=reviews,
                matched_paths=matched_paths,
                matched_code=matched_code,
            )

        # Now we have to get the status of the results overall by looking
        # at the paths and code, because scopes can combine based on their ownership model,
        # so looking at scopes alone isn't enough.

        for path, path_match in change_matches.paths.items():
            results.path_results[path] = PathResult(
                path=path_match,
                status=results.status_for_scope_names(path_match.scopes),
                reviews=results.reviews_for_scope_names(path_match.scopes),
            )

        for code_hash, code_match in change_matches.code.items():
            results.code_results[code_hash] = CodeResult(
                code=code_match,
                status=results.status_for_scope_names(code_match.scopes),
                reviews=results.reviews_for_scope_names(code_match.scopes),
            )

        # TODO what happens if no scopes match?
        # configurable in pullapprove.com?

        results.status = results.compute_status()
        results.description = results.compute_description()
        results.labels = results.compute_labels()

        return results

    def process_large_scale_change(
        self, change_matches: ChangeMatches
    ) -> "PullRequestResults":
        """Evaluate this pull request purely as a large-scale change.

        Only reviewers listed on the large-scale-change config count. The
        result fails when any of them requested changes or when no reviewers
        are configured at all, passes once enough of them approved, and is
        pending otherwise.
        """
        lsc = change_matches.large_scale_change

        config_results = {
            path: ConfigResult.from_config_model(config)
            for path, config in change_matches.configs.items()
        }

        # TODO what about Reviewed-for?
        lsc_reviews, review_points, pending_points = self._collect_reviews(
            (lsc.reviewers,), scope=None
        )
        reviews = [review.host_id for review in lsc_reviews]
        review_results = {
            review.host_id: ReviewResult(
                review=review,
                scopes=review.get_reviewed_for_scopes(),
            )
            for review in lsc_reviews
        }

        if any(
            review.state == ReviewStates.CHANGES_REQUESTED for review in lsc_reviews
        ):
            status = Status.FAIL
            description = "Large-scale change: changes requested"
        elif review_points >= lsc.require:
            status = Status.PASS
            description = "Large-scale change: approved"
        else:
            status = Status.PENDING
            description = f"Large-scale change: {review_points} of {lsc.require} reviewers approved"

        # If reviewers were not defined (default LSC config),
        # then we show an error.
        if not lsc.reviewers:
            status = Status.FAIL
            description = (
                "Large-scale change: configuration required (no reviewers defined)"
            )

        return PullRequestResults(
            status=status,
            description=description,
            labels=lsc.labels,
            large_scale_change_results=LargeScaleChangeResults(
                large_scale_change=lsc,
                status=status,
                points=review_points,
                points_pending=pending_points,
                reviews=reviews,
            ),
            scope_results={},
            path_results={},
            code_results={},
            review_results=review_results,
            pullrequest=self,
            config_results=config_results,
            config_paths_modified=[],
        )

375 

376 

class LargeScaleChangeResults(BaseModel):
    """Outcome of evaluating a pull request as a large-scale change."""

    model_config = ConfigDict(extra="forbid")

    large_scale_change: LargeScaleChangeModel
    status: Status
    points: int  # Number of approving reviews
    points_pending: int  # Eligible reviewers who have not approved
    reviews: list[str]  # Review references (review host ids)

385 

386 

class PullRequestResults(BaseModel):
    """Complete outcome of evaluating a pull request against its configs."""

    model_config = ConfigDict(extra="forbid")

    # Apart from config_paths_modified, fields have no defaults, so they must
    # always be supplied when building a result.
    status: Status
    description: str
    labels: list[str]
    # comments?

    config_paths_modified: list[str] = Field(
        default_factory=list
    )  # Paths that were modified in the PR

    pullrequest: PullRequest

    large_scale_change_results: LargeScaleChangeResults | None
    scope_results: dict[str, "ScopeResult"]
    path_results: dict[str, "PathResult"]
    code_results: dict[str, "CodeResult"]
    review_results: dict[str, "ReviewResult"]  # Latest reviews and their scopes...
    config_results: dict[str, "ConfigResult"]

    def as_dict(self) -> dict:
        """
        Dump the results as a dictionary, omitting any values that are the same
        as the defaults (we always use "empty" defaults) -- this keeps the stored JSON more minimal.

        In the UI, the actual models are reloaded from the dict, so it is ok that we don't have all the information in the stored dict.
        """
        return self.model_dump(exclude_defaults=True)

    @classmethod
    def from_dict(cls, data: dict):
        """Rebuild results from a dictionary such as the one produced by ``as_dict``."""
        return cls(**data)

    def get_scope_results_by_name(self, names):
        """
        Get scopes by name (from other result objects),
        and return them in ordered_scope_results() order.
        """
        return [
            scope_result
            for scope_result in self.ordered_scope_results()
            if scope_result.scope.name in names
        ]

    def ordered_scope_results(self):
        """Order by ownership (primary will naturally come first, then appended, then global)"""
        return sorted(
            self.scope_results.values(),
            key=lambda s: s.scope.ownership,
        )

    def scope_results_pending(self):
        """Get all non-global scope results that are pending"""
        return [
            scope_result
            for scope_result in self.scope_results.values()
            if scope_result.status == Status.PENDING
            and scope_result.scope.ownership != OwnershipChoices.GLOBAL
        ]

    def path_results_pending(self):
        """Get all path results that are pending"""
        return [
            path_result
            for path_result in self.path_results.values()
            if path_result.status == Status.PENDING
        ]

    def code_results_pending(self):
        """Get all code results that are pending"""
        return [
            code_result
            for code_result in self.code_results.values()
            if code_result.status == Status.PENDING
        ]

    def status_for_scope_names(self, scope_names: list[str]) -> Status:
        """
        Combine the statuses of the named scopes into a single status.

        A single scope decides on its own. Otherwise any failure fails, any
        passing global scope passes, and all non-global scopes passing passes;
        everything else is pending.

        Raises KeyError if a name is not present in ``scope_results``.
        """
        scope_results = [self.scope_results[scope_name] for scope_name in scope_names]

        # If there's a single scope, use that result (whether it is global, or normal, etc)
        if len(scope_results) == 1:
            return scope_results[0].status

        # If any scope failed, then we fail
        if any(scope.status == Status.FAIL for scope in scope_results):
            return Status.FAIL

        global_scopes = [
            scope
            for scope in scope_results
            if scope.scope.ownership == OwnershipChoices.GLOBAL
        ]
        nonglobal_scopes = [
            scope
            for scope in scope_results
            if scope.scope.ownership != OwnershipChoices.GLOBAL
        ]

        # If any global scopes approved, then we pass
        if any(scope.status == Status.PASS for scope in global_scopes):
            return Status.PASS

        # If all regular scopes approved, then we pass
        # NOTE(review): all() of an empty list is True, so several global-only
        # scopes that are all pending yield PASS here, while a single pending
        # global scope yields PENDING above -- confirm this is intended.
        if all(scope.status == Status.PASS for scope in nonglobal_scopes):
            return Status.PASS

        return Status.PENDING

    def reviews_for_scope_names(self, scope_names: list[str]) -> list[str]:
        """
        Collect the review references of the named scopes, in the order given.

        Raises KeyError if a name is not present in ``scope_results``.
        """
        reviews = []
        for scope_name in scope_names:
            reviews.extend(self.scope_results[scope_name].reviews)
        return reviews

    def compute_status(self) -> Status:
        """Derive the overall status from the path and code results.

        Drafts are always pending. Otherwise any failing path or code result
        fails the pull request, any pending one makes it pending, and with
        neither the status is PASS.
        """
        if self.pullrequest.draft:
            return Status.PENDING

        # Assume passing status to start
        # TODO is this the unmatched status? what if there are no enabled scopes
        status = Status.PASS

        for path_results in self.path_results.values():
            if path_results.status == Status.FAIL:
                return Status.FAIL  # Immediately fail if any fail
            elif path_results.status == Status.PENDING:
                status = Status.PENDING  # Move to pending (could fail later)

        for code_results in self.code_results.values():
            if code_results.status == Status.FAIL:
                return Status.FAIL  # Immediately fail if any fail
            elif code_results.status == Status.PENDING:
                status = Status.PENDING  # Move to pending (could fail later)

        return status

    def compute_description(self) -> str:
        """Build the one-line summary for the current ``status``.

        Returns an empty string for statuses other than PASS, FAIL, and
        PENDING.
        """
        if self.pullrequest.draft:
            return "Draft is not ready for review"

        if self.status == Status.PASS:
            # In success, want to know how many scopes passed
            scopes_passed = [
                scope
                for scope in self.scope_results.values()
                if scope.status == Status.PASS
            ]

            if not scopes_passed:
                # If the status was pass, but there are no scopes, then there were none assigned
                return "No review scopes are required"

            scope_text = "scope" if len(scopes_passed) == 1 else "scopes"
            return f"{len(scopes_passed)} review {scope_text} passed"
        elif self.status == Status.FAIL:
            scopes_failed = [
                scope
                for scope in self.scope_results.values()
                if scope.status == Status.FAIL
            ]
            scope_text = "scope" if len(scopes_failed) == 1 else "scopes"
            return f"{len(scopes_failed)} review {scope_text} failed"
        elif self.status == Status.PENDING:
            # In pending, want to know how many scopes are pending
            scopes_passed = [
                scope
                for scope in self.scope_results.values()
                if scope.status == Status.PASS
            ]
            # Pending global scopes are not counted.
            scopes_pending = [
                scope
                for scope in self.scope_results.values()
                if scope.status == Status.PENDING
                and scope.scope.ownership != OwnershipChoices.GLOBAL
            ]
            pending_text = "scope" if len(scopes_pending) == 1 else "scopes"
            if scopes_passed:
                passed_text = "scope" if len(scopes_passed) == 1 else "scopes"
                return f"{len(scopes_pending)} review {pending_text} pending, {len(scopes_passed)} review {passed_text} passed"
            else:
                return f"{len(scopes_pending)} review {pending_text} pending"
        else:
            return ""

    def compute_labels(self) -> list[str]:
        """Return the de-duplicated labels of all scope results, sorted.

        Sorting keeps the output stable between runs; iterating a set of
        strings directly gives an order that can change from one process to
        the next.
        """
        labels = set()

        for scope_result in self.scope_results.values():
            labels.update(scope_result.scope.labels)

        return sorted(labels)

    def compute_overview(self) -> str:
        """Build a concise markdown overview for GitHub pull request comments."""
        parts = [f"**{self.status.value}**: {self.description}\n\n"]

        if self.large_scale_change_results:
            lsc = self.large_scale_change_results
            parts.append(
                "### Large Scale Change\n\n"
                f"- Status: {lsc.status.value}\n"
                f"- Points: {lsc.points} (Pending: {lsc.points_pending})\n"
            )

        parts.append("## Matched Scopes\n\n")
        # Only scopes that actually matched a path or code are listed.
        matched_scopes = [
            sr
            for sr in self.ordered_scope_results()
            if sr.matched_paths or sr.matched_code
        ]

        if matched_scopes:
            for scope_result in matched_scopes:
                line = (
                    f"- **{scope_result.scope.printed_name()}**: {scope_result.status.value}"
                    f" ({scope_result.points}/{scope_result.scope.require})"
                )
                if scope_result.scope.cc:
                    line += " cc: " + " ".join(f"@{u}" for u in scope_result.scope.cc)
                parts.append(line + "\n")

                if scope_result.scope.instructions:
                    parts.append(
                        " <details>\n"
                        f" {scope_result.scope.instructions}\n"
                        " </details>\n"
                    )
        else:
            parts.append("- None\n")

        return "".join(parts)

    def rebuild_config_models(self) -> ConfigModels:
        """
        Rebuild the ConfigModels from the config_results.
        This is useful for when we want to get the configs back from the results.
        """
        configs = ConfigModels(root={})
        for path, config_result in self.config_results.items():
            configs.add_config(config_result.config, path)
        return configs

641 

642 

class ConfigResult(BaseModel):
    """Wrapper that stores a config model inside the results."""

    model_config = ConfigDict(extra="forbid")

    config: ConfigModel

    @classmethod
    def from_config_model(cls, config_model: ConfigModel):
        """Alternate constructor: wrap ``config_model`` in a ``ConfigResult``."""
        return cls(config=config_model)

653 

654 

class ReviewResult(BaseModel):
    """A review together with the scope names parsed from its ``Reviewed-for`` marker."""

    model_config = ConfigDict(extra="forbid")

    review: Review
    scopes: list[str]

660 

661 

class ScopeResult(BaseModel):
    """Outcome of evaluating one scope for a pull request."""

    model_config = ConfigDict(extra="forbid")

    scope: ScopeModel
    status: Status  # and/or review_status?
    points: int
    points_pending: int

    reviews: list[str]  # Review references
    matched_paths: list[str]  # Path result references
    matched_code: list[str]  # Code result references

    def is_notable(self):
        """Return False for a global scope that is still pending, True otherwise."""
        # In some cases, we don't care much about scopes that are global and not in use, for example
        return not (
            self.scope.ownership == OwnershipChoices.GLOBAL
            and self.status == Status.PENDING
        )

    def reviewers_to_request(
        self, pullrequest_results: PullRequestResults
    ) -> list[str]:
        """Pick the usernames that should be asked to review for this scope.

        Nobody is requested when ``scope.request`` is 0 or the scope has no
        reviewers. A negative ``scope.request`` returns every eligible
        reviewer. A positive value returns only as many as are still needed
        after counting current and pending points, chosen in a shuffled order
        that is seeded by the pull request number so it is repeatable.

        Users who already have a review in ``pullrequest_results`` and the
        pull request author are never eligible.
        """
        if self.scope.request == 0 or not self.scope.reviewers:
            return []

        request_everyone = self.scope.request < 0

        # A negative request never "runs out", so only a positive request can
        # be satisfied by the points already collected.
        additional_reviewers_needed = (
            self.scope.request - self.points - self.points_pending
        )
        if not request_everyone and additional_reviewers_needed <= 0:
            return []

        # Compare by username: scope.reviewers holds usernames, while
        # review_results holds ReviewResult objects.
        already_reviewed = {
            review_result.review.user.username
            for review_result in pullrequest_results.review_results.values()
        }
        author_username = pullrequest_results.pullrequest.author.username

        # Drop people who already reviewed, and the author.
        eligible_logins = [
            login
            for login in self.scope.reviewers
            if login not in already_reviewed and login != author_username
        ]

        if request_everyone:
            return eligible_logins

        # Put the reviewers in a predictable random order for this PR
        Random(pullrequest_results.pullrequest.number).shuffle(eligible_logins)

        return eligible_logins[:additional_reviewers_needed]

712 

713 

class PathResult(BaseModel):
    """Combined status and reviews for one matched path."""

    model_config = ConfigDict(extra="forbid")

    path: ScopePathMatch
    status: Status
    reviews: list[str]  # Review references

720 

721 

class CodeResult(BaseModel):
    """Combined status and reviews for one matched piece of code."""

    model_config = ConfigDict(extra="forbid")

    code: ScopeCodeMatch
    status: Status
    reviews: list[str]  # Review references