Coverage for src / moai_adk / utils / link_validator.py: 0.00%

128 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-20 20:52 +0900

1""" 

2Link Validation Utilities 

3Online documentation link validation utilities 

4""" 

5 

6import asyncio 

7import logging 

8from dataclasses import dataclass, field 

9from datetime import datetime 

10from pathlib import Path 

11from typing import List, Optional 

12 

13from moai_adk.utils.common import ( 

14 HTTPClient, 

15 create_report_path, 

16 extract_links_from_text, 

17 is_valid_url, 

18) 

19from moai_adk.utils.safe_file_reader import SafeFileReader 

20 

21logger = logging.getLogger(__name__) 

22 

23 

@dataclass
class LinkResult:
    """Link validation result for a single URL."""

    # The URL that was checked.
    url: str
    # HTTP status code of the response; this module stores 0 when no
    # response was obtained (invalid URL format or an unexpected error).
    status_code: int
    # Whether the link is considered reachable/valid.
    is_valid: bool
    # Time the request took; the report in this module prints it with an
    # "s" suffix, i.e. it is treated as seconds.
    response_time: float
    # Human-readable failure description, if any.
    error_message: Optional[str] = None
    # Timestamp of the check; defaults to the moment of construction.
    checked_at: datetime = field(default_factory=datetime.now)

    def __post_init__(self) -> None:
        """Replace an explicitly passed ``checked_at=None`` with the current time."""
        # The default_factory covers the "argument omitted" case; this guard
        # only matters when a caller passes None on purpose.
        if self.checked_at is None:
            self.checked_at = datetime.now()

38 

39 

@dataclass
class ValidationResult:
    """Overall validation result aggregated over a batch of links."""

    # Number of links that were checked.
    total_links: int
    # Number of links whose individual result was valid.
    valid_links: int
    # Number of links whose individual result was not valid.
    invalid_links: int
    # Per-link results.
    results: List[LinkResult]
    # Timestamp at which validation finished; defaults to construction time.
    completed_at: datetime = field(default_factory=datetime.now)

    def __post_init__(self) -> None:
        """Replace an explicitly passed ``completed_at=None`` with the current time."""
        if self.completed_at is None:
            self.completed_at = datetime.now()

    @property
    def success_rate(self) -> float:
        """Return the percentage (0-100) of valid links.

        Returns 0.0 when no links were checked, which avoids a division by
        zero on an empty batch.
        """
        if self.total_links == 0:
            return 0.0
        return (self.valid_links / self.total_links) * 100

60 

61 

class LinkValidator(HTTPClient):
    """Online documentation link validator.

    Extracts links from a file, checks each one through the inherited
    ``fetch_url`` coroutine, and renders the outcome as a Markdown report.
    """

    def __init__(self, max_concurrent: int = 5, timeout: int = 10):
        """Forward the concurrency limit and timeout to ``HTTPClient``.

        Args:
            max_concurrent: Upper bound on simultaneous link checks.
            timeout: Request timeout handed to the base class
                (presumably seconds -- confirm against ``HTTPClient``).
        """
        super().__init__(max_concurrent, timeout)

    def extract_links_from_file(
        self, file_path: Path, base_url: str = "https://adk.mo.ai.kr"
    ) -> List[str]:
        """Extract all links from a file (using safe file reading).

        Args:
            file_path: File to scan.
            base_url: Second argument passed to ``extract_links_from_text``
                (presumably used to resolve relative links -- confirm in
                ``moai_adk.utils.common``). Defaults to the project
                documentation site, which was previously hard-coded.

        Returns:
            The extracted links, or an empty list when the file is missing,
            cannot be read, or extraction raises.
        """
        if not file_path.exists():
            logger.warning(f"File does not exist: {file_path}")
            return []

        # Deliberately best-effort: any failure is logged and reported as
        # "no links" instead of propagating to the caller.
        try:
            reader = SafeFileReader()
            content = reader.read_text(file_path)
            if content is None:
                logger.error(f"Unable to read file: {file_path}")
                return []

            links = extract_links_from_text(content, base_url)
            logger.info(f"Found {len(links)} links in file: {file_path}")
            return links
        except Exception as e:
            logger.error(f"Error during link extraction: {e}")
            return []

    async def validate_link(self, url: str) -> LinkResult:
        """Validate a single link.

        Never raises for ordinary errors: a malformed URL or any exception
        during the request is turned into a failed ``LinkResult`` with
        ``status_code=0`` and an explanatory ``error_message``.
        """
        try:
            # Reject malformed URLs before spending a network request.
            if not is_valid_url(url):
                return LinkResult(
                    url=url,
                    status_code=0,
                    is_valid=False,
                    response_time=0.0,
                    error_message="Invalid URL format",
                )

            response = await self.fetch_url(url)

            return LinkResult(
                url=url,
                status_code=response.status_code,
                is_valid=response.success,
                response_time=response.load_time,
                error_message=response.error_message,
            )

        except Exception as e:
            return LinkResult(
                url=url,
                status_code=0,
                is_valid=False,
                response_time=0.0,
                error_message=f"Unexpected error: {str(e)}",
            )

    async def validate_all_links(self, links: List[str]) -> ValidationResult:
        """Validate all links concurrently.

        At most ``self.max_concurrent`` checks run at the same time. The
        returned ``results`` list is in the same order as ``links``.
        """
        semaphore = asyncio.Semaphore(self.max_concurrent)

        async def validate_with_semaphore(link: str) -> LinkResult:
            async with semaphore:
                result = await self.validate_link(link)
                # Log progress
                logger.info(
                    f"Validation complete: {link} -> {result.status_code} ({result.is_valid})"
                )
                return result

        # gather() returns values in the order the awaitables were passed,
        # so the report order is deterministic; appending to a shared list
        # from inside the tasks would instead record completion order.
        tasks = [validate_with_semaphore(link) for link in links]
        results = list(await asyncio.gather(*tasks))

        valid_links = sum(1 for r in results if r.is_valid)
        invalid_links = len(results) - valid_links

        return ValidationResult(
            total_links=len(results),
            valid_links=valid_links,
            invalid_links=invalid_links,
            results=results,
            completed_at=datetime.now(),
        )

    def generate_report(self, validation_result: ValidationResult) -> str:
        """Generate a Markdown validation report.

        The report contains a summary header, response-time statistics
        (when there is at least one result), details of every failed link,
        and a one-line summary of the successful links.
        """
        from moai_adk.utils.common import get_summary_stats

        report = [
            "# Online Documentation Link Validation Report",
            f"**Validation Time**: {validation_result.completed_at.strftime('%Y-%m-%d %H:%M:%S')}",
            f"**Total Links**: {validation_result.total_links}",
            f"**Valid Links**: {validation_result.valid_links}",
            f"**Invalid Links**: {validation_result.invalid_links}",
            f"**Success Rate**: {validation_result.success_rate:.1f}%",
            "",
        ]

        # Statistics
        if validation_result.results:
            response_times = [r.response_time for r in validation_result.results]
            stats = get_summary_stats(response_times)
            report.extend(
                [
                    "## 📊 Statistics",
                    "",
                    f"- Average Response Time: {stats['mean']:.2f}s",
                    f"- Minimum Response Time: {stats['min']:.2f}s",
                    f"- Maximum Response Time: {stats['max']:.2f}s",
                    f"- Standard Deviation: {stats['std']:.2f}s",
                    "",
                ]
            )

        # Failed links detailed report
        if validation_result.invalid_links > 0:
            report.append("## ❌ Failed Links")
            report.append("")

            for result in validation_result.results:
                if result.is_valid:
                    continue
                report.append(f"- **{result.url}**")
                # Two-space indent so these render as nested items under the
                # URL bullet; a single space makes them sibling list items.
                report.append(f"  - Status Code: {result.status_code}")
                report.append(f"  - Response Time: {result.response_time:.2f}s")
                if result.error_message:
                    report.append(f"  - Error: {result.error_message}")
                report.append("")

        # Successful links summary
        if validation_result.valid_links > 0:
            report.append("## ✅ Successful Links")
            report.append("")
            report.append(
                f"Total of {validation_result.valid_links} links validated successfully."
            )

        return "\n".join(report)

205 

206 

def validate_readme_links(readme_path: Optional[Path] = None) -> ValidationResult:
    """Validate all links in a README file and save a report.

    Args:
        readme_path: File to scan. Defaults to ``README.ko.md`` relative to
            the current working directory.

    Returns:
        The aggregated ``ValidationResult``. When no links are found an
        empty result (all counters zero) is returned and no report file is
        written.
    """
    if readme_path is None:
        readme_path = Path("README.ko.md")

    validator = LinkValidator(max_concurrent=3, timeout=8)

    # Extract links from README file
    links = validator.extract_links_from_file(readme_path)

    if not links:
        logger.warning("No links to validate")
        return ValidationResult(
            total_links=0, valid_links=0, invalid_links=0, results=[]
        )

    logger.info(f"Validating total of {len(links)} links...")

    # asyncio.run starts its own event loop, so this function is meant to
    # be called from synchronous code.
    result = asyncio.run(validator.validate_all_links(links))

    # Generate and save report
    report = validator.generate_report(result)
    report_path = create_report_path(Path("."), "link_validation")
    report_path.write_text(report, encoding="utf-8")
    logger.info(f"Report saved to: {report_path}")

    return result

235 

236 

if __name__ == "__main__":
    # Script entry point: validate the default README, print the Markdown
    # report, and write a copy next to the current working directory.
    result = validate_readme_links()

    # Render the report again for printing; generate_report only reads the
    # ValidationResult, so a fresh validator with default settings suffices.
    validator = LinkValidator()
    report = validator.generate_report(result)
    print(report)

    # Save to file
    report_path = Path("link_validation_report.md")
    report_path.write_text(report, encoding="utf-8")
    print(f"\nReport saved to: {report_path}")