Coverage for src / moai_adk / utils / safe_file_reader.py: 0.00%

66 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-20 20:52 +0900

1#!/usr/bin/env python3 

2""" 

3Safe File Reader Utility 

4 

5Provides safe file reading with multiple encoding fallbacks and error handling. 

6Resolves UTF-8 encoding issues found in MoAI-ADK project. 

7 

8Author: Alfred@MoAI 

9Date: 2025-11-11 

10""" 

11 

12from pathlib import Path 

13from typing import List, Optional, Union 

14 

15 

class SafeFileReader:
    """
    Safe file reader with encoding fallback support.

    Each candidate encoding is tried in order with strict decoding; the first
    one that decodes the whole file wins. If none succeeds, the file is decoded
    as UTF-8 using the configured ``errors`` strategy.

    Handles various encoding issues including:
    - UTF-8 vs CP1252 encoding conflicts
    - Binary files with special characters
    - File system encoding issues
    """

    # Encoding priority order. "utf-16" has to come before the single-byte
    # encodings: "iso-8859-1"/"latin1" accept every byte sequence (and "cp1252"
    # nearly every one), so anything listed after them is never reached.
    DEFAULT_ENCODINGS = [
        "utf-8",  # Standard UTF-8
        "utf-16",  # UTF-16; the text-mode decoder rejects streams without a BOM
        "cp1252",  # Windows-1252 (Western European)
        "iso-8859-1",  # Latin-1 (Western European); decodes any byte sequence
        "latin1",  # Alias of iso-8859-1, kept for backward compatibility
        "ascii",  # Pure ASCII, kept for backward compatibility
    ]

    def __init__(self, encodings: Optional[List[str]] = None, errors: str = "ignore"):
        """
        Initialize SafeFileReader.

        Args:
            encodings: List of encodings to try in order. ``None`` or an empty
                list selects ``DEFAULT_ENCODINGS``.
            errors: Error handling strategy ('ignore', 'replace', 'strict') used
                only by the final UTF-8 fallback.
        """
        # Copy so that mutating ``reader.encodings`` can never alter the shared
        # class-level defaults or the caller's own list.
        self.encodings = list(encodings) if encodings else list(self.DEFAULT_ENCODINGS)
        self.errors = errors

    def read_text(self, file_path: Union[str, Path]) -> Optional[str]:
        """
        Safely read text file with encoding fallbacks.

        Args:
            file_path: Path to the file to read

        Returns:
            File content as string, or None if the path does not exist, cannot
            be opened, or every decoding attempt fails
        """
        file_path = Path(file_path)

        if not file_path.exists():
            return None

        # Try each encoding in order
        for encoding in self.encodings:
            try:
                return file_path.read_text(encoding=encoding)
            except UnicodeError:
                # Covers UnicodeDecodeError as well as the plain UnicodeError
                # raised for a UTF-16 stream without a BOM: wrong encoding,
                # move on to the next candidate.
                continue
            except OSError as e:
                # The file itself is unreadable (directory, permissions, ...);
                # another encoding cannot help, so report once and stop.
                print(f"Error: Could not read {file_path}: {e}")
                return None
            except Exception as e:
                # Log non-decoding errors (e.g. unknown codec name) but continue
                print(f"Warning: Error reading {file_path} with {encoding}: {e}")
                continue

        # Final fallback with specified error handling
        try:
            return file_path.read_text(encoding="utf-8", errors=self.errors)
        except Exception as e:
            print(f"Error: Could not read {file_path}: {e}")
            return None

    def read_lines(self, file_path: Union[str, Path]) -> List[str]:
        """
        Safely read file as list of lines.

        Args:
            file_path: Path to the file to read

        Returns:
            List of lines with their line endings kept, or empty list if
            reading fails
        """
        content = self.read_text(file_path)
        if content is None:
            return []

        return content.splitlines(keepends=True)

    def safe_glob_read(self, pattern: str, base_path: Union[str, Path] = ".") -> dict:
        """
        Safely read multiple files matching a glob pattern.

        Args:
            pattern: Glob pattern to match files
            base_path: Base directory for glob search

        Returns:
            Dictionary mapping file paths (as strings) to their contents.
            Files that cannot be read are omitted.
        """
        base_path = Path(base_path)
        results = {}

        # Deliberately best-effort: a failing glob is reported and whatever was
        # collected so far is still returned.
        try:
            for file_path in base_path.glob(pattern):
                if not file_path.is_file():
                    continue
                content = self.read_text(file_path)
                if content is not None:
                    results[str(file_path)] = content
        except Exception as e:
            print(f"Error: Failed to glob pattern '{pattern}': {e}")

        return results

    def is_safe_file(self, file_path: Union[str, Path]) -> bool:
        """
        Check if file can be safely read.

        The check performs a full read of the file via ``read_text``.

        Args:
            file_path: Path to the file to check

        Returns:
            True if file can be read safely, False otherwise
        """
        return self.read_text(file_path) is not None

133 

134 

135# Global convenience functions 

def safe_read_file(
    file_path: Union[str, Path], encodings: Optional[List[str]] = None
) -> Optional[str]:
    """
    Read one file through a throwaway SafeFileReader.

    Args:
        file_path: Location of the file to read
        encodings: Candidate encodings, tried in the given order

    Returns:
        The decoded file content, or None when the read fails
    """
    return SafeFileReader(encodings=encodings).read_text(file_path)

151 

152 

def safe_read_lines(
    file_path: Union[str, Path], encodings: Optional[List[str]] = None
) -> List[str]:
    """
    Read one file as a list of lines through a throwaway SafeFileReader.

    Args:
        file_path: Location of the file to read
        encodings: Candidate encodings, tried in the given order

    Returns:
        The file's lines, or an empty list when the read fails
    """
    return SafeFileReader(encodings=encodings).read_lines(file_path)

168 

169 

def safe_glob_read(
    pattern: str,
    base_path: Union[str, Path] = ".",
    encodings: Optional[List[str]] = None,
) -> dict:
    """
    Read every file matching a glob pattern through a throwaway SafeFileReader.

    Args:
        pattern: Glob pattern selecting the files
        base_path: Directory the pattern is evaluated against
        encodings: Candidate encodings, tried in the given order

    Returns:
        Mapping of file path to file content
    """
    return SafeFileReader(encodings=encodings).safe_glob_read(pattern, base_path)

188 

189 

if __name__ == "__main__":
    # Smoke test: read this very file through the encoding fallback chain.
    import sys

    reader = SafeFileReader()
    content = reader.read_text(__file__)

    # read_text signals failure with None; an empty string is still a
    # successful read, so test for None rather than truthiness.
    if content is None:
        print("Failed to read file")
        sys.exit(1)

    print("Successfully read file with safe encoding detection")
    print(f"File length: {len(content)} characters")

    print("SafeFileReader test completed successfully!")