Coverage for src / moai_adk / utils / safe_file_reader.py: 0.00%
66 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-20 20:52 +0900
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-20 20:52 +0900
1#!/usr/bin/env python3
2"""
3Safe File Reader Utility
5Provides safe file reading with multiple encoding fallbacks and error handling.
6Resolves UTF-8 encoding issues found in MoAI-ADK project.
8Author: Alfred@MoAI
9Date: 2025-11-11
10"""
12from pathlib import Path
13from typing import List, Optional, Union
class SafeFileReader:
    """
    Safe file reader with encoding fallback support.

    The file is read once as raw bytes; the bytes are then decoded with each
    configured encoding in order until one succeeds. If none succeeds, a last
    UTF-8 decode is attempted using the configured ``errors`` strategy.

    Handles various encoding issues including:
    - UTF-8 vs CP1252 encoding conflicts
    - UTF-16 files that start with a byte-order mark
    - Files that cannot be opened at all (reported once, result is None)
    """

    # Encoding priority order (most common to least common).
    # NOTE: "iso-8859-1"/"latin1" decode every possible byte sequence, so an
    # entry listed after them is only ever used when read_text() promotes it
    # (see _ordered_encodings for the UTF-16 byte-order-mark case).
    DEFAULT_ENCODINGS = [
        "utf-8",  # Standard UTF-8
        "cp1252",  # Windows-1252 (Western European)
        "iso-8859-1",  # Latin-1 (Western European)
        "latin1",  # Alternative Latin-1
        "utf-16",  # UTF-16 with BOM detection
        "ascii",  # Pure ASCII fallback
    ]

    # Byte-order marks used to recognise UTF-16 content before the
    # single-byte codecs get a chance to mis-decode it.
    _UTF16_BOMS = (b"\xff\xfe", b"\xfe\xff")
    # The UTF-32-LE mark begins with the UTF-16-LE mark; such data must not
    # be treated as UTF-16.
    _UTF32_LE_BOM = b"\xff\xfe\x00\x00"
    # Spellings of the BOM-aware UTF-16 codec after stripping "-" and "_".
    _UTF16_NAMES = frozenset({"utf16", "u16"})

    def __init__(self, encodings: Optional[List[str]] = None, errors: str = "ignore"):
        """
        Initialize SafeFileReader.

        Args:
            encodings: List of encodings to try in order. None or an empty
                list selects DEFAULT_ENCODINGS.
            errors: Error handling strategy ('ignore', 'replace', 'strict')
                used only by the final UTF-8 fallback.
        """
        # Copy so that mutating an instance's list can never alter the shared
        # class-level default (or the caller's own list).
        self.encodings = list(encodings or self.DEFAULT_ENCODINGS)
        self.errors = errors

    @classmethod
    def _is_utf16_name(cls, encoding: object) -> bool:
        """Return True if *encoding* names the BOM-aware UTF-16 codec."""
        if not isinstance(encoding, str):
            return False
        return encoding.lower().replace("-", "").replace("_", "") in cls._UTF16_NAMES

    def _ordered_encodings(self, data: bytes) -> list:
        """Return the configured encodings, trying UTF-16 first for BOM-marked data."""
        encodings = list(self.encodings)
        starts_with_utf16_bom = data.startswith(self._UTF16_BOMS) and not data.startswith(
            self._UTF32_LE_BOM
        )
        if not starts_with_utf16_bom:
            return encodings

        # Only promote a codec the caller actually configured; a custom list
        # without UTF-16 is left exactly as given.
        promoted = [name for name in encodings if self._is_utf16_name(name)]
        remaining = [name for name in encodings if not self._is_utf16_name(name)]
        return promoted + remaining

    @staticmethod
    def _normalize_newlines(text: str) -> str:
        """Translate CRLF and lone CR to LF, as text-mode reading does."""
        return text.replace("\r\n", "\n").replace("\r", "\n")

    def read_text(self, file_path: Union[str, Path]) -> Optional[str]:
        """
        Safely read text file with encoding fallbacks.

        Args:
            file_path: Path to the file to read

        Returns:
            File content as string (line endings normalised to "\\n"), or
            None if the path does not exist, cannot be read, or cannot be
            decoded by any attempt.
        """
        file_path = Path(file_path)

        if not file_path.exists():
            return None

        # Read the bytes once: an I/O failure (directory, permissions, ...)
        # does not depend on the encoding, so it is reported a single time
        # instead of once per encoding.
        try:
            data = file_path.read_bytes()
        except OSError as e:
            print(f"Error: Could not read {file_path}: {e}")
            return None

        # Try each encoding in order
        for encoding in self._ordered_encodings(data):
            try:
                text = data.decode(encoding)
            except UnicodeDecodeError:
                continue
            except (LookupError, TypeError, ValueError) as e:
                # Unknown or unusable codec name: report it and keep going
                print(f"Warning: Error reading {file_path} with {encoding}: {e}")
                continue
            return self._normalize_newlines(text)

        # Final fallback with specified error handling; reached only when
        # every configured encoding failed.
        try:
            text = data.decode("utf-8", errors=self.errors)
        except (UnicodeError, LookupError, TypeError) as e:
            print(f"Error: Could not read {file_path}: {e}")
            return None
        return self._normalize_newlines(text)

    def read_lines(self, file_path: Union[str, Path]) -> List[str]:
        """
        Safely read file as list of lines.

        Lines are produced by ``str.splitlines(keepends=True)`` on the result
        of read_text(), so line terminators are kept on each element.

        Args:
            file_path: Path to the file to read

        Returns:
            List of lines, or empty list if reading fails
        """
        content = self.read_text(file_path)
        if content is None:
            return []

        return content.splitlines(keepends=True)

    def safe_glob_read(self, pattern: str, base_path: Union[str, Path] = ".") -> dict:
        """
        Safely read multiple files matching a glob pattern.

        Args:
            pattern: Glob pattern to match files
            base_path: Base directory for glob search

        Returns:
            Dictionary mapping file paths (as strings) to their contents.
            Files for which read_text() returns None are omitted.
        """
        base_path = Path(base_path)
        results = {}

        # Deliberately best-effort: a failure while globbing is reported and
        # whatever was collected so far is returned.
        try:
            for file_path in base_path.glob(pattern):
                if file_path.is_file():
                    content = self.read_text(file_path)
                    if content is not None:
                        results[str(file_path)] = content
        except Exception as e:
            print(f"Error: Failed to glob pattern '{pattern}': {e}")

        return results

    def is_safe_file(self, file_path: Union[str, Path]) -> bool:
        """
        Check if file can be safely read.

        This performs a full read_text() of the file and discards the result.

        Args:
            file_path: Path to the file to check

        Returns:
            True if read_text() returns content (including an empty string),
            False otherwise
        """
        return self.read_text(file_path) is not None
135# Global convenience functions
def safe_read_file(
    file_path: Union[str, Path], encodings: Optional[List[str]] = None
) -> Optional[str]:
    """
    Read a single file through a throwaway SafeFileReader.

    Args:
        file_path: Location of the file to read
        encodings: Encodings to attempt, in order; forwarded to SafeFileReader

    Returns:
        Whatever SafeFileReader.read_text returns for file_path: the file
        content as a string, or None if reading fails
    """
    return SafeFileReader(encodings=encodings).read_text(file_path)
def safe_read_lines(
    file_path: Union[str, Path], encodings: Optional[List[str]] = None
) -> List[str]:
    """
    Read a single file as lines through a throwaway SafeFileReader.

    Args:
        file_path: Location of the file to read
        encodings: Encodings to attempt, in order; forwarded to SafeFileReader

    Returns:
        Whatever SafeFileReader.read_lines returns for file_path: the list
        of lines, or an empty list if reading fails
    """
    return SafeFileReader(encodings=encodings).read_lines(file_path)
def safe_glob_read(
    pattern: str,
    base_path: Union[str, Path] = ".",
    encodings: Optional[List[str]] = None,
) -> dict:
    """
    Read every file matching a glob pattern through a throwaway SafeFileReader.

    Args:
        pattern: Glob pattern selecting the files
        base_path: Directory the pattern is evaluated against
        encodings: Encodings to attempt, in order; forwarded to SafeFileReader

    Returns:
        Whatever SafeFileReader.safe_glob_read returns: a dictionary mapping
        file paths to their contents
    """
    return SafeFileReader(encodings=encodings).safe_glob_read(pattern, base_path)
if __name__ == "__main__":
    # Smoke test: read this very file through SafeFileReader and report
    # the outcome on stdout; exit with status 1 when nothing was read.
    import logging
    import sys

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    reader = SafeFileReader()
    content = reader.read_text(__file__)

    # Guard clause: bail out first so the success path reads straight down.
    if not content:
        print("Failed to read file")
        sys.exit(1)

    print("Successfully read file with safe encoding detection")
    print(f"File length: {len(content)} characters")

    print("SafeFileReader test completed successfully!")