Coverage for src / dataknobs_bots / registry / portability.py: 34%

32 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-16 10:50 -0700

1"""Portability validation utilities for bot configurations. 

2 

3This module provides utilities to validate that bot configurations 

4are portable across environments. Portable configs use $resource 

5references instead of hardcoded values like local paths or localhost URLs. 

6""" 

7 

8from __future__ import annotations 

9 

10import re 

11from typing import Any 

12 

13 

class PortabilityError(Exception):
    r"""Signal that a configuration holds environment-specific values.

    A value is treated as non-portable when it looks like it was already
    resolved for one particular machine, for instance:

    - a local filesystem path (/Users/..., /home/..., C:\Users\...)
    - a loopback or wildcard address (localhost:port, 127.0.0.1, 0.0.0.0)

    Configs meant to be shared between environments should carry
    $resource references, which are resolved at runtime for the
    environment the config is loaded in.

    Example:
        ```python
        # Raises PortabilityError: hardcoded local path
        validate_portability({
            "storage": {"path": "/Users/dev/data"}
        })

        # Passes: the storage backend is a late-bound reference
        validate_portability({
            "storage": {"$resource": "default", "type": "databases"}
        })
        ```
    """

39 

40 

41# Patterns that indicate resolved local values (not portable) 

42# Note: Windows paths may appear with single or double backslashes depending 

43# on whether we're matching against repr() output or actual string values 

44SUSPICIOUS_PATTERNS: list[tuple[str, str]] = [ 

45 (r"/Users/\w+", "macOS home directory"), 

46 (r"/home/\w+", "Linux home directory"), 

47 (r"C:\\+Users\\+\w+", "Windows home directory"), # Matches C:\Users or C:\\Users 

48 (r"localhost:\d+", "localhost with port"), 

49 (r"127\.0\.0\.1", "localhost IP"), 

50 (r"0\.0\.0\.0", "all interfaces IP"), 

51] 

52 

53# Patterns that are OK (environment variable placeholders) 

54SAFE_PATTERNS: list[str] = [ 

55 r"\$\{[^}]+\}", # ${VAR} or ${VAR:default} 

56 r"\$[A-Z_][A-Z0-9_]*", # $VAR 

57] 

58 

59 

60def validate_portability( 

61 config: dict[str, Any], 

62 raise_on_error: bool = True, 

63) -> list[str]: 

64 """Validate that a config is portable (no resolved local values). 

65 

66 Checks for patterns that indicate resolved environment values 

67 that would break portability across environments. 

68 

69 Args: 

70 config: Configuration dictionary to validate 

71 raise_on_error: If True, raise PortabilityError; otherwise return issues 

72 

73 Returns: 

74 List of portability issues found (empty if portable) 

75 

76 Raises: 

77 PortabilityError: If non-portable and raise_on_error=True 

78 

79 Example: 

80 ```python 

81 # This will raise PortabilityError 

82 validate_portability({ 

83 "llm": {"api_key": "sk-..."}, # OK - not a path 

84 "storage": {"path": "/Users/dev/data"}, # NOT OK - local path 

85 }) 

86 

87 # Check without raising 

88 issues = validate_portability(config, raise_on_error=False) 

89 if issues: 

90 print(f"Found {len(issues)} portability issues") 

91 

92 # This is OK - uses $resource references 

93 validate_portability({ 

94 "llm": {"$resource": "default", "type": "llm_providers"}, 

95 "storage": {"$resource": "db", "type": "databases"}, 

96 }) 

97 

98 # Environment variables are OK 

99 validate_portability({ 

100 "storage": {"path": "${DATA_PATH}"}, # OK - env var placeholder 

101 }) 

102 ``` 

103 """ 

104 config_str = str(config) 

105 issues: list[str] = [] 

106 

107 for pattern, description in SUSPICIOUS_PATTERNS: 

108 matches = re.findall(pattern, config_str) 

109 for match in matches: 

110 # Check if this match is inside a safe pattern (env var) 

111 is_safe = _is_in_safe_pattern(match, config_str) 

112 

113 if not is_safe: 

114 issues.append(f"Found {description}: '{match}'") 

115 

116 if issues and raise_on_error: 

117 raise PortabilityError( 

118 "Config appears to contain resolved local values that would break " 

119 "portability. Store portable config with $resource references instead.\n" 

120 "Issues found:\n" + "\n".join(f" - {issue}" for issue in issues) 

121 ) 

122 

123 return issues 

124 

125 

126def _is_in_safe_pattern(match: str, config_str: str) -> bool: 

127 """Check if a suspicious match is inside a safe pattern (env var). 

128 

129 Args: 

130 match: The suspicious string that was matched 

131 config_str: The full config string 

132 

133 Returns: 

134 True if the match appears inside an env var pattern 

135 """ 

136 for safe_pattern in SAFE_PATTERNS: 

137 # Check if the suspicious pattern appears inside a safe pattern 

138 # e.g., "${HOME}/data" contains "/home" but it's inside ${...} 

139 combined_pattern = f"{safe_pattern}[^'\"]*{re.escape(match)}" 

140 if re.search(combined_pattern, config_str): 

141 return True 

142 return False 

143 

144 

def has_resource_references(config: dict[str, Any]) -> bool:
    """Report whether the text ``$resource`` occurs anywhere in a config.

    The check is textual: the config is rendered with ``str()`` and searched
    for the literal ``$resource``. Configs that carry such references are
    portable ones that still need environment resolution before use.

    Args:
        config: Configuration dictionary

    Returns:
        True if the rendered config contains ``$resource``

    Example:
        ```python
        # Portable config with $resource refs
        config = {
            "bot": {
                "llm": {"$resource": "default", "type": "llm_providers"},
            }
        }
        assert has_resource_references(config) is True

        # Resolved config (no $resource refs)
        config = {
            "bot": {
                "llm": {"provider": "openai", "model": "gpt-4"},
            }
        }
        assert has_resource_references(config) is False
        ```
    """
    rendered = str(config)
    return rendered.find("$resource") != -1

177 

178 

def is_portable(config: dict[str, Any]) -> bool:
    """Tell whether a config looks safe to move between environments.

    Either of the following is enough for a config to count as portable:
    - it contains $resource references (bound later, per environment), or
    - scanning it turns up no suspicious local values

    Args:
        config: Configuration dictionary

    Returns:
        True if config appears to be portable

    Example:
        ```python
        # Portable: uses $resource
        assert is_portable({"llm": {"$resource": "default"}}) is True

        # Portable: no local paths
        assert is_portable({"llm": {"provider": "openai"}}) is True

        # Not portable: contains local path
        assert is_portable({"path": "/Users/dev/data"}) is False
        ```
    """
    # $resource refs short-circuit the check: the scan for suspicious values
    # only runs when no such reference is present.
    return has_resource_references(config) or not validate_portability(
        config, raise_on_error=False
    )