Coverage for src / dataknobs_bots / bot / registry.py: 30%

139 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-16 10:50 -0700

1"""Multi-tenant bot registry with pluggable storage backends. 

2 

3This module provides a registry for managing bot configurations and instances 

4across multiple tenants. It combines: 

5- Pluggable storage backends (via RegistryBackend protocol) 

6- Environment-aware configuration resolution 

7- Portability validation for cross-environment deployments 

8- Bot instance caching with TTL 

9 

10Example: 

11 ```python 

12 from dataknobs_bots.bot import BotRegistry 

13 from dataknobs_bots.registry import InMemoryBackend 

14 

15 # Create registry with in-memory storage 

16 registry = BotRegistry( 

17 backend=InMemoryBackend(), 

18 environment="production", 

19 ) 

20 await registry.initialize() 

21 

22 # Register a portable bot configuration 

23 await registry.register("my-bot", { 

24 "bot": { 

25 "llm": {"$resource": "default", "type": "llm_providers"}, 

26 "conversation_storage": {"$resource": "db", "type": "databases"}, 

27 } 

28 }) 

29 

30 # Get bot instance (resolves $resource references) 

31 bot = await registry.get_bot("my-bot") 

32 response = await bot.chat(message, context) 

33 

34 # Cleanup 

35 await registry.close() 

36 ``` 

37""" 

38 

39from __future__ import annotations 

40 

41import asyncio 

42import logging 

43import time 

44from pathlib import Path 

45from typing import TYPE_CHECKING, Any 

46 

47from ..registry import InMemoryBackend, RegistryBackend, validate_portability 

48from .base import DynaBot 

49 

50if TYPE_CHECKING: 

51 from dataknobs_config import EnvironmentConfig 

52 

53 from ..registry import Registration 

54 

55logger = logging.getLogger(__name__) 

56 

57 

class BotRegistry:
    """Multi-tenant bot registry with caching and environment support.

    The BotRegistry manages multiple bot instances for different clients/tenants.
    It provides:
    - Pluggable storage backends via RegistryBackend protocol
    - Environment-aware configuration resolution
    - Portability validation to ensure configs work across environments
    - Bounded caching with TTL for bot instances (when the cache overflows,
      the entries that were cached longest ago are evicted first)
    - Coroutine-safe cache access in the async methods, guarded by an
      ``asyncio.Lock`` (this does not make the registry safe to share
      between OS threads)

    This enables:
    - Multi-tenant SaaS platforms
    - A/B testing with different bot configurations
    - Horizontal scaling with stateless bot instances
    - Cross-environment deployment with portable configs

    Attributes:
        backend: Storage backend for configurations
        environment: Environment for $resource resolution
        cache_ttl: Time-to-live for cached bots in seconds
        max_cache_size: Maximum number of bots to cache

    Example:
        ```python
        from dataknobs_bots.bot import BotRegistry
        from dataknobs_bots.registry import InMemoryBackend

        # Create registry
        registry = BotRegistry(
            backend=InMemoryBackend(),
            environment="production",
            cache_ttl=300,
        )
        await registry.initialize()

        # Register portable configuration
        await registry.register("client-123", {
            "bot": {
                "llm": {"$resource": "default", "type": "llm_providers"},
            }
        })

        # Get bot for a client
        bot = await registry.get_bot("client-123")

        # Use the bot
        response = await bot.chat(message, context)
        ```
    """

    def __init__(
        self,
        backend: RegistryBackend | None = None,
        environment: EnvironmentConfig | str | None = None,
        env_dir: str | Path = "config/environments",
        cache_ttl: int = 300,
        max_cache_size: int = 1000,
        validate_on_register: bool = True,
        config_key: str = "bot",
    ):
        """Initialize bot registry.

        Args:
            backend: Storage backend for configurations.
                If None, uses InMemoryBackend.
            environment: Environment name or EnvironmentConfig for
                $resource resolution. If None, configs are used as-is
                without environment resolution.
            env_dir: Directory containing environment config files.
                Only used if environment is a string name.
            cache_ttl: Cache time-to-live in seconds (default: 300)
            max_cache_size: Maximum cached bots (default: 1000)
            validate_on_register: If True, validate config portability
                when registering (default: True)
            config_key: Key within config containing bot configuration.
                Defaults to "bot". Used during environment resolution.
        """
        # Compare against None explicitly: a caller-supplied backend that
        # happens to be falsy (e.g. it defines __len__ and is currently empty)
        # must not be silently swapped for a fresh InMemoryBackend.
        self._backend = backend if backend is not None else InMemoryBackend()
        self._env_dir = Path(env_dir)
        self._cache_ttl = cache_ttl
        self._max_cache_size = max_cache_size
        self._validate_on_register = validate_on_register
        self._config_key = config_key

        # Bot instance cache: bot_id -> (DynaBot, cached_timestamp)
        self._cache: dict[str, tuple[DynaBot, float]] = {}
        self._lock = asyncio.Lock()
        self._initialized = False

        # Load environment config if specified
        self._environment: EnvironmentConfig | None = None
        if environment is not None:
            try:
                # Imported lazily so the registry still works (without
                # environment resolution) when dataknobs_config is absent.
                from dataknobs_config import EnvironmentConfig as EnvConfig

                if isinstance(environment, str):
                    self._environment = EnvConfig.load(environment, env_dir)
                else:
                    self._environment = environment
                logger.info(f"BotRegistry using environment: {self._environment.name}")
            except ImportError:
                logger.warning(
                    "dataknobs_config not installed, environment-aware features disabled"
                )

    @property
    def backend(self) -> RegistryBackend:
        """Get the storage backend."""
        return self._backend

    @property
    def environment(self) -> EnvironmentConfig | None:
        """Get current environment config, or None if not environment-aware."""
        return self._environment

    @property
    def environment_name(self) -> str | None:
        """Get current environment name, or None if not environment-aware."""
        return self._environment.name if self._environment else None

    @property
    def cache_ttl(self) -> int:
        """Get cache TTL in seconds."""
        return self._cache_ttl

    @property
    def max_cache_size(self) -> int:
        """Get maximum cache size."""
        return self._max_cache_size

    async def initialize(self) -> None:
        """Initialize the registry and backend.

        Must be called before using the registry. Calling it again while
        already initialized is a no-op.
        """
        if not self._initialized:
            await self._backend.initialize()
            self._initialized = True
            logger.info("BotRegistry initialized")

    async def close(self) -> None:
        """Close the registry and backend.

        Clears the bot cache and closes the storage backend.
        """
        async with self._lock:
            self._cache.clear()
        await self._backend.close()
        self._initialized = False
        logger.info("BotRegistry closed")

    async def _invalidate(self, bot_id: str) -> bool:
        """Drop the cached instance for ``bot_id`` while holding the lock.

        Args:
            bot_id: Bot identifier

        Returns:
            True if an entry was removed, False if nothing was cached
        """
        async with self._lock:
            # Cache values are always tuples, so None reliably means "absent".
            return self._cache.pop(bot_id, None) is not None

    async def register(
        self,
        bot_id: str,
        config: dict[str, Any],
        status: str = "active",
        skip_validation: bool = False,
    ) -> Registration:
        """Register or update a bot configuration.

        Stores a portable configuration in the backend. By default, validates
        that the configuration is portable (no resolved local values).

        Args:
            bot_id: Unique bot identifier
            config: Bot configuration dictionary (should be portable)
            status: Registration status (default: active)
            skip_validation: If True, skip portability validation

        Returns:
            Registration object with metadata

        Raises:
            PortabilityError: If config is not portable and validation is enabled

        Example:
            ```python
            # Register with portable config
            reg = await registry.register("support-bot", {
                "bot": {
                    "llm": {"$resource": "default", "type": "llm_providers"},
                }
            })
            print(f"Registered at: {reg.created_at}")

            # Update existing registration
            reg = await registry.register("support-bot", new_config)
            print(f"Updated at: {reg.updated_at}")
            ```
        """
        # Validate portability if enabled
        if self._validate_on_register and not skip_validation:
            validate_portability(config)

        # Store in backend
        registration = await self._backend.register(bot_id, config, status)

        # Invalidate cache for this bot so the next get_bot() rebuilds it
        # from the configuration that was just stored.
        if await self._invalidate(bot_id):
            logger.debug(f"Invalidated cache for bot: {bot_id}")

        logger.info(f"Registered bot: {bot_id}")
        return registration

    async def get_bot(
        self,
        bot_id: str,
        force_refresh: bool = False,
    ) -> DynaBot:
        """Get bot instance for a client.

        Bots are cached for performance. If a cached bot exists and hasn't
        expired, it's returned. Otherwise, a new bot is created from the
        stored configuration with environment resolution applied.

        The registry lock is held for the whole lookup/creation, so
        concurrent calls are serialized.

        Args:
            bot_id: Bot identifier
            force_refresh: If True, bypass cache and create fresh bot

        Returns:
            DynaBot instance for the client

        Raises:
            KeyError: If no registration exists for the bot_id
            ValueError: If bot configuration is invalid

        Example:
            ```python
            # Get cached bot
            bot = await registry.get_bot("client-123")

            # Force refresh (e.g., after config change)
            bot = await registry.get_bot("client-123", force_refresh=True)
            ```
        """
        async with self._lock:
            # Check cache
            if not force_refresh and bot_id in self._cache:
                bot, cached_at = self._cache[bot_id]
                if time.time() - cached_at < self._cache_ttl:
                    logger.debug(f"Returning cached bot: {bot_id}")
                    return bot

            # Load configuration from backend
            config = await self._backend.get_config(bot_id)
            if config is None:
                raise KeyError(f"No bot configuration found for: {bot_id}")

            # Create bot with environment resolution if configured
            if self._environment is not None:
                logger.debug(f"Creating bot with environment resolution: {bot_id}")
                bot = await DynaBot.from_environment_aware_config(
                    config,
                    environment=self._environment,
                    env_dir=self._env_dir,
                    config_key=self._config_key,
                )
            else:
                # Traditional path - use config as-is
                # Extract bot config if wrapped in config_key
                bot_config = config.get(self._config_key, config)
                logger.debug(f"Creating bot without environment resolution: {bot_id}")
                bot = await DynaBot.from_config(bot_config)

            # Cache the bot
            self._cache[bot_id] = (bot, time.time())
            logger.info(f"Created bot: {bot_id}")

            # Evict old entries if cache is full
            if len(self._cache) > self._max_cache_size:
                self._evict_oldest()

            return bot

    async def get_config(self, bot_id: str) -> dict[str, Any] | None:
        """Get stored configuration for a bot.

        Returns the portable configuration as stored, without
        environment resolution applied.

        Args:
            bot_id: Bot identifier

        Returns:
            Configuration dict if found, None otherwise
        """
        return await self._backend.get_config(bot_id)

    async def get_registration(self, bot_id: str) -> Registration | None:
        """Get full registration including metadata.

        Args:
            bot_id: Bot identifier

        Returns:
            Registration if found, None otherwise
        """
        return await self._backend.get(bot_id)

    async def unregister(self, bot_id: str) -> bool:
        """Remove a bot registration (hard delete).

        Args:
            bot_id: Bot identifier

        Returns:
            True if removed, False if not found
        """
        try:
            result = await self._backend.unregister(bot_id)
        finally:
            # Invalidate AFTER the backend call: a get_bot() that runs while
            # the backend operation is in flight could otherwise re-cache the
            # bot and leave a stale instance behind once it is removed.
            await self._invalidate(bot_id)

        if result:
            logger.info(f"Unregistered bot: {bot_id}")
        return result

    async def deactivate(self, bot_id: str) -> bool:
        """Deactivate a bot registration (soft delete).

        Args:
            bot_id: Bot identifier

        Returns:
            True if deactivated, False if not found
        """
        try:
            result = await self._backend.deactivate(bot_id)
        finally:
            # Same ordering as unregister(): drop the cache entry only once
            # the backend operation has finished, so a concurrent get_bot()
            # cannot re-populate it afterwards.
            await self._invalidate(bot_id)

        if result:
            logger.info(f"Deactivated bot: {bot_id}")
        return result

    async def exists(self, bot_id: str) -> bool:
        """Check if an active bot registration exists.

        Args:
            bot_id: Bot identifier

        Returns:
            True if registration exists and is active
        """
        return await self._backend.exists(bot_id)

    async def list_bots(self) -> list[str]:
        """List all active bot IDs.

        Returns:
            List of active bot identifiers
        """
        return await self._backend.list_ids()

    async def count(self) -> int:
        """Count active bot registrations.

        Returns:
            Number of active registrations
        """
        return await self._backend.count()

    def get_cached_bots(self) -> list[str]:
        """Get list of currently cached bot IDs.

        Entries whose TTL has elapsed stay listed until they are requested
        again, evicted or invalidated.

        Returns:
            List of bot IDs with cached instances
        """
        return list(self._cache.keys())

    def clear_cache(self) -> None:
        """Clear all cached bot instances.

        Does not affect stored registrations.
        """
        self._cache.clear()
        logger.debug("Cleared bot cache")

    def _evict_oldest(self) -> None:
        """Evict oldest cache entries when cache is full.

        Removes 10% of the entries (at least one), choosing those with the
        oldest cached-at timestamp. Callers are expected to hold the lock.
        """
        # Sort by timestamp (oldest first)
        sorted_items = sorted(self._cache.items(), key=lambda x: x[1][1])

        # Remove oldest 10%
        num_to_remove = max(1, len(sorted_items) // 10)
        for bot_id, _ in sorted_items[:num_to_remove]:
            del self._cache[bot_id]
        logger.debug(f"Evicted {num_to_remove} bots from cache")

    # Legacy compatibility methods

    async def register_client(
        self, client_id: str, bot_config: dict[str, Any]
    ) -> None:
        """Register or update a client's bot configuration.

        .. deprecated::
            Use :meth:`register` instead.

        Args:
            client_id: Client/tenant identifier
            bot_config: Bot configuration dictionary
        """
        await self.register(client_id, bot_config)

    async def remove_client(self, client_id: str) -> None:
        """Remove a client from the registry.

        .. deprecated::
            Use :meth:`unregister` instead.

        Args:
            client_id: Client/tenant identifier
        """
        await self.unregister(client_id)

    def get_cached_clients(self) -> list[str]:
        """Get list of currently cached client IDs.

        .. deprecated::
            Use :meth:`get_cached_bots` instead.

        Returns:
            List of client IDs with cached bots
        """
        return self.get_cached_bots()

    def __repr__(self) -> str:
        """String representation."""
        env = f", environment={self._environment.name!r}" if self._environment else ""
        return (
            f"BotRegistry(backend={self._backend!r}, "
            f"cached={len(self._cache)}{env})"
        )

500 

501 

class InMemoryBotRegistry(BotRegistry):
    """BotRegistry with in-memory storage backend.

    A convenience subclass that uses InMemoryBackend for storage,
    suitable for testing, CLIs, and single-instance deployments.

    Unlike the base BotRegistry which accepts a pluggable backend,
    this class always uses in-memory storage and doesn't require
    external dependencies like databases.

    Example:
        ```python
        from dataknobs_bots.bot import InMemoryBotRegistry

        # For testing - no environment resolution
        registry = InMemoryBotRegistry(validate_on_register=False)
        await registry.initialize()

        await registry.register("test-bot", {"llm": {"provider": "echo"}})
        bot = await registry.get_bot("test-bot")

        # For development with environment
        registry = InMemoryBotRegistry(environment="development")
        await registry.initialize()
        ```
    """

    def __init__(
        self,
        environment: EnvironmentConfig | str | None = None,
        env_dir: str | Path = "config/environments",
        cache_ttl: int = 300,
        max_cache_size: int = 1000,
        validate_on_register: bool = True,
        config_key: str = "bot",
    ):
        """Initialize in-memory bot registry.

        Args:
            environment: Environment name or EnvironmentConfig for
                $resource resolution. If None, configs are used as-is
                without environment resolution.
            env_dir: Directory containing environment config files.
                Only used if environment is a string name.
            cache_ttl: Cache time-to-live in seconds (default: 300)
            max_cache_size: Maximum cached bots (default: 1000)
            validate_on_register: If True, validate config portability
                when registering (default: True)
            config_key: Key within config containing bot configuration.
                Defaults to "bot". Used during environment resolution.
        """
        super().__init__(
            backend=InMemoryBackend(),
            environment=environment,
            env_dir=env_dir,
            cache_ttl=cache_ttl,
            max_cache_size=max_cache_size,
            validate_on_register=validate_on_register,
            config_key=config_key,
        )

    async def clear(self) -> None:
        """Clear all registrations and cached bots.

        Convenience method for test cleanup that clears both the
        backend storage and the bot instance cache.

        Example:
            ```python
            # In tests - reset between test cases
            await registry.clear()
            assert await registry.count() == 0
            ```
        """
        await self._backend.clear()
        # Take the registry lock (as close() does) so that a get_bot() call
        # still building a bot finishes first and cannot re-populate the
        # cache with an instance for a registration that was just cleared.
        async with self._lock:
            self._cache.clear()
        logger.debug("Cleared all registrations and cache")

    def __repr__(self) -> str:
        """String representation."""
        env = f", environment={self._environment.name!r}" if self._environment else ""
        return f"InMemoryBotRegistry(cached={len(self._cache)}{env})"

584 

585 

def create_memory_registry(
    environment: EnvironmentConfig | str | None = None,
    env_dir: str | Path = "config/environments",
    cache_ttl: int = 300,
    max_cache_size: int = 1000,
    validate_on_register: bool = True,
    config_key: str = "bot",
) -> InMemoryBotRegistry:
    """Build an InMemoryBotRegistry from the given settings.

    Factory shortcut for in-memory registries, which suit tests, CLIs
    and single-instance deployments. Every argument is forwarded
    unchanged to the InMemoryBotRegistry constructor.

    Args:
        environment: Environment name or EnvironmentConfig for
            $resource resolution. If None, configs are used as-is.
        env_dir: Directory containing environment config files.
        cache_ttl: Cache time-to-live in seconds (default: 300)
        max_cache_size: Maximum cached bots (default: 1000)
        validate_on_register: If True, validate config portability
        config_key: Key within config containing bot configuration

    Returns:
        InMemoryBotRegistry instance

    Example:
        ```python
        from dataknobs_bots.bot import create_memory_registry

        registry = create_memory_registry(validate_on_register=False)
        await registry.initialize()

        await registry.register("test-bot", {"llm": {"provider": "echo"}})
        bot = await registry.get_bot("test-bot")
        ```
    """
    # Collect the settings first, then hand them over in a single call.
    registry_options = {
        "environment": environment,
        "env_dir": env_dir,
        "cache_ttl": cache_ttl,
        "max_cache_size": max_cache_size,
        "validate_on_register": validate_on_register,
        "config_key": config_key,
    }
    return InMemoryBotRegistry(**registry_options)