Coverage for src/lite_agent/client.py: 57%

91 statements  

Coverage report generated by coverage.py v7.10.5 at 2025-08-25 22:58 +0900

1import abc 

2import os 

3from typing import Any, Literal 

4 

5import litellm 

6from openai.types.chat import ChatCompletionToolParam 

7from openai.types.responses import FunctionToolParam 

8from pydantic import BaseModel 

9 

10ReasoningEffort = Literal["minimal", "low", "medium", "high"] 

11ThinkingConfig = dict[str, Any] | None 

12 

13# 统一的推理配置类型 

14ReasoningConfig = ( 

15 str 

16 | dict[str, Any] # {"type": "enabled", "budget_tokens": 2048} 或其他配置 

17 | bool # True/False 简单开关 

18 | None # 不启用推理 

19) 

20 

21 

class LLMConfig(BaseModel):
    """LLM generation parameters configuration.

    Every field defaults to None, meaning "not specified": callers only
    forward non-None values to the provider, so the provider's own
    defaults apply for anything left unset.
    """

    # Sampling temperature; None leaves the provider default in place.
    temperature: float | None = None
    # Upper bound on generated tokens; None means no explicit cap is sent.
    max_tokens: int | None = None
    # Nucleus-sampling probability mass; None means provider default.
    top_p: float | None = None
    # Frequency penalty (repeat-token discouragement); None = not sent.
    frequency_penalty: float | None = None
    # Presence penalty (new-topic encouragement); None = not sent.
    presence_penalty: float | None = None
    # One stop sequence or a list of them; None disables explicit stops.
    stop: list[str] | str | None = None

31 

32 

33def parse_reasoning_config(reasoning: ReasoningConfig) -> tuple[ReasoningEffort | None, ThinkingConfig]: 

34 """ 

35 解析统一的推理配置,返回 reasoning_effort 和 thinking_config。 

36 

37 Args: 

38 reasoning: 统一的推理配置 

39 - str: "minimal", "low", "medium", "high" -> reasoning_effort 

40 - dict: {"type": "enabled", "budget_tokens": N} -> thinking_config 

41 - bool: True -> "medium", False -> None 

42 - None: 不启用推理 

43 

44 Returns: 

45 tuple: (reasoning_effort, thinking_config) 

46 """ 

47 if reasoning is None: 

48 return None, None 

49 if isinstance(reasoning, str): 

50 # 字符串类型,使用 reasoning_effort 

51 # 确保字符串是有效的 ReasoningEffort 值 

52 if reasoning in ("minimal", "low", "medium", "high"): 

53 return reasoning, None # type: ignore[return-value] 

54 return None, None 

55 if isinstance(reasoning, dict): 

56 # 字典类型,使用 thinking_config 

57 return None, reasoning 

58 if isinstance(reasoning, bool): 

59 # 布尔类型,True 使用默认的 medium,False 不启用 

60 return "medium" if reasoning else None, None 

61 # 其他类型,默认不启用 

62 return None, None 

63 

64 

class BaseLLMClient(abc.ABC):
    """Abstract base class for LLM clients.

    Holds the connection settings (model, key, base URL, API version),
    the generation parameters (LLMConfig), and the reasoning configuration
    normalized via parse_reasoning_config.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str | None = None,
        api_base: str | None = None,
        api_version: str | None = None,
        reasoning: ReasoningConfig = None,
        llm_config: LLMConfig | None = None,
        **llm_params: Any,  # noqa: ANN401
    ):
        self.model = model
        self.api_key = api_key
        self.api_base = api_base
        self.api_version = api_version

        # An explicit LLMConfig wins; otherwise build one from loose kwargs.
        self.llm_config = LLMConfig(**llm_params) if llm_config is None else llm_config

        # Normalize the unified reasoning setting into the two concrete knobs.
        self.reasoning_effort: ReasoningEffort | None
        self.thinking_config: ThinkingConfig
        self.reasoning_effort, self.thinking_config = parse_reasoning_config(reasoning)

    @abc.abstractmethod
    async def completion(
        self,
        messages: list[Any],
        tools: list[ChatCompletionToolParam] | None = None,
        tool_choice: str = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # noqa: ANN401
    ) -> Any:  # noqa: ANN401
        """Perform a completion request to the LLM."""

    @abc.abstractmethod
    async def responses(
        self,
        messages: list[dict[str, Any]],  # Changed from ResponseInputParam
        tools: list[FunctionToolParam] | None = None,
        tool_choice: Literal["none", "auto", "required"] = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # noqa: ANN401
    ) -> Any:  # noqa: ANN401
        """Perform a response request to the LLM."""

121 

122 

class LiteLLMClient(BaseLLMClient):
    """LLM client backed by litellm's async APIs (acompletion / aresponses)."""

    def _resolve_reasoning_params(
        self,
        reasoning: ReasoningConfig,
    ) -> tuple[ReasoningEffort | None, ThinkingConfig]:
        """Resolve the per-call reasoning configuration.

        Args:
            reasoning: Per-call override; None means "use the values parsed
                from the constructor's reasoning argument".

        Returns:
            tuple: (reasoning_effort, thinking_config)
        """
        if reasoning is not None:
            return parse_reasoning_config(reasoning)
        # Fall back to the instance-level defaults parsed in __init__.
        return self.reasoning_effort, self.thinking_config

    def _apply_llm_config(self, params: dict[str, Any]) -> None:
        """Copy the non-None generation parameters from self.llm_config into params.

        Shared by completion() and responses() so the two request builders
        cannot drift apart. This runs after **kwargs are merged, so configured
        values override same-named per-call kwargs (unchanged behavior).
        """
        cfg = self.llm_config
        if cfg.temperature is not None:
            params["temperature"] = cfg.temperature
        if cfg.max_tokens is not None:
            params["max_tokens"] = cfg.max_tokens
        if cfg.top_p is not None:
            params["top_p"] = cfg.top_p
        if cfg.frequency_penalty is not None:
            params["frequency_penalty"] = cfg.frequency_penalty
        if cfg.presence_penalty is not None:
            params["presence_penalty"] = cfg.presence_penalty
        if cfg.stop is not None:
            params["stop"] = cfg.stop

    @staticmethod
    def _apply_reasoning(
        params: dict[str, Any],
        reasoning_effort: ReasoningEffort | None,
        thinking_config: ThinkingConfig,
    ) -> None:
        """Add the reasoning-related request keys when they are enabled."""
        if reasoning_effort is not None:
            params["reasoning_effort"] = reasoning_effort
        if thinking_config is not None:
            params["thinking"] = thinking_config

    async def completion(
        self,
        messages: list[Any],
        tools: list[ChatCompletionToolParam] | None = None,
        tool_choice: str = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # noqa: ANN401
    ) -> Any:  # noqa: ANN401
        """Perform a completion request to the Litellm API.

        Args:
            messages: Chat messages to send.
            tools: Optional tool definitions in chat-completions format.
            tool_choice: Tool selection strategy (passed through to litellm).
            reasoning: Optional per-call reasoning override.
            streaming: Whether to request a streamed response.
            **kwargs: Extra litellm parameters; note that non-None values in
                self.llm_config override same-named kwargs.

        Returns:
            The awaited result of litellm.acompletion.
        """
        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
            reasoning,
        )

        completion_params: dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "tools": tools,
            "tool_choice": tool_choice,
            "api_version": self.api_version,
            "api_key": self.api_key,
            "api_base": self.api_base,
            "stream": streaming,
            **kwargs,
        }
        self._apply_llm_config(completion_params)
        self._apply_reasoning(completion_params, final_reasoning_effort, final_thinking_config)

        return await litellm.acompletion(**completion_params)

    async def responses(
        self,
        messages: list[dict[str, Any]],  # Changed from ResponseInputParam
        tools: list[FunctionToolParam] | None = None,
        tool_choice: Literal["none", "auto", "required"] = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # type: ignore[return]  # noqa: ANN401
    ) -> Any:  # noqa: ANN401
        """Perform a response request to the Litellm API.

        Args:
            messages: Responses-API input items.
            tools: Optional tool definitions in responses format.
            tool_choice: Tool selection strategy.
            reasoning: Optional per-call reasoning override.
            streaming: Whether to request a streamed response.
            **kwargs: Extra litellm parameters; note that non-None values in
                self.llm_config override same-named kwargs.

        Returns:
            The awaited result of litellm.aresponses.
        """
        # NOTE(review): process-wide side effect on every call; presumably a
        # workaround for litellm's aiohttp transport on the responses path —
        # confirm whether this can move to client construction.
        os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"

        final_reasoning_effort, final_thinking_config = self._resolve_reasoning_params(
            reasoning,
        )

        response_params: dict[str, Any] = {
            "model": self.model,
            "input": messages,  # type: ignore[arg-type]
            "tools": tools,
            "tool_choice": tool_choice,
            "api_version": self.api_version,
            "api_key": self.api_key,
            "api_base": self.api_base,
            "stream": streaming,
            "store": False,
            **kwargs,
        }
        self._apply_llm_config(response_params)
        self._apply_reasoning(response_params, final_reasoning_effort, final_thinking_config)

        return await litellm.aresponses(**response_params)  # type: ignore[return-value]