Coverage for src/lite_agent/client.py: 57%
91 statements
« prev ^ index » next coverage.py v7.10.5, created at 2025-08-25 22:58 +0900
1import abc
2import os
3from typing import Any, Literal
5import litellm
6from openai.types.chat import ChatCompletionToolParam
7from openai.types.responses import FunctionToolParam
8from pydantic import BaseModel
10ReasoningEffort = Literal["minimal", "low", "medium", "high"]
11ThinkingConfig = dict[str, Any] | None
13# 统一的推理配置类型
14ReasoningConfig = (
15 str
16 | dict[str, Any] # {"type": "enabled", "budget_tokens": 2048} 或其他配置
17 | bool # True/False 简单开关
18 | None # 不启用推理
19)
class LLMConfig(BaseModel):
    """LLM generation parameters configuration.

    All fields default to None; only non-None values are forwarded to the
    underlying litellm request (see LiteLLMClient.completion/responses).
    """

    # Sampling temperature.
    temperature: float | None = None
    # Hard cap on generated tokens.
    max_tokens: int | None = None
    # Nucleus-sampling probability mass.
    top_p: float | None = None
    # Penalty on token frequency.
    frequency_penalty: float | None = None
    # Penalty on token presence.
    presence_penalty: float | None = None
    # One stop string or a list of them.
    stop: list[str] | str | None = None
33def parse_reasoning_config(reasoning: ReasoningConfig) -> tuple[ReasoningEffort | None, ThinkingConfig]:
34 """
35 解析统一的推理配置,返回 reasoning_effort 和 thinking_config。
37 Args:
38 reasoning: 统一的推理配置
39 - str: "minimal", "low", "medium", "high" -> reasoning_effort
40 - dict: {"type": "enabled", "budget_tokens": N} -> thinking_config
41 - bool: True -> "medium", False -> None
42 - None: 不启用推理
44 Returns:
45 tuple: (reasoning_effort, thinking_config)
46 """
47 if reasoning is None:
48 return None, None
49 if isinstance(reasoning, str):
50 # 字符串类型,使用 reasoning_effort
51 # 确保字符串是有效的 ReasoningEffort 值
52 if reasoning in ("minimal", "low", "medium", "high"):
53 return reasoning, None # type: ignore[return-value]
54 return None, None
55 if isinstance(reasoning, dict):
56 # 字典类型,使用 thinking_config
57 return None, reasoning
58 if isinstance(reasoning, bool):
59 # 布尔类型,True 使用默认的 medium,False 不启用
60 return "medium" if reasoning else None, None
61 # 其他类型,默认不启用
62 return None, None
class BaseLLMClient(abc.ABC):
    """Abstract interface shared by all LLM client implementations."""

    def __init__(
        self,
        *,
        model: str,
        api_key: str | None = None,
        api_base: str | None = None,
        api_version: str | None = None,
        reasoning: ReasoningConfig = None,
        llm_config: LLMConfig | None = None,
        **llm_params: Any,  # noqa: ANN401
    ):
        """Store connection details, generation settings and reasoning defaults.

        Args:
            model: Model identifier passed to the backend.
            api_key: Optional API key.
            api_base: Optional API base URL.
            api_version: Optional API version string.
            reasoning: Unified reasoning configuration (see ReasoningConfig).
            llm_config: Explicit generation-parameter config; takes precedence
                over loose keyword arguments.
            **llm_params: Loose generation parameters used to build an
                LLMConfig when llm_config is not given.
        """
        self.model = model
        self.api_key = api_key
        self.api_base = api_base
        self.api_version = api_version

        # An explicit LLMConfig wins; otherwise build one from loose kwargs.
        self.llm_config = llm_config if llm_config is not None else LLMConfig(**llm_params)

        # Normalize the unified reasoning config into the two backend knobs.
        self.reasoning_effort: ReasoningEffort | None
        self.thinking_config: ThinkingConfig
        self.reasoning_effort, self.thinking_config = parse_reasoning_config(reasoning)

    @abc.abstractmethod
    async def completion(
        self,
        messages: list[Any],
        tools: list[ChatCompletionToolParam] | None = None,
        tool_choice: str = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # noqa: ANN401
    ) -> Any:  # noqa: ANN401
        """Perform a completion request to the LLM."""

    @abc.abstractmethod
    async def responses(
        self,
        messages: list[dict[str, Any]],  # Changed from ResponseInputParam
        tools: list[FunctionToolParam] | None = None,
        tool_choice: Literal["none", "auto", "required"] = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # noqa: ANN401
    ) -> Any:  # noqa: ANN401
        """Perform a response request to the LLM."""
class LiteLLMClient(BaseLLMClient):
    """LLM client backed by the litellm library."""

    # Generation parameters forwarded from LLMConfig when non-None.
    _LLM_CONFIG_FIELDS = (
        "temperature",
        "max_tokens",
        "top_p",
        "frequency_penalty",
        "presence_penalty",
        "stop",
    )

    def _resolve_reasoning_params(
        self,
        reasoning: ReasoningConfig,
    ) -> tuple[ReasoningEffort | None, ThinkingConfig]:
        """Resolve per-call reasoning config, falling back to instance defaults."""
        if reasoning is not None:
            return parse_reasoning_config(reasoning)
        # No per-call override: use the defaults computed in __init__.
        return self.reasoning_effort, self.thinking_config

    def _build_request_params(
        self,
        reasoning: ReasoningConfig,
        base_params: dict[str, Any],
    ) -> dict[str, Any]:
        """Merge base request params with generation settings and reasoning config.

        Shared by completion() and responses(), which previously duplicated
        this logic verbatim.

        NOTE(review): non-None values from self.llm_config override same-named
        keys already present in base_params (including caller **kwargs), and
        reasoning params override both — this preserves the original
        precedence; confirm it is intended.
        """
        params = dict(base_params)

        # Add LLM generation parameters if specified (non-None only).
        for name in self._LLM_CONFIG_FIELDS:
            value = getattr(self.llm_config, name)
            if value is not None:
                params[name] = value

        # Add reasoning parameters if specified.
        effort, thinking = self._resolve_reasoning_params(reasoning)
        if effort is not None:
            params["reasoning_effort"] = effort
        if thinking is not None:
            params["thinking"] = thinking

        return params

    async def completion(
        self,
        messages: list[Any],
        tools: list[ChatCompletionToolParam] | None = None,
        tool_choice: str = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # noqa: ANN401
    ) -> Any:  # noqa: ANN401
        """Perform a completion request to the Litellm API."""
        completion_params = self._build_request_params(
            reasoning,
            {
                "model": self.model,
                "messages": messages,
                "tools": tools,
                "tool_choice": tool_choice,
                "api_version": self.api_version,
                "api_key": self.api_key,
                "api_base": self.api_base,
                "stream": streaming,
                **kwargs,
            },
        )
        return await litellm.acompletion(**completion_params)

    async def responses(
        self,
        messages: list[dict[str, Any]],  # Changed from ResponseInputParam
        tools: list[FunctionToolParam] | None = None,
        tool_choice: Literal["none", "auto", "required"] = "auto",
        reasoning: ReasoningConfig = None,
        *,
        streaming: bool = True,
        **kwargs: Any,  # noqa: ANN401
    ) -> Any:  # type: ignore[return]  # noqa: ANN401
        """Perform a response request to the Litellm API."""
        # Workaround applied before each call; affects process-wide env.
        os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"

        response_params = self._build_request_params(
            reasoning,
            {
                "model": self.model,
                "input": messages,  # type: ignore[arg-type]
                "tools": tools,
                "tool_choice": tool_choice,
                "api_version": self.api_version,
                "api_key": self.api_key,
                "api_base": self.api_base,
                "stream": streaming,
                "store": False,
                **kwargs,
            },
        )
        return await litellm.aresponses(**response_params)  # type: ignore[return-value]