Coverage for src / dataknobs_bots / knowledge / query / expander.py: 21%
73 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-16 10:13 -0700
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-16 10:13 -0700
1"""Contextual query expansion using conversation history.
3This module provides query expansion without requiring LLM calls,
4using recent conversation context to enrich ambiguous queries.
5"""
7from __future__ import annotations
9from dataclasses import dataclass
10from typing import Any, Callable
13@dataclass
14class Message:
15 """A conversation message.
17 Attributes:
18 role: Message role ("user", "assistant", "system")
19 content: Message content
20 """
22 role: str
23 content: str
26class ContextualExpander:
27 """Expands queries using conversation context.
29 This expander enriches ambiguous or context-dependent queries
30 by incorporating information from recent conversation turns.
31 Unlike QueryTransformer, it doesn't require LLM calls.
33 Example:
34 ```python
35 expander = ContextualExpander(max_context_turns=3)
37 # User asks: "Show me an example"
38 # Recent context: discussing chain-of-thought prompting
39 expanded = expander.expand(
40 "Show me an example",
41 conversation_history
42 )
43 # Returns: "chain-of-thought prompting examples Show me an example"
44 ```
45 """
47 def __init__(
48 self,
49 max_context_turns: int = 3,
50 include_assistant: bool = False,
51 keyword_weight: int = 2,
52 ):
53 """Initialize the contextual expander.
55 Args:
56 max_context_turns: Maximum conversation turns to consider
57 include_assistant: Whether to include assistant messages
58 keyword_weight: How many times to repeat extracted keywords
59 """
60 self.max_context_turns = max_context_turns
61 self.include_assistant = include_assistant
62 self.keyword_weight = keyword_weight
64 # Common words to filter out
65 self._stop_words = {
66 "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
67 "have", "has", "had", "do", "does", "did", "will", "would", "could",
68 "should", "may", "might", "must", "can", "this", "that", "these",
69 "those", "i", "you", "he", "she", "it", "we", "they", "what", "which",
70 "who", "when", "where", "why", "how", "all", "each", "every", "both",
71 "few", "more", "most", "other", "some", "such", "no", "not", "only",
72 "own", "same", "so", "than", "too", "very", "just", "also", "now",
73 "here", "there", "about", "into", "through", "during", "before",
74 "after", "above", "below", "to", "from", "up", "down", "in", "out",
75 "on", "off", "over", "under", "again", "further", "then", "once",
76 "and", "but", "or", "nor", "for", "yet", "because", "as", "until",
77 "while", "of", "at", "by", "with", "without", "between", "me", "my",
78 "your", "his", "her", "its", "our", "their", "please", "help", "want",
79 "need", "like", "show", "tell", "give", "make", "let", "get", "see",
80 }
82 def expand(
83 self,
84 user_input: str,
85 conversation_history: list[Message] | list[dict[str, Any]],
86 ) -> str:
87 """Expand query with conversation context.
89 Args:
90 user_input: The user's current message
91 conversation_history: Recent conversation messages
93 Returns:
94 Expanded query string
95 """
96 # Normalize conversation history to Message objects
97 messages = self._normalize_messages(conversation_history)
99 # Get recent context
100 recent = self._get_recent_context(messages)
102 # Extract keywords from context
103 keywords = self._extract_keywords(recent)
105 # Build expanded query
106 if keywords:
107 keyword_str = " ".join(keywords)
108 return f"{keyword_str} {user_input}"
110 return user_input
112 def _normalize_messages(
113 self,
114 history: list[Message] | list[dict[str, Any]],
115 ) -> list[Message]:
116 """Normalize history to Message objects.
118 Args:
119 history: Conversation history in various formats
121 Returns:
122 List of Message objects
123 """
124 messages = []
125 for item in history:
126 if isinstance(item, Message):
127 messages.append(item)
128 elif isinstance(item, dict):
129 messages.append(Message(
130 role=item.get("role", "user"),
131 content=item.get("content", ""),
132 ))
133 return messages
135 def _get_recent_context(self, messages: list[Message]) -> list[str]:
136 """Get recent relevant context from conversation.
138 Args:
139 messages: Conversation messages
141 Returns:
142 List of context strings
143 """
144 context = []
145 count = 0
147 # Walk backwards through messages
148 for msg in reversed(messages):
149 if count >= self.max_context_turns:
150 break
152 if msg.role == "user" or (msg.role == "assistant" and self.include_assistant):
153 context.insert(0, msg.content)
154 count += 1
156 return context
158 def _extract_keywords(self, context: list[str]) -> list[str]:
159 """Extract meaningful keywords from context.
161 Args:
162 context: List of context strings
164 Returns:
165 List of extracted keywords
166 """
167 # Combine all context
168 combined = " ".join(context)
170 # Tokenize and filter
171 words = combined.lower().split()
172 keywords = []
174 for word in words:
175 # Clean punctuation
176 cleaned = word.strip(".,!?\"'()[]{}:;")
178 # Skip short words, stop words, and numbers
179 if (
180 len(cleaned) < 3
181 or cleaned in self._stop_words
182 or cleaned.isdigit()
183 ):
184 continue
186 # Add keyword if not already present
187 if cleaned not in keywords:
188 keywords.append(cleaned)
190 # Return top keywords (most recent first gives natural weighting)
191 return keywords[:5]
193 def expand_with_topics(
194 self,
195 user_input: str,
196 conversation_history: list[Message] | list[dict[str, Any]],
197 topic_extractor: Callable[[str], list[str]] | None = None,
198 ) -> str:
199 """Expand query with extracted topics.
201 Enhanced expansion that uses a custom topic extractor.
203 Args:
204 user_input: The user's current message
205 conversation_history: Recent conversation messages
206 topic_extractor: Optional function to extract topics from text
208 Returns:
209 Expanded query string
210 """
211 messages = self._normalize_messages(conversation_history)
212 recent = self._get_recent_context(messages)
214 if topic_extractor:
215 # Use custom topic extraction
216 topics = []
217 for text in recent:
218 topics.extend(topic_extractor(text))
219 topic_str = " ".join(topics[:5])
220 else:
221 # Fall back to keyword extraction
222 keywords = self._extract_keywords(recent)
223 topic_str = " ".join(keywords)
225 if topic_str:
226 return f"{topic_str} {user_input}"
228 return user_input
231def is_ambiguous_query(query: str) -> bool:
232 """Check if a query is likely ambiguous and needs expansion.
234 Args:
235 query: The query to check
237 Returns:
238 True if query appears ambiguous
240 Example:
241 ```python
242 is_ambiguous_query("Show me an example") # True
243 is_ambiguous_query("How do I configure OAuth?") # False
244 ```
245 """
246 # Short queries are often ambiguous
247 words = query.split()
248 if len(words) < 4:
249 return True
251 # Queries with demonstratives are often context-dependent
252 ambiguous_patterns = [
253 "this", "that", "these", "those", "it", "them",
254 "example", "more", "another", "same", "similar",
255 ]
257 query_lower = query.lower()
258 for pattern in ambiguous_patterns:
259 if pattern in query_lower:
260 return True
262 return False