"""LLM helper: OpenRouter calls, token estimation, context fitting.""" import json import logging import os from typing import Any import httpx log = logging.getLogger("runtime") API_KEY = os.environ.get("OPENROUTER_API_KEY", "") OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions" async def llm_call(model: str, messages: list[dict], stream: bool = False) -> Any: """Single LLM call via OpenRouter. Returns full text or (client, response) for streaming.""" headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"} body = {"model": model, "messages": messages, "stream": stream} client = httpx.AsyncClient(timeout=60) if stream: resp = await client.send(client.build_request("POST", OPENROUTER_URL, headers=headers, json=body), stream=True) return client, resp resp = await client.post(OPENROUTER_URL, headers=headers, json=body) await client.aclose() data = resp.json() if "choices" not in data: log.error(f"LLM error: {data}") return f"[LLM error: {data.get('error', {}).get('message', 'unknown')}]" return data["choices"][0]["message"]["content"] def estimate_tokens(text: str) -> int: """Rough token estimate: 1 token ~ 4 chars.""" return len(text) // 4 def fit_context(messages: list[dict], max_tokens: int, protect_last: int = 4) -> list[dict]: """Trim oldest messages (after system prompt) to fit token budget. Always keeps: system prompt(s) at start + last `protect_last` messages.""" if not messages: return messages system_msgs = [] rest = [] for m in messages: if not rest and m["role"] == "system": system_msgs.append(m) else: rest.append(m) protected = rest[-protect_last:] if len(rest) > protect_last else rest middle = rest[:-protect_last] if len(rest) > protect_last else [] fixed_tokens = sum(estimate_tokens(m["content"]) for m in system_msgs + protected) if fixed_tokens >= max_tokens: result = system_msgs + protected total = sum(estimate_tokens(m["content"]) for m in result) while total > max_tokens and len(result) > 2: removed = result.pop(1) total -= estimate_tokens(removed["content"]) return result remaining = max_tokens - fixed_tokens kept_middle = [] for m in reversed(middle): t = estimate_tokens(m["content"]) if remaining - t < 0: break kept_middle.insert(0, m) remaining -= t return system_msgs + kept_middle + protected