Files
Trading_Studio/llm_service.py
T

199 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
远程 Ollama LLM 润色服务
通过局域网 HTTP 请求 Gemma4 模型,对交易复盘转写稿进行纪律审判式润色。
"""
from __future__ import annotations
import logging
import time
from typing import Tuple
import requests
from config import (
HEALTH_CHECK_CACHE_SECONDS,
HEALTH_CHECK_CONNECT_TIMEOUT,
HEALTH_CHECK_READ_TIMEOUT,
MODEL_NAME,
OLLAMA_TIMEOUT,
OLLAMA_URL,
SYSTEM_PROMPT,
)
logger = logging.getLogger(__name__)
# Short-lived cache of the most recent health-check result, so that reopening
# the page repeatedly from a tablet/phone does not wait on a new probe each time.
# Keys: "ts" - time.time() value recorded for the last probe;
#       "ok" - the boolean result of that probe;
#       "msg" - its status text (an empty string means nothing is cached yet,
#               because check_ollama_health only reuses an entry with a truthy msg).
_health_cache: dict = {"ts": 0.0, "ok": False, "msg": ""}
def _build_payload(raw_text: str) -> dict:
    """Assemble the non-streaming request body for Ollama's /api/chat endpoint.

    Args:
        raw_text: The transcript text to embed in the user message.

    Returns:
        A dict with the model name, a system + user message pair, streaming
        disabled, and the sampling options used for polishing.
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {
        "role": "user",
        # Instruction prefix followed by the transcript itself.
        "content": f"以下是我的交易复盘录音转写原文,请严格按系统要求润色:\n\n{raw_text}",
    }
    sampling = {"temperature": 0.7, "num_predict": 4096}

    return dict(
        model=MODEL_NAME,
        messages=[system_turn, user_turn],
        stream=False,
        options=sampling,
    )
def _extract_content(response_json: dict) -> str:
    """Pull the assistant's text out of a decoded Ollama response body.

    Two response shapes are recognised, tried in this order:

    1. ``/api/chat`` style: ``{"message": {"content": "..."}}``
    2. ``/api/generate`` style (older servers or proxies): ``{"response": "..."}``

    Args:
        response_json: The decoded JSON body of the reply.

    Returns:
        The first non-empty text found, with surrounding whitespace removed.

    Raises:
        ValueError: If the body is not a JSON object, or if neither field
            carries non-empty text. JSON ``null`` values are treated as
            "no text" rather than crashing or being rendered as ``"None"``.
    """
    if isinstance(response_json, dict):
        # Standard /api/chat shape.
        message = response_json.get("message")
        if isinstance(message, dict):
            content = message.get("content")
            # Only a real string is usable; null / structured content falls
            # through to the legacy field instead of raising AttributeError.
            if isinstance(content, str):
                content = content.strip()
                if content:
                    return content

        # Legacy /api/generate shape (some older versions or proxies).
        fallback = response_json.get("response")
        # A null "response" must not become the literal text "None".
        if fallback is not None:
            text = str(fallback).strip()
            if text:
                return text

    raise ValueError(f"无法从 Ollama 响应中解析文本内容: {response_json}")
def polish_text(raw_text: str) -> Tuple[bool, str]:
    """Send a raw transcript to the remote Ollama node and return the polished text.

    Failures that occur while sending the request or parsing the reply are
    logged and reported through the return value instead of being raised.

    Args:
        raw_text: The raw spoken-language transcript to polish.

    Returns:
        ``(True, polished_text)`` on success, otherwise
        ``(False, error_message)``.
    """
    cleaned = raw_text.strip() if raw_text else ""
    if not cleaned:
        return False, "润色输入为空,请先完成语音识别。"

    request_body = _build_payload(cleaned)

    try:
        logger.info("正在请求 Ollama: %s, model=%s", OLLAMA_URL, MODEL_NAME)
        reply = requests.post(OLLAMA_URL, json=request_body, timeout=OLLAMA_TIMEOUT)
        reply.raise_for_status()
        result = _extract_content(reply.json())
        if result:
            logger.info("润色完成,输出字数: %d", len(result))
            return True, result
        return False, "Ollama 返回内容为空,请检查模型是否正常加载。"

    # Handlers that only need an error-level log line just build the message;
    # the shared tail after the try statement logs and returns it.
    # ConnectTimeout must stay ahead of ConnectionError (it is a subclass).
    except requests.exceptions.ConnectTimeout:
        failure = (
            f"连接 Ollama 超时(>{OLLAMA_TIMEOUT}s)。"
            f"请确认 {OLLAMA_URL} 可达且 Ollama 服务已启动。"
        )
    except requests.exceptions.ReadTimeout:
        failure = (
            f"Ollama 响应超时(>{OLLAMA_TIMEOUT}s)。"
            "模型可能正在加载或生成长度过长,请稍后重试。"
        )
    except requests.exceptions.ConnectionError as exc:
        failure = (
            f"无法连接到 Ollama 节点 ({OLLAMA_URL})。"
            "请检查局域网连通性、防火墙及 Ollama 是否监听 0.0.0.0:11434。\n"
            f"详情: {exc}"
        )
    except requests.exceptions.HTTPError as exc:
        http_reply = exc.response
        # Compare against None explicitly: a Response object is falsy for
        # error status codes.
        if http_reply is not None:
            status, snippet = http_reply.status_code, http_reply.text[:500]
        else:
            status, snippet = "?", ""
        failure = (
            f"Ollama HTTP 错误 ({status})。"
            f"请确认模型 `{MODEL_NAME}` 已通过 ollama pull 下载。\n"
            f"响应片段: {snippet}"
        )
    except ValueError as exc:
        # Raised when the reply body cannot be decoded or holds no text.
        logger.error("Ollama 响应解析失败: %s", exc)
        return False, str(exc)
    except requests.exceptions.RequestException as exc:
        failure = f"Ollama 请求异常: {exc}"
        logger.exception(failure)
        return False, failure
    except Exception as exc:
        failure = f"润色过程发生未知错误: {exc}"
        logger.exception(failure)
        return False, failure

    logger.error(failure)
    return False, failure
def check_ollama_health(force: bool = False) -> Tuple[bool, str]:
    """Quickly probe whether the Ollama node is online, without running inference.

    Issues a GET to ``/api/tags`` on the host derived from ``OLLAMA_URL`` and
    looks for the configured model in the returned list. The connect/read
    timeouts and the cache lifetime come from the ``HEALTH_CHECK_*`` settings
    in ``config``. The result is stored in the module-level ``_health_cache``
    and reused until it expires.

    Args:
        force: When true, ignore any cached result and probe again.

    Returns:
        ``(online, message)``. ``online`` is true whenever the node answered
        the probe, even if the configured model is not listed; the message
        then tells the user which model to pull.
    """
    now = time.time()
    # An empty cached message means no probe has been recorded yet.
    cache_is_fresh = (
        _health_cache["msg"]
        and (now - _health_cache["ts"]) < HEALTH_CHECK_CACHE_SECONDS
    )
    if not force and cache_is_fresh:
        return _health_cache["ok"], _health_cache["msg"]

    # Drop the trailing "/api/<endpoint>" part to get the server root.
    base_url = OLLAMA_URL.rsplit("/api/", 1)[0]
    timeout = (HEALTH_CHECK_CONNECT_TIMEOUT, HEALTH_CHECK_READ_TIMEOUT)
    try:
        resp = requests.get(f"{base_url}/api/tags", timeout=timeout)
        resp.raise_for_status()
        body = resp.json()
        # Tolerate a null/missing "models" list, null names and non-object
        # entries: the node did answer, so this must not be reported as
        # "unreachable" by the generic handler below.
        models = (body.get("models") if isinstance(body, dict) else None) or []
        model_names = [
            str(entry.get("name") or "")
            for entry in models
            if isinstance(entry, dict)
        ]
        # NOTE(review): this is a substring match on the name without its tag,
        # so another tag or a longer model name also counts as "found" —
        # confirm this leniency is intended.
        wanted = MODEL_NAME.split(":")[0]
        ok = True
        if any(wanted in name for name in model_names):
            msg = f"Ollama 在线,已检测到模型: {MODEL_NAME}"
        else:
            # The sentence separator after the model name was missing, which
            # glued the name to the following instruction.
            msg = (
                f"Ollama 在线,但未找到模型 {MODEL_NAME}。"
                f"请执行: ollama pull {MODEL_NAME}"
            )
    except requests.exceptions.Timeout:
        ok, msg = False, (
            f"Ollama 检测超时(>{HEALTH_CHECK_READ_TIMEOUT}s)。"
            "页面已加载,可稍后点击「刷新状态」重试。"
        )
    except Exception as exc:
        # Best-effort probe: any other failure is reported as offline.
        ok, msg = False, f"Ollama 不可达: {exc}"

    # Mutated in place, so no ``global`` declaration is needed.
    _health_cache.update({"ts": now, "ok": ok, "msg": msg})
    return ok, msg