diff --git a/DEPLOY.md b/DEPLOY.md index a5c3589..dde6c03 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -102,6 +102,26 @@ cd /opt/Trading_Studio bash deploy.sh update ``` +> **git pull 报本地修改冲突?** 新版 `deploy.sh` 会自动 `stash` 后同步;若仍失败可手动: +> `git fetch origin && git reset --hard origin/main` + +### 0.4.1 pip / PyTorch 下载超时 + +PyTorch + triton 约 2-3GB;脚本默认启用清华镜像(`USE_CN_MIRROR=1`,适合国内网络),并将 pip 超时延长到 600 秒: + +```bash +# 默认已开启国内镜像,若仍慢可加大超时 +USE_CN_MIRROR=1 PIP_TIMEOUT=900 bash deploy.sh deps + +# 仅补装依赖(不拉代码、不启 PM2) +bash deploy.sh deps + +# PyTorch 已手动装好时跳过 +SKIP_PYTORCH=1 bash deploy.sh deps +``` + +下载过程中出现 `Retrying... Read timed out` 属于正常重试,**并非卡死**,请耐心等待 10-30 分钟。 + ### 0.5 PM2 运维(root 环境) ```bash diff --git a/deploy.sh b/deploy.sh index e195128..b86957d 100644 --- a/deploy.sh +++ b/deploy.sh @@ -12,6 +12,12 @@ # sudo bash deploy.sh stop # 停止 PM2 进程 # sudo bash deploy.sh status # 查看 PM2 与 GPU 状态 # sudo bash deploy.sh logs # 查看 PM2 最近日志 +# +# 环境变量: +# USE_CN_MIRROR=1 使用清华 PyPI / PyTorch 镜像(国内服务器推荐,默认开启) +# USE_CN_MIRROR=0 使用官方 PyTorch 源 +# PIP_TIMEOUT=600 pip 单次下载超时秒数(默认 600) +# SKIP_PYTORCH=1 跳过 PyTorch 安装(已手动装好时使用) # ============================================================================= set -euo pipefail @@ -21,10 +27,22 @@ set -euo pipefail # --------------------------------------------------------------------------- INSTALL_DIR="/opt/Trading_Studio" GIT_REPO="https://git.bz121.com/dekun/Trading_Studio.git" +GIT_BRANCH="main" PM2_APP_NAME="trading_studio" GRADIO_PORT=5683 GPU_POWER_LIMIT=120 -PYTORCH_INDEX="https://download.pytorch.org/whl/cu121" + +# pip 网络参数(下载 triton 等大包需要更长超时) +PIP_TIMEOUT="${PIP_TIMEOUT:-600}" +PIP_RETRIES="${PIP_RETRIES:-15}" +PIP_ATTEMPTS="${PIP_ATTEMPTS:-3}" + +# 国内镜像(默认开启,海外服务器可 export USE_CN_MIRROR=0) +USE_CN_MIRROR="${USE_CN_MIRROR:-1}" +PYTORCH_INDEX_OFFICIAL="https://download.pytorch.org/whl/cu121" +PYTORCH_INDEX_CN="https://mirrors.tuna.tsinghua.edu.cn/pytorch-wheels/cu121" +PIP_INDEX_CN="https://pypi.tuna.tsinghua.edu.cn/simple"
+HF_MIRROR="https://hf-mirror.com"
 
 # ---------------------------------------------------------------------------
 # 颜色输出
@@ -103,18 +121,64 @@ install_node_pm2() {
 # ---------------------------------------------------------------------------
 # 代码部署
 # ---------------------------------------------------------------------------
+# Bring the checkout at $1 in line with origin/${GIT_BRANCH}.
+# Order of attempts: stash local edits -> fast-forward pull -> fetch + hard reset.
+# Returns 0 once the tree matches a remote branch, 1 (after printing manual
+# recovery hints) when nothing worked. Leaves the shell's cwd inside $1.
+sync_git_repo() {
+    local repo_dir="$1"
+    cd "${repo_dir}"
+
+    # Park local edits (e.g. CRLF fixes made with sed) so they cannot block the pull.
+    if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
+        log_warn "检测到本地未提交更改,自动 git stash ..."
+        # Best effort, but say so: the hard reset below discards whatever was not stashed.
+        git stash push -u -m "deploy-auto-stash-$(date +%Y%m%d%H%M%S)" \
+            || log_warn "git stash 失败,未保存的本地修改可能被后续 reset --hard 覆盖"
+    fi
+
+    # Preferred path: fast-forward only.
+    if git pull --ff-only origin "${GIT_BRANCH}" 2>/dev/null; then
+        log_ok "git pull 成功 (origin/${GIT_BRANCH})"
+        return 0
+    fi
+    if git pull --ff-only 2>/dev/null; then
+        log_ok "git pull 成功"
+        return 0
+    fi
+
+    # Pull refused: force the tree onto the remote branch.
+    log_warn "git pull 失败,执行 fetch + reset --hard 同步远端 ..."
+    # The fetch is tested inside `if` so that, under `set -e`, a dead network falls
+    # through to the recovery hints instead of killing the script on this line; a
+    # failed fetch also must not be followed by a reset onto stale remote refs.
+    if git fetch origin "${GIT_BRANCH}" 2>/dev/null || git fetch origin; then
+        if git reset --hard "origin/${GIT_BRANCH}" 2>/dev/null; then
+            log_ok "已强制同步到 origin/${GIT_BRANCH}"
+            return 0
+        fi
+        if git reset --hard "origin/master" 2>/dev/null; then
+            log_ok "已强制同步到 origin/master"
+            return 0
+        fi
+    fi
+
+    log_error "代码同步失败,请手动处理:"
+    log_error " cd ${repo_dir} && git status && git pull"
+    return 1
+}
+
 deploy_code() {
     if [[ -d "${INSTALL_DIR}/.git" ]]; then
         log_info "更新已有代码: ${INSTALL_DIR}"
-        git -C "${INSTALL_DIR}" pull --ff-only || {
-            log_warn "git pull 失败,尝试保留本地更改继续部署 ..."
-        }
+        sync_git_repo "${INSTALL_DIR}"
     elif [[ -d "${INSTALL_DIR}" ]]; then
         log_error "${INSTALL_DIR} 已存在但不是 git 仓库,请手动处理后重试。"
         exit 1
     else
         log_info "克隆仓库到 ${INSTALL_DIR} ..."
-        git clone "${GIT_REPO}" "${INSTALL_DIR}"
+        git clone -b "${GIT_BRANCH}" "${GIT_REPO}" "${INSTALL_DIR}" || \
+            git clone "${GIT_REPO}" "${INSTALL_DIR}"
     fi
     log_ok "代码就绪: ${INSTALL_DIR}"
 }
@@ -122,34 +176,120 @@ deploy_code() {
 # ---------------------------------------------------------------------------
 # Python 环境
 # ---------------------------------------------------------------------------
+# Apply the pip network settings for the venv whose pip binary is $1 and export
+# the mirror-related variables for the rest of this process.
+# Reads PIP_TIMEOUT, PIP_RETRIES, USE_CN_MIRROR, PIP_INDEX_CN, HF_MIRROR.
+# Every `pip config` call is best effort (errors are ignored).
+configure_pip() {
+    local venv_pip="$1"
+
+    log_info "配置 pip 网络参数 (timeout=${PIP_TIMEOUT}s, retries=${PIP_RETRIES}) ..."
+
+    # --site writes <venv>/pip.conf; without it pip falls back to the invoking
+    # user's config file, which would change every other pip run by that user.
+    "${venv_pip}" config --site set global.timeout "${PIP_TIMEOUT}" 2>/dev/null || true
+    "${venv_pip}" config --site set global.retries "${PIP_RETRIES}" 2>/dev/null || true
+
+    if [[ "${USE_CN_MIRROR}" == "1" ]]; then
+        log_info "启用国内镜像: 清华 PyPI + PyTorch cu121"
+        "${venv_pip}" config --site set global.index-url "${PIP_INDEX_CN}" 2>/dev/null || true
+        export PIP_INDEX_URL="${PIP_INDEX_CN}"
+        # Hugging Face mirror (ChatTTS model downloads): only with the CN switch on,
+        # and never over an endpoint the caller already exported.
+        export HF_ENDPOINT="${HF_ENDPOINT:-${HF_MIRROR}}"
+    else
+        # Forget an index-url saved by an earlier USE_CN_MIRROR=1 run.
+        "${venv_pip}" config --site unset global.index-url 2>/dev/null || true
+    fi
+
+    export HF_HUB_ENABLE_HF_TRANSFER=0
+}
+
+# Run `<pip> install <args...>` up to PIP_ATTEMPTS times.
+#   $1     pip executable
+#   $2...  arguments passed through to `pip install`
+# Returns 0 on the first success, 1 when every attempt failed.
+pip_install_with_retry() {
+    local attempt=1
+    local pip_bin="$1"
+    shift
+
+    while [[ "${attempt}" -le "${PIP_ATTEMPTS}" ]]; do
+        log_info "pip 安装中 (第 ${attempt}/${PIP_ATTEMPTS} 次),大包下载较慢请耐心等待 ..."
+        if "${pip_bin}" install \
+            --timeout "${PIP_TIMEOUT}" \
+            --retries "${PIP_RETRIES}" \
+            --progress-bar on \
+            "$@"; then
+            return 0
+        fi
+        # Pause only when another attempt follows; waiting after the last one just delays the error.
+        if [[ "${attempt}" -lt "${PIP_ATTEMPTS}" ]]; then
+            log_warn "pip 安装失败,60 秒后重试 ..."
+            sleep 60
+        fi
+        attempt=$((attempt + 1))
+    done
+
+    log_error "pip 安装多次重试仍失败,请检查网络或手动安装后设置 SKIP_PYTORCH=1 重试"
+    return 1
+}
+
+install_pytorch() {
+    local pip_bin="$1"
+    local pytorch_index="${PYTORCH_INDEX_OFFICIAL}"
+
+    if [[ "${USE_CN_MIRROR}" == "1" ]]; then
+        pytorch_index="${PYTORCH_INDEX_CN}"
+    fi
+
+    log_info "安装 PyTorch CUDA 12.1 (源: ${pytorch_index}) ..."
+    log_warn "PyTorch + triton 体积约 2-3GB,国内网络可能需要 10-30 分钟,并非卡死。"
+
+    # Install in separate steps so a failed download only repeats part of the work.
+    pip_install_with_retry "${pip_bin}" \
+        torch \
+        --index-url "${pytorch_index}"
+
+    pip_install_with_retry "${pip_bin}" \
+        torchvision torchaudio \
+        --index-url "${pytorch_index}"
+}
+
 setup_python_venv() {
     local venv_path="${INSTALL_DIR}/venv"
+    local pip_bin="${venv_path}/bin/pip"
+    local python_bin="${venv_path}/bin/python"
 
-    if [[ ! -d "${venv_path}" ]]; then
+    if [[ ! -x "${pip_bin}" ]]; then  # missing or half-built venv (no bin/pip): (re)create it
         log_info "创建 Python 虚拟环境 ..."
         python3 -m venv "${venv_path}"
     fi
 
-    # shellcheck disable=SC1091
-    source "${venv_path}/bin/activate"
+    configure_pip "${pip_bin}"
 
     log_info "升级 pip ..."
-    pip install --upgrade pip setuptools wheel -q
+    pip_install_with_retry "${pip_bin}" --upgrade pip setuptools wheel
 
-    log_info "安装 PyTorch (CUDA 12.1) ..."
-    pip install torch torchvision torchaudio --index-url "${PYTORCH_INDEX}" -q
+    # Skip PyTorch when asked to, or when a CUDA-capable torch is already importable.
+    if [[ "${SKIP_PYTORCH:-0}" == "1" ]]; then
+        log_warn "SKIP_PYTORCH=1,跳过 PyTorch 安装"
+    elif "${python_bin}" -c "import torch; assert torch.cuda.is_available()" 2>/dev/null; then
+        log_ok "PyTorch 已安装且 CUDA 可用: $("${python_bin}" -c 'import torch; print(torch.__version__)')"
+    else
+        install_pytorch "${pip_bin}"
+    fi
 
-    log_info "安装项目依赖 ..."
-    pip install -r "${INSTALL_DIR}/requirements.txt" -q
+    log_info "安装项目依赖 (requirements.txt) ..."
+    pip_install_with_retry "${pip_bin}" -r "${INSTALL_DIR}/requirements.txt"
 
     # 验证 CUDA
-    if python -c "import torch; assert torch.cuda.is_available()" 2>/dev/null; then
-        log_ok "PyTorch CUDA 可用: $(python -c 'import torch; print(torch.cuda.get_device_name(0))')"
+    if "${python_bin}" -c "import torch; assert torch.cuda.is_available()" 2>/dev/null; then
+        log_ok "PyTorch CUDA 可用: $("${python_bin}" -c 'import torch; print(torch.cuda.get_device_name(0))')"
     else
         log_warn "PyTorch CUDA 不可用,请检查 NVIDIA 驱动与 CUDA 运行时。"
     fi
 
-    deactivate
     log_ok "Python 虚拟环境配置完成"
 }
 
@@ -180,7 +303,6 @@ pm2_start() {
     log_info "通过 PM2 启动 Trading Studio ..."
cd "${INSTALL_DIR}"
 
-    # 若已有同名进程则先删除再启动,避免重复
     if pm2 describe "${PM2_APP_NAME}" &>/dev/null; then
         pm2 delete "${PM2_APP_NAME}" || true
     fi
@@ -188,7 +310,6 @@ pm2_start() {
     pm2 start ecosystem.config.js
     pm2 save
 
-    # 配置 root 用户开机自启
     local startup_cmd
     startup_cmd=$(pm2 startup systemd -u root --hp /root 2>&1 | grep "sudo env" || true)
     if [[ -n "${startup_cmd}" ]]; then
@@ -247,6 +368,7 @@ cmd_install() {
     log_info "========== Trading Studio 一键部署开始 =========="
     log_info "安装目录: ${INSTALL_DIR}"
     log_info "运行用户: root"
+    log_info "国内镜像: USE_CN_MIRROR=${USE_CN_MIRROR} pip超时: ${PIP_TIMEOUT}s"
 
     install_system_deps
     install_node_pm2
@@ -277,6 +399,20 @@ cmd_update() {
     log_ok "更新完成"
 }
 
+# Dependencies-only entry point (DEPLOY.md: `bash deploy.sh deps`): runs the venv setup, no git sync, no PM2.
+cmd_deps_only() {
+    log_info "========== 仅安装/更新 Python 依赖 =========="
+    # Without a checkout, setup_python_venv would create ${INSTALL_DIR} just for the
+    # venv (deploy_code then refuses the directory as "not a git repo") and would
+    # only fail at the requirements.txt step, so refuse up front.
+    if [[ ! -f "${INSTALL_DIR}/requirements.txt" ]]; then
+        log_error "未找到 ${INSTALL_DIR}/requirements.txt,请先完成代码部署后再执行 deps。"
+        exit 1
+    fi
+    setup_python_venv
+    log_ok "依赖安装完成"
+}
+
 print_usage() {
     cat <