# CategorizeLabel/demo/test_batch_api.py

"""
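Smoke-test that the Tongyi Qianwen (Qwen) Batch API responds correctly.

The script is meant to be run against ``batch-test-model``, which does not
incur model-inference charges.
NOTE(review): ``main()`` creates the batch with
``endpoint="/v1/chat/completions"``, which the inline comments describe as the
production-model endpoint (the test model uses ``/v1/chat/ds-test``). Confirm
that the endpoint and the ``url`` field in test_model.jsonl agree before
relying on the "no charge" statement.
Reference: docs/API/Tongyi-API.md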
"""

import os
from pathlib import Path
from openai import OpenAI
import time


# Statuses at which the polling loop in main() stops querying the batch.
_TERMINAL_STATUSES = ("completed", "failed", "expired", "cancelled")

# Pointer to the error-code documentation, printed wherever an error is reported.
_ERROR_CODE_DOC_HINT = (
    "参考文档: https://help.aliyun.com/zh/model-studio/developer-reference/error-code"
)


def _print_banner(title):
    """Print *title* framed above and below by a 60-character ``=`` rule."""
    rule = "=" * 60
    print(rule)
    print(title)
    print(rule)


def _print_preview(text, limit=500):
    """Print the first *limit* characters of *text* between ``-`` rules.

    A ``...`` line is appended when *text* is longer than *limit*.
    """
    rule = "-" * 60
    print(rule)
    print(text[:limit])
    if len(text) > limit:
        print("...")
    print(rule)


def main():
    """Run the full Batch API round trip: upload, create, poll, download.

    The API key is read from the ``DASHSCOPE_API_KEY`` environment variable.
    ``test_model.jsonl`` is read from the directory containing this script;
    ``result.jsonl`` and ``error.jsonl`` are written to the same directory
    when the batch provides an output file or an error file.

    Progress is reported on stdout. Exceptions raised while talking to the
    API are caught and printed together with troubleshooting hints. Nothing
    is returned.
    """
    # Client for the OpenAI-compatible DashScope endpoint.
    client = OpenAI(
        api_key=os.getenv("DASHSCOPE_API_KEY"),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )

    # All files live next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    input_file_path = os.path.join(script_dir, "test_model.jsonl")
    output_file_path = os.path.join(script_dir, "result.jsonl")
    error_file_path = os.path.join(script_dir, "error.jsonl")

    # Report a missing input file explicitly; otherwise the generic handler
    # below would answer with API-key/network hints that do not apply.
    if not os.path.isfile(input_file_path):
        _print_banner("[ERROR] 找不到输入文件")
        print(f"请确认文件存在: {input_file_path}")
        return

    try:
        # Step 1: upload the JSONL file that holds the requests.
        _print_banner("步骤1：上传测试文件")
        print(f"正在上传文件: {input_file_path}")
        file_object = client.files.create(file=Path(input_file_path), purpose="batch")
        print("[OK] 文件上传成功")
        print(f"  文件ID: {file_object.id}")
        print()

        # Step 2: create the batch job.
        _print_banner("步骤2：创建Batch任务")
        print("正在创建Batch任务...")
        # Important: endpoint must match the `url` field in the request file.
        # - test model (batch-test-model): endpoint="/v1/chat/ds-test"
        # - production models (qwen-plus etc.): endpoint="/v1/chat/completions"
        batch = client.batches.create(
            input_file_id=file_object.id,
            endpoint="/v1/chat/completions",  # production-model endpoint (matches the url in test_model.jsonl)
            completion_window="24h",
        )
        print("[OK] Batch任务创建成功")
        print(f"  任务ID: {batch.id}")
        print(f"  状态: {batch.status}")
        print()

        # Step 3: poll every 10 seconds until the job reaches a terminal status.
        _print_banner("步骤3：等待任务完成")
        poll_count = 0
        while True:
            poll_count += 1
            batch = client.batches.retrieve(batch_id=batch.id)
            status = batch.status
            print(f"[轮询 #{poll_count}] 任务状态: {status}")
            if status in _TERMINAL_STATUSES:
                break
            print("  等待10秒后再次查询...")
            time.sleep(10)

        print()

        # A failed job has nothing to download; report and stop.
        if status == "failed":
            _print_banner("[ERROR] 任务失败")
            print(f"错误信息: {batch.errors}")
            print(_ERROR_CODE_DOC_HINT)
            return

        # Step 4: download whatever result files the batch exposes.
        _print_banner("步骤4：下载结果")

        if batch.output_file_id:
            print("正在下载成功结果...")
            content = client.files.content(batch.output_file_id)
            print("[OK] 成功结果（前500字符）:")
            _print_preview(content.text)

            # Keep the complete output on disk.
            content.write_to_file(output_file_path)
            print(f"[OK] 完整结果已保存到: {output_file_path}")
        else:
            print("[WARN] 没有输出文件")

        print()

        if batch.error_file_id:
            print("正在下载错误信息...")
            error_content = client.files.content(batch.error_file_id)
            print("[WARN] 错误信息（前500字符）:")
            _print_preview(error_content.text)

            # Keep the complete error report on disk.
            error_content.write_to_file(error_file_path)
            print(f"[OK] 完整错误信息已保存到: {error_file_path}")
            print(_ERROR_CODE_DOC_HINT)

        print()
        if status == "completed":
            _print_banner("[SUCCESS] 测试完成！API响应正常")
        else:
            # "expired" / "cancelled": the job stopped without completing, so
            # it must not be reported as a successful run.
            _print_banner(f"[WARN] 任务未完成，最终状态: {status}")

    except Exception as e:
        # Top-level boundary of a demo script: report the error and hints
        # instead of showing a traceback.
        print()
        _print_banner("[ERROR] 发生错误")
        print(f"错误信息: {e}")
        print(_ERROR_CODE_DOC_HINT)
        print()
        print("常见问题排查:")
        print("1. 检查DASHSCOPE_API_KEY环境变量是否设置")
        print("2. 检查网络连接是否正常")
        print("3. 检查API Key是否有效")


if __name__ == "__main__":
    # Fail fast with setup instructions when the API key is not configured.
    if not os.getenv("DASHSCOPE_API_KEY"):
        print("=" * 60)
        print("[ERROR] 错误：未设置API Key")
        print("=" * 60)
        print("请先设置环境变量 DASHSCOPE_API_KEY")
        print()
        print("Windows (PowerShell):")
        print('  $env:DASHSCOPE_API_KEY="sk-your-api-key"')
        print()
        print("Windows (CMD):")
        print("  set DASHSCOPE_API_KEY=sk-your-api-key")
        print()
        print("Linux/Mac:")
        print('  export DASHSCOPE_API_KEY="sk-your-api-key"')
        print("=" * 60)
        # `exit` is a helper injected by the `site` module for interactive
        # sessions and may be absent (e.g. `python -S`); raising SystemExit
        # gives the same exit status without depending on it.
        raise SystemExit(1)

    main()