CategorizeLabel/tests/fixtures/create_test_data.py

81 lines
2.2 KiB
Python
Raw Permalink Normal View History

2025-10-15 17:19:26 +08:00
"""创建测试数据文件"""
import pandas as pd
import json
import os
# Make sure the fixtures directory (the folder that holds this script) exists.
# The path is made absolute first: when __file__ is a bare relative filename
# (script launched from its own folder on Python < 3.9), dirname() returns ""
# and os.makedirs("") raises FileNotFoundError.
os.makedirs(os.path.dirname(os.path.abspath(__file__)), exist_ok=True)
# Create sample_products.xlsx: three sample products, one column per field,
# written next to this script without the DataFrame index column.
products_data = {
    "产品编号": ["P001", "P002", "P003"],
    "产品名称": ["黄山风景区门票", "黄山温泉酒店", "徽菜套餐"],
    "景区名称": ["黄山", "黄山", "黄山"],
}
df = pd.DataFrame(products_data)
products_xlsx = os.path.join(os.path.dirname(__file__), "sample_products.xlsx")
df.to_excel(products_xlsx, index=False)
# Create sample_categories.jsonl: one JSON object per line, UTF-8 encoded,
# with non-ASCII characters written as-is (ensure_ascii=False).
categories = [
    {"category": "门票", "type": "自然类", "sub_type": "自然风光"},
    {"category": "住宿", "type": "商务酒店", "sub_type": ""},
    {"category": "餐饮", "type": "地方特色", "sub_type": "徽菜"},
]
categories_path = os.path.join(os.path.dirname(__file__), "sample_categories.jsonl")
with open(categories_path, "w", encoding="utf-8") as f:
    # The loop has to be nested inside the with-block (and the write inside
    # the loop) so every record is written while the file is still open.
    for cat in categories:
        f.write(json.dumps(cat, ensure_ascii=False) + "\n")
# Create sample_batch_response.jsonl: two sample batch-response records, one
# JSON object per line. Each record's message "content" is itself a JSON
# string, so it is kept as a pre-serialized literal rather than a nested dict.
responses = [
    {
        "id": "req_001",
        "custom_id": "P001",
        "response": {
            "status_code": 200,
            "body": {
                "choices": [
                    {
                        "message": {
                            "content": '{"category":"门票","type":"自然类","sub_type":"自然风光"}'
                        }
                    }
                ]
            },
        },
        "error": None,
    },
    {
        "id": "req_002",
        "custom_id": "P002",
        "response": {
            "status_code": 200,
            "body": {
                "choices": [
                    {
                        "message": {
                            "content": '{"category":"住宿","type":"商务酒店","sub_type":""}'
                        }
                    }
                ]
            },
        },
        "error": None,
    },
]
batch_response_path = os.path.join(
    os.path.dirname(__file__), "sample_batch_response.jsonl"
)
with open(batch_response_path, "w", encoding="utf-8") as f:
    # Nested inside the with-block so each record is written while the file
    # is open; None is serialized as JSON null.
    for resp in responses:
        f.write(json.dumps(resp, ensure_ascii=False) + "\n")

# Final status message, printed once after all fixture files are written.
print("测试数据文件创建完成!")