TravelContentCreator/examples/test_topic_parser.py

159 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
测试TopicParser组件
该脚本演示如何独立使用topic_parser模块解析JSON格式的选题数据。
"""
import os
import sys
import json
import time
from pathlib import Path
# Add the project root (the parent of this script's directory) to sys.path so
# that the `core` package can be imported when the script is run directly.
# Resolve first: if __file__ is a bare relative name, Path(__file__).parent.parent
# collapses to "." and would point at the current directory instead.
project_root = str(Path(__file__).resolve().parent.parent)
if project_root not in sys.path:
    sys.path.append(project_root)
from core.topic_parser import TopicParser
def get_test_json():
    """Return a well-formed JSON string describing two sample topics.

    The payload is a top-level JSON array of two objects. Each object carries
    the keys ``index``, ``date``, ``logic``, ``object``, ``product``,
    ``product_logic``, ``style``, ``style_logic``, ``target_audience`` and
    ``target_audience_logic``.

    Returns:
        str: The JSON text, including a leading and trailing newline.
    """
    return '''
[
{
"index": 1,
"date": "2024-05-30",
"logic": "这是一个著名的景点,有丰富的历史文化底蕴,适合文化和历史爱好者。",
"object": "泰宁古城",
"product": "文化体验套票.txt",
"product_logic": "泰宁古城有多处明清古建筑,游客可以通过体验套票更全面了解当地历史文化。",
"style": "人文历史.txt",
"style_logic": "古城拥有800多年历史古建筑保存完好适合人文历史风格。",
"target_audience": "家庭游客.txt",
"target_audience_logic": "古城内有适合家庭参观的展览和互动活动,老人孩子都能找到感兴趣的内容。"
},
{
"index": 2,
"date": "2024-05-30",
"logic": "这是一个自然景观与历史文化相结合的景点,适合各类游客。",
"object": "甘露岩寺",
"product": "亲子研学.txt",
"product_logic": "甘露岩寺建筑结构特殊,可作为建筑研学的典型案例。",
"style": "摄影指南.txt",
"style_logic": "寺庙依山而建,视角独特,适合摄影爱好者拍摄。",
"target_audience": "亲子游.txt",
"target_audience_logic": "有趣的建筑结构和历史故事适合亲子游客探索学习。"
}
]
'''
def get_invalid_json():
    """Return syntactically valid JSON whose structure differs from the topic list.

    Unlike ``get_test_json``-style input (a top-level array of topic objects),
    this payload is a JSON object with a single ``topics`` key holding entries
    that only have ``name`` and ``description`` fields.

    Returns:
        str: The JSON text, including a leading and trailing newline.
    """
    return '''
{
"topics": [
{"name": "泰宁古城", "description": "这是一个测试"},
{"name": "甘露岩寺", "description": "这是另一个测试"}
]
}
'''
def get_malformed_json():
    """Return a deliberately malformed JSON string.

    The text is not valid JSON for two reasons: the ``"index": 2`` member is
    not followed by a comma, and the next line carries a ``//`` comment, which
    JSON does not allow. Note that the embedded comment ("missing comma") sits
    on the line *after* the one that actually lacks the comma.

    Returns:
        str: The malformed text, including a leading and trailing newline.
    """
    return '''
[
{
"index": 1,
"date": "2024-05-30",
"object": "泰宁古城"
},
{
"index": 2
"date": "2024-05-30", // 缺少逗号
"object": "甘露岩寺"
}
]
'''
def _expect_parse_failure(raw_json, pass_message):
    """Parse input that is expected to be rejected and print the outcome."""
    try:
        topics = TopicParser.parse_topics(raw_json)
        if topics:
            # A non-empty result means the parser accepted input it should not have.
            print(f"注意: 成功解析了 {len(topics)} 个选题(预期应该失败)")
        else:
            print(pass_message)
    except Exception as e:
        # Raising is also an acceptable way for the parser to reject the input.
        print(f"捕获到异常 (预期行为): {e}")


def main():
    """Run four manual checks of ``TopicParser.parse_topics`` and print the results.

    The checks are:
      1. A well-formed JSON array of topics (timed).
      2. Valid JSON with an unexpected structure (expected to yield no topics).
      3. Syntactically malformed JSON (expected to yield no topics).
      4. An already-decoded Python object instead of a JSON string.

    Every check catches ``Exception`` so that one failing case does not stop
    the remaining ones; results are reported on stdout only.
    """
    print("=== 测试 TopicParser 组件 ===")

    # Test 1: parse well-formed JSON.
    print("\n测试1: 解析有效的JSON数据")
    valid_json = get_test_json()
    # len() on a str counts characters; encode so the number matches the
    # "bytes" label even though the payload contains multi-byte characters.
    byte_count = len(valid_json.encode("utf-8"))
    print(f"输入JSON数据 ({byte_count} 字节):\n{valid_json[:200]}...")
    try:
        # Time only the parse call, using a monotonic high-resolution clock.
        start_time = time.perf_counter()
        topics = TopicParser.parse_topics(valid_json)
        elapsed = time.perf_counter() - start_time
        if topics:
            print(f"成功解析! 解析到 {len(topics)} 个选题,耗时: {elapsed:.4f}")
            print("\n解析结果:")
            for i, topic in enumerate(topics):
                print(f"\n选题 {i+1}:")
                print(f" 对象: {topic.get('object', 'N/A')}")
                print(f" 样式: {topic.get('style', 'N/A')}")
                print(f" 目标受众: {topic.get('target_audience', 'N/A')}")
        else:
            print("解析失败: 未返回选题列表")
    except Exception as e:
        print(f"解析出错: {e}")

    # Test 2: valid JSON syntax, but not the expected topic-list structure.
    print("\n测试2: 解析结构无效的JSON数据")
    invalid_json = get_invalid_json()
    print(f"输入无效结构JSON:\n{invalid_json}")
    _expect_parse_failure(invalid_json, "测试通过: 正确地未能解析无效结构")

    # Test 3: syntactically malformed JSON.
    print("\n测试3: 解析格式错误的JSON")
    malformed_json = get_malformed_json()
    print(f"输入格式错误JSON:\n{malformed_json}")
    _expect_parse_failure(malformed_json, "测试通过: 正确地未能解析格式错误JSON")

    # Test 4: pass an already-decoded Python object instead of a string.
    print("\n测试4: 直接解析Python字典对象")
    try:
        # Decode the JSON text into Python objects first.
        topic_list = json.loads(get_test_json())
        # Hand the decoded object straight to the parser.
        topics = TopicParser.parse_topics(topic_list)
        if topics:
            print(f"成功解析! 解析到 {len(topics)} 个选题")
            print("测试通过: 可以接受Python对象作为输入")
        else:
            print("解析失败: 未返回选题列表")
    except Exception as e:
        print(f"解析出错: {e}")

    print("\n=== 测试完成 ===")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()