TravelContentCreator/scripts/query_products.py

224 lines
9.4 KiB
Python
Raw Normal View History

2025-05-14 10:05:25 +08:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sqlite3
import csv
import sys
import argparse
from datetime import datetime
# Path to the SQLite database file opened by query_products().
db_path = '/root/autodl-tmp/TravelContentCreator/distribution.db'
# Number of result rows echoed to stdout before the listing is truncated.
_PREVIEW_ROWS = 10
# LIMIT applied to the per-object statistics query.
_OBJECT_STATS_LIMIT = 20
# Per-object statistics are printed only when at most this many groups match.
_OBJECT_STATS_MAX_SHOWN = 10
# Display labels for the judge_status column; any other value prints as unknown.
_JUDGE_LABELS = {1: '已通过', 0: '未通过'}


def _build_filter(product_name, object_name, show_undistributed_only):
    """Return ``(where_clause, params)`` for the requested filters."""
    conditions = []
    params = []
    if product_name:
        conditions.append("product LIKE ?")
        params.append(f"%{product_name}%")
    if object_name:
        conditions.append("object LIKE ?")
        params.append(f"%{object_name}%")
    if show_undistributed_only:
        conditions.append("is_distributed = 0")
    # "1=1" keeps the SQL valid when no filter was requested.
    where_clause = " AND ".join(conditions) if conditions else "1=1"
    return where_clause, params


def _grouped_stats(cursor, column, where_clause, params, limit=None):
    """Return per-``column`` total/approved/distributed counts as dicts.

    ``column`` is interpolated into the SQL text, so it must be a trusted
    identifier supplied by this module, never user input.
    """
    limit_clause = f"LIMIT {int(limit)}" if limit is not None else ""
    cursor.execute(f"""
        SELECT
            {column},
            COUNT(*) as count,
            COUNT(CASE WHEN judge_status = 1 THEN 1 END) as approved_count,
            COUNT(CASE WHEN is_distributed = 1 THEN 1 END) as distributed_count
        FROM
            contents
        WHERE
            {column} IS NOT NULL AND {column} != '' AND {where_clause}
        GROUP BY
            {column}
        ORDER BY
            count DESC
        {limit_clause}
    """, params)
    return [dict(row) for row in cursor.fetchall()]


def _print_preview(results):
    """Print the record count and the first ``_PREVIEW_ROWS`` records."""
    print(f"\n查询到 {len(results)} 条产品记录")
    print("\n===== 查询结果 (前10条) =====")
    for i, row in enumerate(results[:_PREVIEW_ROWS], 1):
        judge_status = _JUDGE_LABELS.get(row['judge_status'], '未知')
        distributed = '已分发' if row['is_distributed'] == 1 else '未分发'
        print(f"{i}. ID: {row['entry_id']}, 产品: {row['product']}, 景点: {row['object']}, 审核: {judge_status}, 分发状态: {distributed}")
    if len(results) > _PREVIEW_ROWS:
        print(f"... 还有 {len(results) - _PREVIEW_ROWS} 条记录未显示")


def _print_group_stats(title, label, key, groups):
    """Print one statistics section; prints nothing when ``groups`` is empty."""
    if not groups:
        return
    print(f"\n===== {title} =====")
    for group in groups:
        total = group['count']
        dist_percent = (group['distributed_count'] / total * 100) if total > 0 else 0
        approved_percent = (group['approved_count'] / total * 100) if total > 0 else 0
        print(f"{label}: {group[key]}")
        print(f" - 内容总数: {total}")
        print(f" - 已审核通过: {group['approved_count']} ({approved_percent:.1f}%)")
        print(f" - 已分发: {group['distributed_count']} ({dist_percent:.1f}%)")
        print(f" - 未分发: {total - group['distributed_count']} ({100 - dist_percent:.1f}%)")


def _print_distribution_summary(results):
    """Print distributed/undistributed totals for a non-empty result list."""
    total_count = len(results)
    distributed_count = sum(1 for r in results if r['is_distributed'] == 1)
    undistributed_count = total_count - distributed_count
    print("\n===== 分发状态汇总 =====")
    print(f"总内容数: {total_count}")
    print(f"已分发: {distributed_count} ({distributed_count / total_count * 100:.1f}%)")
    print(f"未分发: {undistributed_count} ({undistributed_count / total_count * 100:.1f}%)")


def _export_csv(results, output_file):
    """Write a non-empty result list to ``output_file`` as CSV."""
    # Create the parent directory so a fresh output location works.
    os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)
    # utf-8-sig writes a BOM at the start of the file.
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=list(results[0].keys()))
        writer.writeheader()
        writer.writerows(results)
    print(f"\n查询结果已保存到: {output_file}")


def query_products(product_name=None, object_name=None, output_file=None,
                   show_undistributed_only=False, db_file=None):
    """Query rows of the ``contents`` table, print a report, optionally export CSV.

    Args:
        product_name: Optional substring matched against ``product`` with LIKE.
        object_name: Optional substring matched against ``object`` with LIKE.
        output_file: Optional CSV path; written only when rows were found.
        show_undistributed_only: When true, restrict to ``is_distributed = 0``.
        db_file: Optional database path; defaults to the module-level
            ``db_path`` when omitted.

    Returns:
        A list of dicts (one per matching row, ordered by product, object,
        entry_id). An empty list is returned when nothing matches, when the
        database file is missing, or when any error occurs (the error is
        printed rather than raised).
    """
    database = db_path if db_file is None else db_file

    # sqlite3.connect() would create an empty database for a missing path;
    # a read-only query tool should report the problem instead.
    if not os.path.isfile(database):
        print(f"数据库文件不存在: {database}")
        return []

    try:
        conn = sqlite3.connect(database)
        conn.row_factory = sqlite3.Row  # rows convertible to dicts by column name
        cursor = conn.cursor()
        print(f"已连接到数据库: {database}")
    except sqlite3.Error as e:
        print(f"数据库连接错误: {e}")
        return []

    try:
        where_clause, params = _build_filter(
            product_name, object_name, show_undistributed_only
        )
        cursor.execute(f"""
            SELECT
                id, entry_id, product, object, date, logic, judge_status,
                output_txt_path, poster_path, article_json_path, created_at, is_distributed
            FROM
                contents
            WHERE
                {where_clause}
            ORDER BY
                product, object, entry_id
        """, params)
        results = [dict(row) for row in cursor.fetchall()]

        if not results:
            print("\n未查询到相关产品记录")
            # Distinguish an empty table from filters that match nothing.
            cursor.execute("SELECT COUNT(*) as count FROM contents")
            count = cursor.fetchone()['count']
            if count == 0:
                print("\n提示: 数据库中没有任何内容记录,请先导入数据")
            else:
                print(f"\n提示: 数据库中有 {count} 条内容记录,但没有符合条件的产品")
            return results

        product_stats = _grouped_stats(cursor, 'product', where_clause, params)
        object_stats = _grouped_stats(
            cursor, 'object', where_clause, params, limit=_OBJECT_STATS_LIMIT
        )

        _print_preview(results)
        _print_group_stats("产品统计", "产品", 'product', product_stats)
        # The per-object section is skipped when too many objects matched.
        if len(object_stats) <= _OBJECT_STATS_MAX_SHOWN:
            _print_group_stats("景点统计", "景点", 'object', object_stats)
        _print_distribution_summary(results)

        if output_file:
            _export_csv(results, output_file)
        return results
    except Exception as e:
        # Top-level boundary for this CLI helper: report and return no rows.
        print(f"查询产品时出错: {e}")
        import traceback
        traceback.print_exc()
        return []
    finally:
        conn.close()
def main(argv=None):
    """Parse command-line options and run the product query.

    Args:
        argv: Optional list of argument strings. When omitted, argparse
            reads the process arguments, as a plain ``main()`` call did before.

    Returns:
        True when the query returned at least one row; False when no rows
        matched or when no filter option was supplied.
    """
    parser = argparse.ArgumentParser(description="查询数据库中的产品信息")
    parser.add_argument("--product", type=str, help="按产品名称查询")
    parser.add_argument("--object", type=str, help="按景点名称查询")
    parser.add_argument("--output", type=str, help="输出CSV文件路径")
    parser.add_argument("--all", action="store_true", help="查询所有产品")
    parser.add_argument("--export-csv", action="store_true", help="导出结果到CSV文件")
    parser.add_argument("--undistributed", action="store_true", help="只显示未分发的内容")
    args = parser.parse_args(argv)

    # At least one selector is required before touching the database.
    if not (args.product or args.object or args.all or args.undistributed):
        print("请提供查询条件: --product, --object, --all 或 --undistributed")
        parser.print_help()
        return False

    if args.output:
        output_file = args.output
    elif args.export_csv or args.all:
        # No explicit path: fall back to a timestamped file in the output dir.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        suffix = "_undistributed" if args.undistributed else ""
        output_file = f"/root/autodl-tmp/TravelContentCreator/output/product_query{suffix}_{timestamp}.csv"
    else:
        output_file = None

    results = query_products(args.product, args.object, output_file, args.undistributed)
    return len(results) > 0
if __name__ == "__main__":
    # Map main()'s boolean outcome onto the process exit status:
    # 0 when rows were found, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)