TravelContentCreator/scripts/query_products.py

224 lines
9.4 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sqlite3
import csv
import sys
import argparse
from datetime import datetime
# Path of the SQLite database file that query_products() opens with sqlite3.connect().
db_path = '/root/autodl-tmp/TravelContentCreator/distribution.db'
def _build_filters(product_name, object_name, show_undistributed_only):
    """Return ``(where_clause, params)`` for the requested filters."""
    conditions = []
    params = []
    if product_name:
        conditions.append("product LIKE ?")
        params.append(f"%{product_name}%")
    if object_name:
        conditions.append("object LIKE ?")
        params.append(f"%{object_name}%")
    if show_undistributed_only:
        conditions.append("is_distributed = 0")
    # "1=1" keeps the SQL valid when no filter was requested.
    where_clause = " AND ".join(conditions) if conditions else "1=1"
    return where_clause, params


def _fetch_group_statistics(cursor, column, where_clause, params, limit=None):
    """Return total/approved/distributed counts grouped by ``column``."""
    # ``column`` is only ever one of the fixed names passed by query_products,
    # never user input, so interpolating it into the SQL text is safe.
    limit_clause = f"LIMIT {int(limit)}" if limit is not None else ""
    cursor.execute(f"""
        SELECT
            {column},
            COUNT(*) as count,
            COUNT(CASE WHEN judge_status = 1 THEN 1 END) as approved_count,
            COUNT(CASE WHEN is_distributed = 1 THEN 1 END) as distributed_count
        FROM
            contents
        WHERE
            {column} IS NOT NULL AND {column} != '' AND {where_clause}
        GROUP BY
            {column}
        ORDER BY
            count DESC
        {limit_clause}
    """, params)
    return [dict(row) for row in cursor.fetchall()]


def _print_group_statistics(title, label, key, groups):
    """Print one statistics section (per product or per attraction)."""
    print(f"\n===== {title} =====")
    for group in groups:
        total = group['count']
        dist_percent = (group['distributed_count'] / total * 100) if total > 0 else 0
        approved_percent = (group['approved_count'] / total * 100) if total > 0 else 0
        print(f"{label}: {group[key]}")
        print(f" - 内容总数: {total}")
        print(f" - 已审核通过: {group['approved_count']} ({approved_percent:.1f}%)")
        print(f" - 已分发: {group['distributed_count']} ({dist_percent:.1f}%)")
        print(f" - 未分发: {total - group['distributed_count']} ({100 - dist_percent:.1f}%)")


def _export_csv(output_file, results):
    """Write the non-empty ``results`` list to ``output_file`` as CSV."""
    # Make sure the target directory exists before opening the file.
    os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)
    # The utf-8-sig encoding prepends a BOM to the file.
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=list(results[0].keys()))
        writer.writeheader()
        writer.writerows(results)
    print(f"\n查询结果已保存到: {output_file}")


def query_products(product_name=None, object_name=None, output_file=None,
                   show_undistributed_only=False, db_file=None):
    """Query rows of the ``contents`` table and print a report.

    Prints the first 10 matching rows, per-product statistics, per-attraction
    statistics (only when there are at most 10 attraction groups) and an
    overall distribution summary. When ``output_file`` is given and rows were
    found, all matching rows are also written to that CSV file.

    Args:
        product_name: Optional substring to match against ``product``.
        object_name: Optional substring to match against ``object``
            (the attraction name).
        output_file: Optional path of a CSV file to write the rows to.
        show_undistributed_only: When true, only rows with
            ``is_distributed = 0`` are returned.
        db_file: Optional path of the SQLite database to open. Defaults to
            the module-level ``db_path``.

    Returns:
        A list of dicts, one per matching row, ordered by product, object and
        entry_id. An empty list is returned when nothing matches, when the
        connection cannot be opened, or when the query raises.
    """
    database = db_path if db_file is None else db_file

    # Connect to the database.
    try:
        conn = sqlite3.connect(database)
        conn.row_factory = sqlite3.Row  # rows become addressable by column name
        cursor = conn.cursor()
        print(f"已连接到数据库: {database}")
    except sqlite3.Error as e:
        print(f"数据库连接错误: {e}")
        return []

    try:
        where_clause, params = _build_filters(
            product_name, object_name, show_undistributed_only)

        cursor.execute(f"""
            SELECT
                id, entry_id, product, object, date, logic, judge_status,
                output_txt_path, poster_path, article_json_path, created_at, is_distributed
            FROM
                contents
            WHERE
                {where_clause}
            ORDER BY
                product, object, entry_id
        """, params)
        results = [dict(row) for row in cursor.fetchall()]

        if not results:
            print("\n未查询到相关产品记录")
            # Tell the user whether the table is empty or the filter is too narrow.
            cursor.execute("SELECT COUNT(*) as count FROM contents")
            count = cursor.fetchone()['count']
            if count == 0:
                print("\n提示: 数据库中没有任何内容记录,请先导入数据")
            else:
                print(f"\n提示: 数据库中有 {count} 条内容记录,但没有符合条件的产品")
            return results

        product_stats = _fetch_group_statistics(
            cursor, "product", where_clause, params)
        object_stats = _fetch_group_statistics(
            cursor, "object", where_clause, params, limit=20)

        print(f"\n查询到 {len(results)} 条产品记录")

        # Show only the first 10 rows on screen.
        print("\n===== 查询结果 (前10条) =====")
        judge_labels = {1: '已通过', 0: '未通过'}
        for i, row in enumerate(results[:10], 1):
            judge_status = judge_labels.get(row['judge_status'], '未知')
            distributed = '已分发' if row['is_distributed'] == 1 else '未分发'
            print(f"{i}. ID: {row['entry_id']}, 产品: {row['product']}, 景点: {row['object']}, 审核: {judge_status}, 分发状态: {distributed}")
        if len(results) > 10:
            print(f"... 还有 {len(results) - 10} 条记录未显示")

        if product_stats:
            _print_group_statistics("产品统计", "产品", "product", product_stats)
        # The attraction section is skipped when it has more than 10 groups.
        if object_stats and len(object_stats) <= 10:
            _print_group_statistics("景点统计", "景点", "object", object_stats)

        # Overall distributed / undistributed summary. ``results`` is non-empty
        # here, so the divisions below cannot divide by zero.
        total_count = len(results)
        distributed_count = sum(1 for r in results if r['is_distributed'] == 1)
        undistributed_count = total_count - distributed_count
        print("\n===== 分发状态汇总 =====")
        print(f"总内容数: {total_count}")
        print(f"已分发: {distributed_count} ({distributed_count / total_count * 100:.1f}%)")
        print(f"未分发: {undistributed_count} ({undistributed_count / total_count * 100:.1f}%)")

        if output_file:
            _export_csv(output_file, results)
        return results
    except Exception as e:
        # Top-level boundary of the CLI: report the failure and return no rows.
        print(f"查询产品时出错: {e}")
        import traceback
        traceback.print_exc()
        return []
    finally:
        conn.close()
def main():
    """Parse the command line, run the query and report success.

    Returns:
        True when the query returned at least one row; False when no query
        condition was supplied or nothing matched.
    """
    cli = argparse.ArgumentParser(description="查询数据库中的产品信息")
    option_specs = (
        ("--product", {"type": str, "help": "按产品名称查询"}),
        ("--object", {"type": str, "help": "按景点名称查询"}),
        ("--output", {"type": str, "help": "输出CSV文件路径"}),
        ("--all", {"action": "store_true", "help": "查询所有产品"}),
        ("--export-csv", {"action": "store_true", "help": "导出结果到CSV文件"}),
        ("--undistributed", {"action": "store_true", "help": "只显示未分发的内容"}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    args = cli.parse_args()

    # Choose the CSV target: an explicit --output wins; otherwise --export-csv
    # or --all gets a timestamped default file; otherwise nothing is exported.
    if args.output:
        csv_target = args.output
    elif args.export_csv or args.all:
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        tag = "_undistributed" if args.undistributed else ""
        csv_target = f"/root/autodl-tmp/TravelContentCreator/output/product_query{tag}_{stamp}.csv"
    else:
        csv_target = None

    # Refuse to run without at least one query condition.
    has_condition = args.product or args.object or args.all or args.undistributed
    if not has_condition:
        print("请提供查询条件: --product, --object, --all 或 --undistributed")
        cli.print_help()
        return False

    rows = query_products(args.product, args.object, csv_target, args.undistributed)
    return len(rows) > 0
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)