TravelContentCreator/scripts/query_products.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sqlite3
import csv
import sys
import argparse
from datetime import datetime

# Path to the SQLite database file opened by query_products().
db_path = '/root/autodl-tmp/TravelContentCreator/distribution.db'

def query_products(product_name=None, object_name=None, output_file=None,
                   show_undistributed_only=False, *, database_path=None):
    """Query rows of the ``contents`` table and print a summary report.

    Matching rows are printed (first 10 only), followed by per-product and
    per-attraction statistics and an overall distribution summary. When
    ``output_file`` is given and at least one row matched, all matching rows
    are also written to that path as a UTF-8 (with BOM) CSV file.

    Args:
        product_name: Optional substring matched against ``product`` with
            SQL ``LIKE``; ``%`` / ``_`` in the value act as wildcards.
        object_name: Optional substring matched against ``object`` (the
            attraction name) with SQL ``LIKE``.
        output_file: Optional CSV path; parent directories are created.
        show_undistributed_only: When true, only rows with
            ``is_distributed = 0`` are returned.
        database_path: Optional SQLite file to query. Defaults to the
            module-level ``db_path``.

    Returns:
        A list of dicts, one per matching row, ordered by product, object
        and entry_id. An empty list is returned when nothing matches or when
        any error occurs (the error is printed, not raised).
    """
    # Resolve lazily so an explicit path never depends on the module default.
    target_db = db_path if database_path is None else database_path

    try:
        conn = sqlite3.connect(target_db)
        conn.row_factory = sqlite3.Row  # rows become name-addressable
        cursor = conn.cursor()
        print(f"已连接到数据库: {target_db}")
    except sqlite3.Error as e:
        print(f"数据库连接错误: {e}")
        return []

    try:
        # Build the filter; user values are always bound as parameters.
        conditions = []
        params = []

        if product_name:
            conditions.append("product LIKE ?")
            params.append(f"%{product_name}%")

        if object_name:
            conditions.append("object LIKE ?")
            params.append(f"%{object_name}%")

        if show_undistributed_only:
            conditions.append("is_distributed = 0")

        # "1=1" keeps the SQL valid when no filter was requested.
        where_clause = " AND ".join(conditions) if conditions else "1=1"

        query = f"""
            SELECT
                id, entry_id, product, object, date, logic, judge_status,
                output_txt_path, poster_path, article_json_path, created_at, is_distributed
            FROM
                contents
            WHERE
                {where_clause}
            ORDER BY
                product, object, entry_id
        """
        cursor.execute(query, params)
        results = [dict(row) for row in cursor.fetchall()]

        if not results:
            print("\n未查询到相关产品记录")

            # Tell the user whether the table is empty or the filter is too strict.
            cursor.execute("SELECT COUNT(*) as count FROM contents")
            count = cursor.fetchone()['count']

            if count == 0:
                print("\n提示: 数据库中没有任何内容记录，请先导入数据")
            else:
                print(f"\n提示: 数据库中有 {count} 条内容记录，但没有符合条件的产品")
            return results

        product_stats = _fetch_group_stats(cursor, "product", where_clause, params)
        object_stats = _fetch_group_stats(cursor, "object", where_clause, params, limit=20)

        print(f"\n查询到 {len(results)} 条产品记录")

        print("\n===== 查询结果 (前10条) =====")
        for i, row in enumerate(results[:10], 1):
            judge_status = '已通过' if row['judge_status'] == 1 else '未通过' if row['judge_status'] == 0 else '未知'
            distributed = '已分发' if row['is_distributed'] == 1 else '未分发'
            print(f"{i}. ID: {row['entry_id']}, 产品: {row['product']}, 景点: {row['object']}, 审核: {judge_status}, 分发状态: {distributed}")

        if len(results) > 10:
            print(f"... 还有 {len(results) - 10} 条记录未显示")

        if product_stats:
            _print_group_stats("产品统计", "产品", "product", product_stats)

        # Attraction statistics are only printed when at most 10 groups matched.
        if object_stats and len(object_stats) <= 10:
            _print_group_stats("景点统计", "景点", "object", object_stats)

        # Overall distribution summary; total_count > 0 is guaranteed here.
        total_count = len(results)
        distributed_count = sum(1 for r in results if r['is_distributed'] == 1)
        undistributed_count = total_count - distributed_count

        print("\n===== 分发状态汇总 =====")
        print(f"总内容数: {total_count}")
        print(f"已分发: {distributed_count} ({distributed_count / total_count * 100:.1f}%)")
        print(f"未分发: {undistributed_count} ({undistributed_count / total_count * 100:.1f}%)")

        if output_file:
            os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)

            # utf-8-sig writes a BOM at the start of the file.
            with open(output_file, 'w', newline='', encoding='utf-8-sig') as f:
                writer = csv.DictWriter(f, fieldnames=list(results[0].keys()))
                writer.writeheader()
                writer.writerows(results)
            print(f"\n查询结果已保存到: {output_file}")

        return results

    except Exception as e:
        # Top-level boundary for the CLI: report the failure and return no rows.
        print(f"查询产品时出错: {e}")
        import traceback
        traceback.print_exc()
        return []
    finally:
        conn.close()


def _fetch_group_stats(cursor, column, where_clause, params, limit=None):
    """Return total/approved/distributed counts grouped by ``column``.

    ``column`` must be a trusted identifier (it is interpolated into the SQL);
    rows whose ``column`` is NULL or empty are excluded.
    """
    limit_clause = f"LIMIT {int(limit)}" if limit is not None else ""
    cursor.execute(f"""
        SELECT
            {column},
            COUNT(*) as count,
            COUNT(CASE WHEN judge_status = 1 THEN 1 END) as approved_count,
            COUNT(CASE WHEN is_distributed = 1 THEN 1 END) as distributed_count
        FROM
            contents
        WHERE
            {column} IS NOT NULL AND {column} != '' AND {where_clause}
        GROUP BY
            {column}
        ORDER BY
            count DESC
        {limit_clause}
    """, params)
    return [dict(row) for row in cursor.fetchall()]


def _print_group_stats(title, label, key, stats):
    """Print one statistics section (counts and percentages) per group row."""
    print(f"\n===== {title} =====")
    for entry in stats:
        total = entry['count']
        dist_percent = (entry['distributed_count'] / total * 100) if total > 0 else 0
        approved_percent = (entry['approved_count'] / total * 100) if total > 0 else 0
        print(f"{label}: {entry[key]}")
        print(f"  - 内容总数: {total}")
        print(f"  - 已审核通过: {entry['approved_count']} ({approved_percent:.1f}%)")
        print(f"  - 已分发: {entry['distributed_count']} ({dist_percent:.1f}%)")
        print(f"  - 未分发: {total - entry['distributed_count']} ({100 - dist_percent:.1f}%)")

def main(argv=None):
    """Parse command-line options and run the product query.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, so the command-line behaviour is
            unchanged; passing a list allows programmatic invocation.

    Returns:
        ``True`` when the query returned at least one row, ``False`` when it
        returned none or when no query condition was supplied.
    """
    parser = argparse.ArgumentParser(description="查询数据库中的产品信息")
    parser.add_argument("--product", type=str, help="按产品名称查询")
    parser.add_argument("--object", type=str, help="按景点名称查询")
    parser.add_argument("--output", type=str, help="输出CSV文件路径")
    parser.add_argument("--all", action="store_true", help="查询所有产品")
    parser.add_argument("--export-csv", action="store_true", help="导出结果到CSV文件")
    parser.add_argument("--undistributed", action="store_true", help="只显示未分发的内容")

    args = parser.parse_args(argv)

    # Without at least one query condition there is nothing to run.
    if not (args.product or args.object or args.all or args.undistributed):
        print("请提供查询条件: --product, --object, --all 或 --undistributed")
        parser.print_help()
        return False

    # An explicit --output wins; --export-csv / --all fall back to a
    # timestamped file under the project's output directory.
    if args.output:
        output_file = args.output
    elif args.export_csv or args.all:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        suffix = "_undistributed" if args.undistributed else ""
        output_file = f"/root/autodl-tmp/TravelContentCreator/output/product_query{suffix}_{timestamp}.csv"
    else:
        output_file = None

    results = query_products(args.product, args.object, output_file, args.undistributed)
    return len(results) > 0

if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    if main():
        sys.exit(0)
    sys.exit(1)