追加数据库模式

This commit is contained in:
jinye_huang 2025-05-13 19:01:27 +08:00
parent dfe2b4c872
commit 1a8ddef1ff
4 changed files with 776 additions and 327 deletions

View File

@ -7,7 +7,7 @@ import pandas as pd
import argparse import argparse
import random import random
import logging import logging
import subprocess import sqlite3
from datetime import datetime from datetime import datetime
import json import json
import smtplib import smtplib
@ -24,7 +24,7 @@ logging.basicConfig(
level=logging.INFO, level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s', format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[ handlers=[
logging.FileHandler(f"content_distribution_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"), logging.FileHandler(f"content_distribution_db_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"),
logging.StreamHandler() logging.StreamHandler()
] ]
) )
@ -32,13 +32,9 @@ logger = logging.getLogger(__name__)
def parse_arguments(): def parse_arguments():
"""解析命令行参数""" """解析命令行参数"""
parser = argparse.ArgumentParser(description='内容分发') parser = argparse.ArgumentParser(description='内容分发 (数据库版)')
# 必需参数 # 必需参数
parser.add_argument('--user-csv', type=str, required=True,
help='小红书用户CSV文件路径')
parser.add_argument('--manifest-csv', type=str, required=True,
help='内容清单CSV文件路径')
parser.add_argument('--output-dir', type=str, default='distribution_results', parser.add_argument('--output-dir', type=str, default='distribution_results',
help='分发结果输出目录') help='分发结果输出目录')
parser.add_argument('--email-from', type=str, required=True, parser.add_argument('--email-from', type=str, required=True,
@ -46,11 +42,29 @@ def parse_arguments():
parser.add_argument('--email-password', type=str, required=True, parser.add_argument('--email-password', type=str, required=True,
help='发件人邮箱授权码') help='发件人邮箱授权码')
# 数据库相关参数
parser.add_argument('--db-path', type=str, default='/root/autodl-tmp/TravelContentCreator/distribution.db',
help='数据库文件路径')
# 内容筛选参数
parser.add_argument('--product', type=str, default=None,
help='指定产品名称进行筛选')
parser.add_argument('--object', type=str, default=None,
help='指定景点名称进行筛选')
parser.add_argument('--undistributed-only', action='store_true',
help='只选择未分发的内容')
# 用户筛选参数
parser.add_argument('--user-id', type=int, default=None,
help='指定用户ID进行分发')
parser.add_argument('--user-email', type=str, default=None,
help='指定用户邮箱进行分发')
parser.add_argument('--max-users', type=int, default=None,
help='最大用户数量')
# 可选参数 # 可选参数
parser.add_argument('--article-per-user', type=int, default=3, parser.add_argument('--article-per-user', type=int, default=3,
help='每个用户分配的文章数量') help='每个用户分配的文章数量')
parser.add_argument('--max-send-count', type=int, default=None,
help='最大发送数量限制')
parser.add_argument('--subject', type=str, default='您的旅游内容创作已就绪', parser.add_argument('--subject', type=str, default='您的旅游内容创作已就绪',
help='邮件主题') help='邮件主题')
parser.add_argument('--smtp-server', type=str, default='smtp.163.com', parser.add_argument('--smtp-server', type=str, default='smtp.163.com',
@ -59,25 +73,200 @@ def parse_arguments():
help='SMTP服务器端口') help='SMTP服务器端口')
parser.add_argument('--use-ssl', action='store_true', parser.add_argument('--use-ssl', action='store_true',
help='使用SSL连接SMTP服务器') help='使用SSL连接SMTP服务器')
parser.add_argument('--email-column', type=str, default='达人邮箱',
help='用户CSV中邮箱列的名称')
parser.add_argument('--username-column', type=str, default='小红书ID',
help='用户CSV中用户名列的名称')
parser.add_argument('--judge-only-success', action='store_true', parser.add_argument('--judge-only-success', action='store_true',
help='只分发审核成功的内容') help='只分发审核成功的内容')
parser.add_argument('--test-mode', action='store_true', parser.add_argument('--test-mode', action='store_true',
help='测试模式,不实际发送邮件') help='测试模式,不实际发送邮件')
parser.add_argument('--delay', type=int, default=2, parser.add_argument('--delay', type=int, default=2,
help='每封邮件发送之间的延迟时间(秒)') help='每封邮件发送之间的延迟时间(秒)')
parser.add_argument('--previous-distribution', type=str, default=None,
help='上一次分发结果CSV或报告文件路径用于避免重复发送')
parser.add_argument('--skip-sent-success', action='store_true',
help='跳过上次成功发送的文章')
parser.add_argument('--zip-filename', type=str, default=None, parser.add_argument('--zip-filename', type=str, default=None,
help='指定ZIP压缩包的基本文件名不含扩展名"文旅小红书带货笔记内容0512"') help='指定ZIP压缩包的基本文件名不含扩展名"文旅小红书带货笔记内容0512"')
return parser.parse_args() return parser.parse_args()
def create_database_connection(db_path):
    """Open a SQLite connection whose rows allow dict-style access.

    Args:
        db_path: filesystem path of the SQLite database file.

    Returns:
        An open ``sqlite3.Connection`` on success, or ``None`` when the
        database cannot be opened (the error is logged).
    """
    try:
        connection = sqlite3.connect(db_path)
    except sqlite3.Error as exc:
        logger.error(f"连接数据库失败: {exc}")
        return None
    # Row objects let callers read columns by name, e.g. row['id'].
    connection.row_factory = sqlite3.Row
    return connection
def query_contents_from_database(conn, product=None, object=None, judge_only_success=False, undistributed_only=False):
    """Load rows from the ``contents`` table into a DataFrame.

    Args:
        conn: open SQLite connection (rows must support name access).
        product: optional fuzzy (LIKE) filter on the product column.
        object: optional fuzzy (LIKE) filter on the object column.
        judge_only_success: keep only rows with ``judge_status = 1``.
        undistributed_only: keep only rows with ``is_distributed = 0``.

    Returns:
        A pandas DataFrame with CamelCase column names expected by the
        rest of the pipeline, or ``None`` when the query fails.
    """
    try:
        where_clauses = []
        bind_values = []
        if product:
            where_clauses.append("product LIKE ?")
            bind_values.append(f"%{product}%")
        if object:
            where_clauses.append("object LIKE ?")
            bind_values.append(f"%{object}%")
        if judge_only_success:
            where_clauses.append("judge_status = 1")
        if undistributed_only:
            where_clauses.append("is_distributed = 0")

        query = "SELECT * FROM contents"
        if where_clauses:
            query = query + " WHERE " + " AND ".join(where_clauses)

        cursor = conn.cursor()
        cursor.execute(query, bind_values)
        rows = cursor.fetchall()

        # Map snake_case DB columns onto the CamelCase keys used downstream.
        records = [
            {
                'id': r['id'],  # internal id, used for database joins
                'EntryID': r['entry_id'],
                'OutputTxtPath': r['output_txt_path'],
                'PosterPath': r['poster_path'],
                'ArticleJsonPath': r['article_json_path'],
                'Product': r['product'],
                'Object': r['object'],
                'Date': r['date'],
                'Logic': r['logic'],
                'JudgeStatus': r['judge_status'],
                'IsDistributed': r['is_distributed'],
            }
            for r in rows
        ]
        frame = pd.DataFrame(records)

        # Log what was queried and which filters were active.
        logger.info(f"从数据库查询到 {len(frame)} 条内容")
        if product:
            logger.info(f"按产品筛选: {product}")
        if object:
            logger.info(f"按景点筛选: {object}")
        if judge_only_success:
            logger.info("仅显示审核通过的内容")
        if undistributed_only:
            logger.info("仅显示未分发的内容")
        return frame
    except Exception as e:
        logger.error(f"从数据库查询内容失败: {e}")
        return None
def query_users_from_database(conn, user_id=None, user_email=None, max_users=None):
    """Load rows from the ``users`` table into a DataFrame.

    Args:
        conn: open SQLite connection (rows must support name access).
        user_id: optional exact filter on the user's primary key.
        user_email: optional fuzzy (LIKE) filter on the email column.
        max_users: optional cap on the number of rows returned.

    Returns:
        A pandas DataFrame with id / email / username columns, or ``None``
        when the query fails.  A missing username falls back to the local
        part of the email address.
    """
    try:
        cursor = conn.cursor()
        # 构建查询条件
        conditions = []
        params = []
        if user_id:
            conditions.append("id = ?")
            params.append(user_id)
        if user_email:
            conditions.append("email LIKE ?")
            params.append(f"%{user_email}%")
        # 构建SQL查询
        sql = "SELECT id, email, username FROM users"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        # 限制用户数量
        if max_users:
            # Fix: bind LIMIT as a parameter instead of interpolating it into
            # the SQL text with an f-string — keeps the statement injection-safe
            # if max_users ever arrives as untrusted text.
            sql += " LIMIT ?"
            params.append(int(max_users))
        # 执行查询
        cursor.execute(sql, params)
        results = cursor.fetchall()
        # 转换为DataFrame
        user_list = []
        for row in results:
            user_list.append({
                'id': row['id'],
                'email': row['email'],
                # Fall back to the email local part when username is NULL/empty.
                'username': row['username'] or row['email'].split('@')[0]
            })
        user_df = pd.DataFrame(user_list)
        # 输出查询统计信息
        logger.info(f"从数据库查询到 {len(user_df)} 个用户")
        if user_id:
            logger.info(f"按用户ID筛选: {user_id}")
        if user_email:
            logger.info(f"按用户邮箱筛选: {user_email}")
        if max_users:
            logger.info(f"限制最大用户数: {max_users}")
        return user_df
    except Exception as e:
        logger.error(f"从数据库查询用户失败: {e}")
        return None
def allocate_content_to_users(users_df, content_df, article_per_user):
    """Assign shuffled content records to every user, cycling when short.

    Args:
        users_df: DataFrame with id / email / username columns.
        content_df: DataFrame of content records (must carry 'EntryID').
        article_per_user: how many articles each user receives.

    Returns:
        A list of user dicts, each with a 'contents' list attached; an
        empty list when there is no content or allocation fails.
    """
    try:
        recipients = [
            {
                'id': rec['id'],
                'email': rec['email'],
                'username': rec['username'],
                'contents': [],
            }
            for _, rec in users_df.iterrows()
        ]

        pool = content_df.to_dict('records')
        if not pool:
            logger.warning("没有可用内容进行分配")
            return []

        # Shuffle so repeated runs spread different articles across users.
        random.shuffle(pool)

        handed_out = []
        position = 0
        for recipient in recipients:
            picked = []
            for _ in range(article_per_user):
                if position >= len(pool):
                    position = 0  # wrap around: reuse content when it runs out
                    logger.warning("内容不足,将循环使用现有内容")
                item = pool[position]
                picked.append(item)
                handed_out.append(item)
                position += 1
            recipient['contents'] = picked

        logger.info(f"已为 {len(recipients)} 个用户分配 {len(handed_out)} 条内容")
        distinct_entries = len({c['EntryID'] for c in handed_out})
        logger.info(f"分配的唯一内容条数: {distinct_entries}")
        return recipients
    except Exception as e:
        logger.error(f"分配内容失败: {e}")
        return []
def find_additional_images(file_path): def find_additional_images(file_path):
"""查找与文章相关的所有额外图片""" """查找与文章相关的所有额外图片"""
if not file_path or pd.isna(file_path) or not os.path.exists(file_path): if not file_path or pd.isna(file_path) or not os.path.exists(file_path):
@ -105,161 +294,6 @@ def find_additional_images(file_path):
logger.info(f"找到 {len(image_files)} 张额外图片: {additional_images_dir}") logger.info(f"找到 {len(image_files)} 张额外图片: {additional_images_dir}")
return image_files return image_files
def read_user_csv(user_csv_path, email_column, username_column):
    """Load the user CSV, keeping only rows with a plausible email address.

    Args:
        user_csv_path: path of the CSV file to read.
        email_column: name of the column holding email addresses (required).
        username_column: name of the username column; created from the email
            local part when absent.

    Returns:
        The filtered DataFrame, or ``None`` when the file is unreadable or
        the email column is missing.
    """
    try:
        users = pd.read_csv(user_csv_path)
        if email_column not in users.columns:
            logger.error(f"用户CSV中缺少邮箱列 '{email_column}'")
            return None
        # A usable address must be non-empty and contain an '@'.
        users = users[users[email_column].notna()]
        users = users[users[email_column].astype(str).str.contains('@')]
        if username_column not in users.columns:
            logger.warning(f"用户CSV中缺少用户名列 '{username_column}',使用邮箱前缀作为用户名")
            users[username_column] = users[email_column].apply(lambda addr: addr.split('@')[0])
        logger.info(f"成功读取 {len(users)} 个有效用户")
        return users
    except Exception as e:
        logger.error(f"读取用户CSV失败: {e}")
        return None
def read_manifest_csv(manifest_csv_path, judge_only_success=False):
    """Load the content manifest CSV, optionally keeping only approved rows.

    Args:
        manifest_csv_path: path of the manifest CSV.
        judge_only_success: when True and a 'JudgeStatus' column exists,
            drop rows that did not pass review.

    Returns:
        The manifest DataFrame, or ``None`` when the file is unreadable or
        any required path column is missing.
    """
    try:
        manifest = pd.read_csv(manifest_csv_path)

        # Downstream distribution needs all three file-path columns.
        required = ['OutputTxtPath', 'PosterPath', 'ArticleJsonPath']
        missing = [name for name in required if name not in manifest.columns]
        if missing:
            logger.error(f"内容清单CSV中缺少必要列: {', '.join(missing)}")
            return None

        if judge_only_success and 'JudgeStatus' in manifest.columns:
            before = len(manifest)
            manifest = manifest[manifest['JudgeStatus'] == True]
            logger.info(f"只保留审核成功的内容,从 {before} 条过滤为 {len(manifest)}")

        logger.info(f"成功读取 {len(manifest)} 条内容")
        return manifest
    except Exception as e:
        logger.error(f"读取内容清单CSV失败: {e}")
        return None
def read_previous_distribution(previous_file, skip_sent_success=True):
    """Collect entry ids already distributed in a previous run.

    Recognizes three file layouts by their columns:
      * distribution.csv / email_results.csv — 'entry_id' (+ 'send_status')
      * distribution_summary.csv             — 'entry_id' (+ 'success')
      * manifest_with_dist.csv               — 'EntryID' (+ 'sent_success' /
        'assigned_count')

    Args:
        previous_file: path of the previous result CSV; may be None/missing.
        skip_sent_success: when True, only ids that were *successfully*
            sent are excluded; otherwise every assigned id is.

    Returns:
        A set of entry ids to skip; empty set when nothing applies.
    """
    if not previous_file or not os.path.exists(previous_file):
        logger.info("未提供上一次分发结果文件或文件不存在,将不过滤已发放文章")
        return set()
    try:
        df = pd.read_csv(previous_file)
        # 确定文件类型并提取相关列
        if 'entry_id' in df.columns:  # distribution.csv / email_results.csv / distribution_summary.csv
            if 'send_status' in df.columns and skip_sent_success:
                # 只过滤成功发送的
                already_sent = df[df['send_status'].str.contains('success', na=False)]['entry_id'].unique()
            elif 'success' in df.columns and skip_sent_success:
                # Fix: this distribution_summary.csv case previously lived in an
                # unreachable duplicate `elif 'entry_id' in df.columns` branch;
                # it is now handled here so summary files are filtered correctly.
                already_sent = df[df['success'] > 0]['entry_id'].unique()
            else:
                # 过滤所有分配过的
                already_sent = df['entry_id'].unique()
        elif 'EntryID' in df.columns:  # manifest_with_dist.csv
            if 'sent_success' in df.columns and skip_sent_success:
                # 只过滤成功发送的
                already_sent = df[df['sent_success'] > 0]['EntryID'].unique()
            else:
                # 过滤所有分配过的
                already_sent = df[df['assigned_count'] > 0]['EntryID'].unique()
        else:
            logger.warning(f"无法识别的分发结果文件格式: {previous_file}")
            return set()
        already_sent_set = set(already_sent)
        logger.info(f"从上一次分发结果中找到 {len(already_sent_set)} 篇已发放文章")
        return already_sent_set
    except Exception as e:
        logger.error(f"读取上一次分发结果失败: {e}")
        return set()
def allocate_content_to_users(users_df, content_df, article_per_user,
email_column, username_column, max_send_count=None,
already_sent_ids=None):
"""为用户分配内容"""
try:
# 创建用户列表
users = []
for _, row in users_df.iterrows():
email = row[email_column]
username = row[username_column] if not pd.isna(row[username_column]) else f"用户_{_}"
users.append({
'email': email,
'username': username,
'contents': []
})
# 转换为记录列表并过滤已发放的文章
content_list = content_df.to_dict('records')
if already_sent_ids:
original_count = len(content_list)
content_list = [c for c in content_list if c['EntryID'] not in already_sent_ids]
filtered_count = original_count - len(content_list)
logger.info(f"过滤掉 {filtered_count} 篇已发放文章,剩余 {len(content_list)} 篇可用")
if not content_list:
logger.warning("过滤后没有可用文章,请提供新内容或关闭过滤功能")
return []
# 随机打乱内容列表
random.shuffle(content_list)
# 为每个用户分配内容
content_allocated = []
content_index = 0
# 限制最大发送数量
if max_send_count is not None and max_send_count > 0:
users = users[:max_send_count]
logger.info(f"限制发送用户数量为 {max_send_count}")
for user in users:
user_contents = []
for _ in range(article_per_user):
if content_index >= len(content_list):
content_index = 0 # 如果内容不够,循环使用
logger.warning("内容不足,将循环使用现有内容")
content = content_list[content_index]
user_contents.append(content)
content_allocated.append(content)
content_index += 1
user['contents'] = user_contents
logger.info(f"已为 {len(users)} 个用户分配 {len(content_allocated)} 条内容")
unique_content_count = len(set([c['EntryID'] for c in content_allocated]))
logger.info(f"分配的唯一内容条数: {unique_content_count}")
return users
except Exception as e:
logger.error(f"分配内容失败: {e}")
return []
def prepare_distribution_csv(users_with_content, output_dir): def prepare_distribution_csv(users_with_content, output_dir):
"""准备分发CSV文件""" """准备分发CSV文件"""
try: try:
@ -274,15 +308,15 @@ def prepare_distribution_csv(users_with_content, output_dir):
additional_images = find_additional_images(output_txt_path) additional_images = find_additional_images(output_txt_path)
rows.append({ rows.append({
'user_id': user['id'],
'email': user['email'], 'email': user['email'],
'username': user['username'], 'username': user['username'],
'content_id': content['id'],
'entry_id': content['EntryID'],
'file_path': content['OutputTxtPath'], 'file_path': content['OutputTxtPath'],
'poster_path': content['PosterPath'], 'poster_path': content['PosterPath'],
'article_json_path': content['ArticleJsonPath'], 'article_json_path': content['ArticleJsonPath'],
'additional_images': ';'.join(additional_images), # 保存为分号分隔的字符串 'additional_images': ';'.join(additional_images), # 保存为分号分隔的字符串
'entry_id': content['EntryID'],
'topic_index': content.get('TopicIndex', ''),
'variant_index': content.get('VariantIndex', ''),
'product': content.get('Product', ''), 'product': content.get('Product', ''),
'object': content.get('Object', ''), 'object': content.get('Object', ''),
'date': content.get('Date', ''), 'date': content.get('Date', ''),
@ -423,6 +457,7 @@ def send_emails(distribution_csv, output_dir, email_from, email_password,
if not files: if not files:
logger.warning(f"邮箱 {email} 没有有效的附件文件,跳过") logger.warning(f"邮箱 {email} 没有有效的附件文件,跳过")
results["details"].append({ results["details"].append({
"user_id": rows[0]['user_id'],
"email": email, "email": email,
"status": "skipped", "status": "skipped",
"reason": "没有有效的附件文件", "reason": "没有有效的附件文件",
@ -447,6 +482,7 @@ def send_emails(distribution_csv, output_dir, email_from, email_password,
else: else:
logger.error(f"为邮箱 {email} 创建ZIP文件失败跳过") logger.error(f"为邮箱 {email} 创建ZIP文件失败跳过")
results["details"].append({ results["details"].append({
"user_id": rows[0]['user_id'],
"email": email, "email": email,
"status": "failed", "status": "failed",
"reason": "创建ZIP文件失败", "reason": "创建ZIP文件失败",
@ -461,12 +497,14 @@ def send_emails(distribution_csv, output_dir, email_from, email_password,
email_content = f"""您好!请查收今日带货笔记(文案+配图),内容在文件压缩包内。具体挂载商品等操作流程请查看对应达人微信群内信息。 email_content = f"""您好!请查收今日带货笔记(文案+配图),内容在文件压缩包内。具体挂载商品等操作流程请查看对应达人微信群内信息。
共包含{entries_count}篇文章内容请按照要求发布
""" """
# 发送邮件 # 发送邮件
if test_mode: if test_mode:
logger.info(f"测试模式: 模拟发送邮件到 {email}") logger.info(f"测试模式: 模拟发送邮件到 {email}")
results["details"].append({ results["details"].append({
"user_id": rows[0]['user_id'],
"email": email, "email": email,
"status": "success (test mode)", "status": "success (test mode)",
"files": files "files": files
@ -488,6 +526,7 @@ def send_emails(distribution_csv, output_dir, email_from, email_password,
if success: if success:
results["success"] += 1 results["success"] += 1
results["details"].append({ results["details"].append({
"user_id": rows[0]['user_id'],
"email": email, "email": email,
"status": "success", "status": "success",
"files": files "files": files
@ -495,6 +534,7 @@ def send_emails(distribution_csv, output_dir, email_from, email_password,
else: else:
results["failed"] += 1 results["failed"] += 1
results["details"].append({ results["details"].append({
"user_id": rows[0]['user_id'],
"email": email, "email": email,
"status": "failed", "status": "failed",
"reason": "SMTP发送失败", "reason": "SMTP发送失败",
@ -549,7 +589,74 @@ def send_emails(distribution_csv, output_dir, email_from, email_password,
logger.error(f"发送邮件过程中发生错误: {e}") logger.error(f"发送邮件过程中发生错误: {e}")
return None return None
def record_distribution_to_database(conn, distribution_csv, send_result_csv):
    """Persist one distribution run into the ``distributions`` table.

    Reads the distribution CSV, joins in per-email send status from the
    result CSV (when present), inserts one row per assignment under a new
    batch id, and flags successfully sent contents as distributed.

    Args:
        conn: open SQLite connection.
        distribution_csv: path of the CSV produced by prepare_distribution_csv.
        send_result_csv: path of the email-result CSV; may be None/missing.

    Returns:
        True on success; False (after rollback) when recording fails.
    """
    try:
        dist_df = pd.read_csv(distribution_csv)

        # Attach per-recipient send status when a result file is available.
        if send_result_csv and os.path.exists(send_result_csv):
            result_df = pd.read_csv(send_result_csv)
            status_by_email = {}
            for _, result_row in result_df.iterrows():
                status_by_email[result_row['email']] = result_row['status']
            dist_df['send_status'] = dist_df['email'].map(status_by_email)
        else:
            dist_df['send_status'] = 'unknown'

        cursor = conn.cursor()
        batch_id = f"batch_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        inserted = 0
        for _, row in dist_df.iterrows():
            try:
                user_id = row['user_id']
                content_id = row['content_id']
                status_text = row.get('send_status', 'unknown')
                cursor.execute("""
                    INSERT INTO distributions
                    (content_id, user_id, distribution_date, send_status, batch_id)
                    VALUES (?, ?, ?, ?, ?)
                """, (
                    content_id,
                    user_id,
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    status_text,
                    batch_id
                ))
                inserted += 1
                # A status mentioning "success" marks the content as distributed.
                if 'success' in str(status_text).lower():
                    cursor.execute("""
                        UPDATE contents
                        SET is_distributed = 1
                        WHERE id = ?
                    """, (content_id,))
            except Exception as e:
                logger.warning(f"记录分发结果失败: {row.get('entry_id')} -> {row.get('email')}, 错误: {e}")

        conn.commit()
        logger.info(f"已记录 {inserted} 条分发记录到数据库批次ID: {batch_id}")
        return True
    except Exception as e:
        logger.error(f"记录分发结果到数据库失败: {e}")
        conn.rollback()
        return False
def generate_distribution_report(conn, distribution_csv, send_result_csv, output_dir):
"""生成分发报告""" """生成分发报告"""
try: try:
# 读取分发CSV # 读取分发CSV
@ -573,6 +680,7 @@ def generate_distribution_report(distribution_csv, send_result_csv, manifest_csv
entry_id = row['entry_id'] entry_id = row['entry_id']
if entry_id not in article_stats: if entry_id not in article_stats:
article_stats[entry_id] = { article_stats[entry_id] = {
'content_id': row['content_id'],
'total_assigned': 0, 'total_assigned': 0,
'sent_success': 0, 'sent_success': 0,
'sent_failed': 0, 'sent_failed': 0,
@ -592,41 +700,43 @@ def generate_distribution_report(distribution_csv, send_result_csv, manifest_csv
else: else:
article_stats[entry_id]['sent_unknown'] += 1 article_stats[entry_id]['sent_unknown'] += 1
# 读取原始清单
manifest_df = pd.read_csv(manifest_csv)
# 创建带分发状态的清单
manifest_with_dist = manifest_df.copy()
manifest_with_dist['assigned_count'] = manifest_with_dist['EntryID'].map(
{k: v['total_assigned'] for k, v in article_stats.items()})
manifest_with_dist['sent_success'] = manifest_with_dist['EntryID'].map(
{k: v['sent_success'] for k, v in article_stats.items()})
# 填充NaN值
manifest_with_dist['assigned_count'] = manifest_with_dist['assigned_count'].fillna(0).astype(int)
manifest_with_dist['sent_success'] = manifest_with_dist['sent_success'].fillna(0).astype(int)
# 添加是否被分发的标记
manifest_with_dist['is_distributed'] = manifest_with_dist['assigned_count'] > 0
# 保存报告
report_csv = os.path.join(output_dir, f"distribution_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
manifest_with_dist.to_csv(report_csv, index=False)
# 创建简洁的分发摘要报告 # 创建简洁的分发摘要报告
summary_data = [] summary_data = []
cursor = conn.cursor()
for entry_id, stats in article_stats.items(): for entry_id, stats in article_stats.items():
summary_data.append({ # 从数据库获取内容详情
'entry_id': entry_id, cursor.execute("SELECT * FROM contents WHERE id = ?", (stats['content_id'],))
'product': stats['product'], content = cursor.fetchone()
'object': stats['object'],
'date': stats['date'], if content:
'logic': stats['logic'], summary_data.append({
'assigned': stats['total_assigned'], 'entry_id': entry_id,
'success': stats['sent_success'], 'content_id': stats['content_id'],
'failed': stats['sent_failed'], 'product': stats['product'] or content['product'],
'unknown': stats['sent_unknown'] 'object': stats['object'] or content['object'],
}) 'date': stats['date'] or content['date'],
'logic': stats['logic'] or content['logic'],
'judge_status': content['judge_status'],
'is_distributed': content['is_distributed'],
'assigned': stats['total_assigned'],
'success': stats['sent_success'],
'failed': stats['sent_failed'],
'unknown': stats['sent_unknown']
})
else:
summary_data.append({
'entry_id': entry_id,
'content_id': stats['content_id'],
'product': stats['product'],
'object': stats['object'],
'date': stats['date'],
'logic': stats['logic'],
'assigned': stats['total_assigned'],
'success': stats['sent_success'],
'failed': stats['sent_failed'],
'unknown': stats['sent_unknown']
})
if summary_data: if summary_data:
summary_df = pd.DataFrame(summary_data) summary_df = pd.DataFrame(summary_data)
@ -640,29 +750,40 @@ def generate_distribution_report(distribution_csv, send_result_csv, manifest_csv
json.dump(article_stats, f, indent=2) json.dump(article_stats, f, indent=2)
# 统计摘要 # 统计摘要
total_articles = len(manifest_df) cursor.execute("SELECT COUNT(*) FROM contents")
distributed_articles = sum(1 for _, v in article_stats.items() if v['total_assigned'] > 0) total_articles = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(*) FROM contents WHERE is_distributed = 1")
distributed_articles = cursor.fetchone()[0]
success_sent = sum(1 for _, v in article_stats.items() if v['sent_success'] > 0) success_sent = sum(1 for _, v in article_stats.items() if v['sent_success'] > 0)
logger.info(f"分发报告已保存到: {report_csv}")
logger.info(f"文章统计已保存到: {stats_json}") logger.info(f"文章统计已保存到: {stats_json}")
logger.info("\n===== 分发统计 =====") logger.info("\n===== 分发统计 =====")
logger.info(f"总文章数: {total_articles}") logger.info(f"总文章数: {total_articles}")
logger.info(f"已分配文章数: {distributed_articles} ({distributed_articles/total_articles*100:.2f}%)") logger.info(f"已分配文章数: {distributed_articles} ({distributed_articles/total_articles*100:.2f}%)")
logger.info(f"成功发送文章数: {success_sent} ({success_sent/total_articles*100:.2f}%)") logger.info(f"本次成功发送文章数: {success_sent}")
# 按产品统计 # 按产品统计
if 'Product' in manifest_df.columns: cursor.execute("""
product_stats = manifest_with_dist.groupby('Product')['is_distributed'].value_counts().unstack().fillna(0) SELECT product,
if not product_stats.empty: COUNT(*) as total,
logger.info("\n===== 按产品分发统计 =====") SUM(CASE WHEN is_distributed = 1 THEN 1 ELSE 0 END) as distributed
for product, row in product_stats.iterrows(): FROM contents
if True in row: GROUP BY product
distributed = row.get(True, 0) """)
total = row.sum() product_stats = cursor.fetchall()
logger.info(f"产品 '{product}': {distributed}/{total} ({distributed/total*100:.2f}%)")
return report_csv if product_stats:
logger.info("\n===== 按产品分发统计 =====")
for row in product_stats:
product = row['product']
total = row['total']
distributed = row['distributed']
if total > 0:
logger.info(f"产品 '{product}': {distributed}/{total} ({distributed/total*100:.2f}%)")
return stats_json
except Exception as e: except Exception as e:
logger.error(f"生成分发报告失败: {e}") logger.error(f"生成分发报告失败: {e}")
return None return None
@ -673,51 +794,73 @@ def main():
# 创建输出目录 # 创建输出目录
os.makedirs(args.output_dir, exist_ok=True) os.makedirs(args.output_dir, exist_ok=True)
# 读取用户CSV # 连接数据库
users_df = read_user_csv(args.user_csv, args.email_column, args.username_column) conn = create_database_connection(args.db_path)
if users_df is None: if not conn:
logger.error("无法处理用户CSV,程序退出") logger.error("无法连接到数据库,程序退出")
return return
# 读取内容清单CSV try:
content_df = read_manifest_csv(args.manifest_csv, args.judge_only_success) # 从数据库查询内容
if content_df is None: content_df = query_contents_from_database(
logger.error("无法处理内容清单CSV程序退出") conn,
return product=args.product,
object=args.object,
# 读取上一次分发结果(如果提供) judge_only_success=args.judge_only_success,
already_sent_ids = None undistributed_only=args.undistributed_only
if args.previous_distribution: )
already_sent_ids = read_previous_distribution(args.previous_distribution, args.skip_sent_success)
if content_df is None or len(content_df) == 0:
# 为用户分配内容 logger.error("没有找到符合条件的内容,程序退出")
users_with_content = allocate_content_to_users( return
users_df, content_df, args.article_per_user,
args.email_column, args.username_column, args.max_send_count, # 从数据库查询用户
already_sent_ids users_df = query_users_from_database(
) conn,
user_id=args.user_id,
if not users_with_content: user_email=args.user_email,
logger.error("内容分配失败,程序退出") max_users=args.max_users
return )
# 准备分发CSV if users_df is None or len(users_df) == 0:
distribution_csv = prepare_distribution_csv(users_with_content, args.output_dir) logger.error("没有找到符合条件的用户,程序退出")
if not distribution_csv: return
logger.error("准备分发CSV失败程序退出")
return # 为用户分配内容
users_with_content = allocate_content_to_users(
# 发送邮件 users_df, content_df, args.article_per_user
send_result_csv = send_emails( )
distribution_csv, args.output_dir, args.email_from, args.email_password,
args.subject, args.smtp_server, args.smtp_port, args.use_ssl, args.test_mode, if not users_with_content:
args.delay, args.zip_filename logger.error("内容分配失败,程序退出")
) return
# 生成分发报告 # 准备分发CSV
generate_distribution_report(distribution_csv, send_result_csv, args.manifest_csv, args.output_dir) distribution_csv = prepare_distribution_csv(users_with_content, args.output_dir)
if not distribution_csv:
logger.info("内容分发流程完成") logger.error("准备分发CSV失败程序退出")
return
# 发送邮件
send_result_csv = send_emails(
distribution_csv, args.output_dir, args.email_from, args.email_password,
args.subject, args.smtp_server, args.smtp_port, args.use_ssl, args.test_mode,
args.delay, args.zip_filename
)
# 记录分发结果到数据库
if not args.test_mode:
record_distribution_to_database(conn, distribution_csv, send_result_csv)
else:
logger.info("测试模式,不记录分发结果到数据库")
# 生成分发报告
generate_distribution_report(conn, distribution_csv, send_result_csv, args.output_dir)
logger.info("内容分发流程完成")
finally:
conn.close()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -1,6 +1,7 @@
#!/bin/bash #!/bin/bash
# 内容分发系统数据库版启动脚本(完全从数据库获取数据)
# 设置时间戳变量 # 设置默认时间戳变量
TIMESTAMP=$(date +"%Y%m%d_%H%M%S") TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
# 设置路径变量 # 设置路径变量
@ -8,72 +9,195 @@ BASE_DIR="/root/autodl-tmp/TravelContentCreator"
LOG_DIR="$BASE_DIR/log" LOG_DIR="$BASE_DIR/log"
RESULT_DIR="$BASE_DIR/output/$TIMESTAMP" RESULT_DIR="$BASE_DIR/output/$TIMESTAMP"
OUTPUT_DIR="$RESULT_DIR/distribution_results" OUTPUT_DIR="$RESULT_DIR/distribution_results"
# 设置其他变量 DB_PATH="$BASE_DIR/distribution.db"
USER_CSV="$BASE_DIR/output/5.12.csv"
MANIFEST_CSV="$BASE_DIR/output/2025-05-12_18-30-15/manifest_2025-05-12_18-30-15.csv" # 设置邮件相关变量
EMAIL_FROM="zwysendemail@163.com" EMAIL_FROM="zwysendemail@163.com"
EMAIL_PASSWORD="NMhVGFmCJkGEy3B5" EMAIL_PASSWORD="NMhVGFmCJkGEy3B5"
SUBJECT="文旅小红书带货笔记内容0512" SUBJECT="文旅小红书带货笔记内容0513"
# 上一次分发结果文件(如果存在) ZIP_FILENAME="文旅小红书带货笔记内容0513"
PREVIOUS_DIST=""
# 压缩包文件名 # 设置分发配置
ZIP_FILENAME="文旅小红书带货笔记内容0512" ARTICLE_PER_USER=1
MAX_USERS=10 # 最多发送给多少用户,不限制则设置为-1
TEST_MODE=true # 测试模式,不实际发送邮件
JUDGE_ONLY=true # 只分发审核通过的内容
UNDISTRIBUTED_ONLY=true # 只分发未分发的内容
# 内容筛选配置
TARGET_PRODUCT="【奇妙萌可卡牌套票】四季梦幻亲子乐园单人票" # 为空则不筛选特定产品
TARGET_OBJECT="" # 为空则不筛选特定景点
# 用户筛选配置
TARGET_USER_ID="" # 为空则不筛选特定用户ID
TARGET_USER_EMAIL="" # 为空则不筛选特定用户邮箱
# 创建必要的目录 # 创建必要的目录
mkdir -p "$LOG_DIR" mkdir -p "$LOG_DIR"
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
# 将日志同时输出到控制台和日志文件 # 将日志同时输出到控制台和日志文件
LOG_FILE="$LOG_DIR/distribution_$TIMESTAMP.log" LOG_FILE="$LOG_DIR/distribution_db_$TIMESTAMP.log"
exec > >(tee -a "$LOG_FILE") 2>&1 exec > >(tee -a "$LOG_FILE") 2>&1
echo "开始执行分发脚本 - $(date)" echo "==================================================="
echo "内容分发系统启动 (数据库全依赖版) - $(date)"
echo "==================================================="
echo "日志保存在: $LOG_FILE" echo "日志保存在: $LOG_FILE"
echo "结果保存在: $RESULT_DIR" echo "结果保存在: $RESULT_DIR"
# 测试模式运行 # 检查数据库是否存在
echo "在测试模式下运行,不会实际发送邮件..." if [ ! -f "$DB_PATH" ]; then
python scripts/distribute_content.py \ echo "数据库文件不存在: $DB_PATH"
--user-csv "$USER_CSV" \ echo "请先初始化数据库或检查路径是否正确"
--manifest-csv "$MANIFEST_CSV" \ exit 1
--output-dir "$OUTPUT_DIR" \ fi
--email-from "$EMAIL_FROM" \
--email-password "$EMAIL_PASSWORD" \
--subject "$SUBJECT" \
--article-per-user 1 \
--judge-only-success \
--previous-distribution "$PREVIOUS_DIST" \
--skip-sent-success \
--zip-filename "$ZIP_FILENAME"
# 实际发送邮件的命令(取消注释以启用) # 检查内容表是否有记录
# echo "开始实际发送邮件..." CONTENT_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM contents;")
# python scripts/distribute_content.py \ if [ "$CONTENT_COUNT" -eq 0 ]; then
# --user-csv "$USER_CSV" \ echo "警告: 内容表中没有记录,请先导入内容"
# --manifest-csv "$MANIFEST_CSV" \ exit 1
# --output-dir "$OUTPUT_DIR" \ fi
# --email-from "$EMAIL_FROM" \
# --email-password "$EMAIL_PASSWORD" \
# --subject "$SUBJECT" \
# --article-per-user 3 \
# --use-ssl \
# --smtp-port 465 \
# --judge-only-success \
# --max-send-count 10 \ # 限制最多发送给10个用户
# --previous-distribution "$PREVIOUS_DIST" \
# --skip-sent-success \
# --zip-filename "$ZIP_FILENAME"
# 不使用过滤功能的示例 # 检查用户表是否有记录
# python scripts/distribute_content.py \ USER_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM users;")
# --user-csv "$USER_CSV" \ if [ "$USER_COUNT" -eq 0 ]; then
# --manifest-csv "$MANIFEST_CSV" \ echo "警告: 用户表中没有记录,请先导入用户"
# --output-dir "$OUTPUT_DIR" \ exit 1
# --email-from "$EMAIL_FROM" \ fi
# --email-password "$EMAIL_PASSWORD" \
# --subject "$SUBJECT" \
# --article-per-user 3 \
# --judge-only-success \
# --zip-filename "$ZIP_FILENAME"
echo "脚本执行完成 - $(date)" # 数据库状态统计
echo "数据库状态统计:"
sqlite3 "$DB_PATH" <<EOF
.mode column
.headers on
.width 50 10
SELECT '总内容数:' as 统计项, COUNT(*) as 数量 FROM contents;
SELECT '已审核通过内容数:' as 统计项, COUNT(*) as 数量 FROM contents WHERE judge_status = 1;
SELECT '已分发内容数:' as 统计项, COUNT(*) as 数量 FROM contents WHERE is_distributed = 1;
SELECT '未分发已审核内容数:' as 统计项, COUNT(*) as 数量 FROM contents WHERE judge_status = 1 AND is_distributed = 0;
SELECT '总用户数:' as 统计项, COUNT(*) as 数量 FROM users;
SELECT '=== 按产品统计 ===' as '';
.width 50 10 10 10
SELECT
product AS 产品名称,
COUNT(*) AS 总数量,
SUM(CASE WHEN judge_status = 1 THEN 1 ELSE 0 END) AS 已审核数,
SUM(CASE WHEN is_distributed = 1 THEN 1 ELSE 0 END) AS 已分发数
FROM contents
GROUP BY product;
EOF
# 构建命令行参数
CMD_ARGS="--output-dir \"$OUTPUT_DIR\" --email-from \"$EMAIL_FROM\" --email-password \"$EMAIL_PASSWORD\" --subject \"$SUBJECT\" --db-path \"$DB_PATH\""
# 添加内容筛选参数
if [ -n "$TARGET_PRODUCT" ]; then
CMD_ARGS="$CMD_ARGS --product \"$TARGET_PRODUCT\""
echo "筛选目标产品: $TARGET_PRODUCT"
fi
if [ -n "$TARGET_OBJECT" ]; then
CMD_ARGS="$CMD_ARGS --object \"$TARGET_OBJECT\""
echo "筛选目标景点: $TARGET_OBJECT"
fi
# 添加用户筛选参数
if [ -n "$TARGET_USER_ID" ]; then
CMD_ARGS="$CMD_ARGS --user-id $TARGET_USER_ID"
echo "筛选目标用户ID: $TARGET_USER_ID"
fi
if [ -n "$TARGET_USER_EMAIL" ]; then
CMD_ARGS="$CMD_ARGS --user-email \"$TARGET_USER_EMAIL\""
echo "筛选目标用户邮箱: $TARGET_USER_EMAIL"
fi
if [ "$MAX_USERS" -gt 0 ]; then
CMD_ARGS="$CMD_ARGS --max-users $MAX_USERS"
echo "限制最大用户数: $MAX_USERS"
fi
# 添加其他参数
if [ "$JUDGE_ONLY" = true ]; then
CMD_ARGS="$CMD_ARGS --judge-only-success"
fi
if [ "$UNDISTRIBUTED_ONLY" = true ]; then
CMD_ARGS="$CMD_ARGS --undistributed-only"
fi
if [ "$TEST_MODE" = true ]; then
CMD_ARGS="$CMD_ARGS --test-mode"
echo "注意: 当前为测试模式,不会实际发送邮件"
fi
# 设置每用户文章数量
CMD_ARGS="$CMD_ARGS --article-per-user $ARTICLE_PER_USER"
# 设置ZIP文件名
if [ -n "$ZIP_FILENAME" ]; then
CMD_ARGS="$CMD_ARGS --zip-filename \"$ZIP_FILENAME\""
fi
# 执行分发命令
echo "执行内容分发命令..."
echo "参数: $CMD_ARGS"
echo "---------------------------------------------------"
# 使用eval执行命令确保参数被正确解析
eval "python \"$BASE_DIR/scripts/distribution/distribute_content.py\" $CMD_ARGS"
# 检查命令执行结果
if [ $? -eq 0 ]; then
echo "---------------------------------------------------"
echo "分发命令执行成功!"
# 分发后更新数据库统计
echo "分发后数据库统计:"
sqlite3 "$DB_PATH" <<EOF
.mode column
.headers on
.width 50 10
SELECT '总内容数:' as 统计项, COUNT(*) as 数量 FROM contents;
SELECT '已审核通过内容数:' as 统计项, COUNT(*) as 数量 FROM contents WHERE judge_status = 1;
SELECT '已分发内容数:' as 统计项, COUNT(*) as 数量 FROM contents WHERE is_distributed = 1;
SELECT '未分发已审核内容数:' as 统计项, COUNT(*) as 数量 FROM contents WHERE judge_status = 1 AND is_distributed = 0;
SELECT '=== 分发记录统计 ===' as '';
.width 40 10
SELECT COUNT(*) AS '总分发记录数' FROM distributions;
SELECT COUNT(DISTINCT content_id) AS '已分发内容数' FROM distributions;
SELECT COUNT(DISTINCT user_id) AS '已接收用户数' FROM distributions;
SELECT '=== 最近分发批次 ===' as '';
.width 40 20 10
SELECT
batch_id,
substr(distribution_date, 1, 16) as 时间,
COUNT(*) as 记录数
FROM distributions
GROUP BY batch_id
ORDER BY distribution_date DESC
LIMIT 5;
EOF
# 如果是测试模式,给出提示
if [ "$TEST_MODE" = true ]; then
echo "---------------------------------------------------"
echo "测试模式已完成,若要实际发送邮件,请设置 TEST_MODE=false"
echo "然后重新运行此脚本"
fi
else
echo "---------------------------------------------------"
echo "错误: 分发命令执行失败!请检查日志文件和错误信息"
fi
echo "==================================================="
echo "内容分发系统执行完毕 - $(date)"
echo "==================================================="

View File

@ -9,6 +9,127 @@ import traceback
import re import re
import argparse import argparse
from datetime import datetime from datetime import datetime
import sqlite3
import logging
# Configure logging: INFO level, timestamped records, console handler only
# (no file handler here — this module logs to stderr/stdout).
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[logging.StreamHandler()]
)
# Module-level logger shared by the database helpers below.
logger = logging.getLogger(__name__)
# Built-in database recording helpers.
def init_database(db_path):
"""初始化数据库,创建表结构"""
try:
conn = sqlite3.connect(db_path)
conn.execute("PRAGMA foreign_keys = ON")
cursor = conn.cursor()
# 创建内容表
cursor.execute("""
CREATE TABLE IF NOT EXISTS contents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entry_id TEXT NOT NULL UNIQUE,
output_txt_path TEXT,
poster_path TEXT,
article_json_path TEXT,
product TEXT,
object TEXT,
date TEXT,
logic TEXT,
judge_status INTEGER,
is_distributed INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# 创建索引
cursor.execute("CREATE INDEX IF NOT EXISTS idx_contents_entry_id ON contents(entry_id)")
conn.commit()
logger.info("数据库初始化成功")
return conn
except sqlite3.Error as e:
logger.error(f"初始化数据库失败: {e}")
return None
def record_to_database(
db_path,
entry_id,
output_txt_path=None,
poster_path=None,
article_json_path=None,
product=None,
object=None,
date=None,
logic=None,
judge_status=None,
is_distributed=0
):
"""将内容记录到数据库"""
try:
# 检查数据库是否存在,如果不存在则初始化
if not os.path.exists(db_path):
logger.info(f"数据库文件不存在: {db_path},将自动创建")
conn = init_database(db_path)
if not conn:
return False
else:
try:
conn = sqlite3.connect(db_path)
conn.execute("PRAGMA foreign_keys = ON")
except sqlite3.Error as e:
logger.error(f"连接数据库失败: {e}")
return False
try:
cursor = conn.cursor()
# 准备数据
data = (
entry_id,
output_txt_path or '',
poster_path or '',
article_json_path or '',
product or '',
object or '',
date or '',
logic or '',
judge_status if judge_status is not None else None,
is_distributed
)
# 插入或更新内容
cursor.execute("""
INSERT OR REPLACE INTO contents
(entry_id, output_txt_path, poster_path, article_json_path,
product, object, date, logic, judge_status, is_distributed)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", data)
conn.commit()
logger.info(f"已将内容 {entry_id} 记录到数据库")
return True
except Exception as e:
logger.error(f"记录内容到数据库失败: {e}")
try:
conn.rollback()
except:
pass
return False
finally:
try:
conn.close()
except:
pass
except Exception as e:
logger.error(f"记录提取内容时发生错误: {e}")
return False
def convert_json_to_txt_content(json_path, prefer_original=False): def convert_json_to_txt_content(json_path, prefer_original=False):
""" """
@ -112,7 +233,7 @@ def load_topic_data(source_dir, run_id):
return topic_data return topic_data
def process_result_directory(source_dir, output_dir, run_id=None, prefer_original=False): def process_result_directory(source_dir, output_dir, run_id=None, prefer_original=False, db_path=None):
""" """
处理指定的结果目录提取内容并渲染到输出目录 处理指定的结果目录提取内容并渲染到输出目录
@ -121,11 +242,19 @@ def process_result_directory(source_dir, output_dir, run_id=None, prefer_origina
output_dir: 输出目录路径 output_dir: 输出目录路径
run_id: 可选的运行ID如果不提供则使用源目录名 run_id: 可选的运行ID如果不提供则使用源目录名
prefer_original: 是否优先使用原始内容无视judge_success结果 prefer_original: 是否优先使用原始内容无视judge_success结果
db_path: 数据库路径若不提供则使用默认路径
""" """
if not os.path.isdir(source_dir): if not os.path.isdir(source_dir):
print(f"错误: 源目录不存在: {source_dir}") print(f"错误: 源目录不存在: {source_dir}")
return return
# 设置默认数据库路径
if db_path is None:
db_path = '/root/autodl-tmp/TravelContentCreator/distribution.db'
# 数据库是否启用
db_enabled = True
# 创建输出目录 # 创建输出目录
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
print(f"确保输出目录存在: {output_dir}") print(f"确保输出目录存在: {output_dir}")
@ -161,7 +290,9 @@ def process_result_directory(source_dir, output_dir, run_id=None, prefer_origina
"Status", "Status",
"Details", "Details",
"JudgeStatus", "JudgeStatus",
"ContentSource" "ContentSource",
"RecordedInDB",
"IsDistributed"
] ]
] ]
@ -218,7 +349,9 @@ def process_result_directory(source_dir, output_dir, run_id=None, prefer_origina
"Status": "Processing", "Status": "Processing",
"Details": "", "Details": "",
"JudgeStatus": "", "JudgeStatus": "",
"ContentSource": "unknown" "ContentSource": "unknown",
"RecordedInDB": "No",
"IsDistributed": "No"
} }
# 创建输出条目目录 # 创建输出条目目录
@ -237,8 +370,9 @@ def process_result_directory(source_dir, output_dir, run_id=None, prefer_origina
record["ArticleJsonPath"] = json_path record["ArticleJsonPath"] = json_path
record["OutputTxtPath"] = txt_path record["OutputTxtPath"] = txt_path
# 读取article.json
article_data = {}
if os.path.exists(json_path): if os.path.exists(json_path):
# 读取article.json
try: try:
with open(json_path, 'r', encoding='utf-8') as f_json: with open(json_path, 'r', encoding='utf-8') as f_json:
article_data = json.load(f_json) article_data = json.load(f_json)
@ -332,6 +466,46 @@ def process_result_directory(source_dir, output_dir, run_id=None, prefer_origina
record["Status"] = "Success" record["Status"] = "Success"
record["Details"] = "处理成功完成" record["Details"] = "处理成功完成"
# 4. 将内容记录到数据库
if db_enabled:
try:
# 准备judge_status值
if record["JudgeStatus"] == "True":
judge_status = 1
elif record["JudgeStatus"] == "False":
judge_status = 0
else:
judge_status = None
# 调用数据库记录函数
success = record_to_database(
db_path,
entry_id=record["EntryID"],
output_txt_path=record["OutputTxtPath"],
poster_path=record["PosterPath"],
article_json_path=record["ArticleJsonPath"],
product=record["Product"],
object=record["Object"],
date=record["Date"],
logic=record["Logic"],
judge_status=judge_status,
is_distributed=0 # 默认为未分发
)
if success:
record["RecordedInDB"] = "Yes"
print(f" - 成功将内容记录到数据库")
else:
record["RecordedInDB"] = "Failed"
print(f" - 警告: 内容记录到数据库失败")
except Exception as e:
record["RecordedInDB"] = "Error"
print(f" - 错误: 记录到数据库时发生异常: {e}")
traceback.print_exc() # 打印详细的异常堆栈
else:
record["RecordedInDB"] = "Disabled"
print(f" - 信息: 数据库记录功能已禁用")
# 添加记录到CSV数据 # 添加记录到CSV数据
csv_data.append([record[col] for col in csv_data[0]]) csv_data.append([record[col] for col in csv_data[0]])
@ -355,14 +529,17 @@ def main():
parser.add_argument("--output", type=str, help="输出目录路径") parser.add_argument("--output", type=str, help="输出目录路径")
parser.add_argument("--run-id", type=str, help="自定义运行ID") parser.add_argument("--run-id", type=str, help="自定义运行ID")
parser.add_argument("--prefer-original", action="store_true", help="优先使用原始内容,忽略审核结果") parser.add_argument("--prefer-original", action="store_true", help="优先使用原始内容,忽略审核结果")
parser.add_argument("--db-path", type=str, help="数据库路径,若不提供则使用默认路径")
parser.add_argument("--disable-db", action="store_true", help="禁用数据库记录功能")
args = parser.parse_args() args = parser.parse_args()
# 默认值设置 # 默认值设置
source = args.source if args.source else "/root/autodl-tmp/TravelContentCreator/result/2025-05-12_18-30-15" source = args.source if args.source else "/root/autodl-tmp/TravelContentCreator/result/2025-05-12_21-36-33"
output = args.output if args.output else "/root/autodl-tmp/TravelContentCreator/output/2025-05-12_18-30-15" output = args.output if args.output else "/root/autodl-tmp/TravelContentCreator/output/2025-05-12_21-36-33"
run_id = args.run_id if args.run_id else os.path.basename(source) run_id = args.run_id if args.run_id else os.path.basename(source)
prefer_original = args.prefer_original prefer_original = args.prefer_original
db_path = args.db_path if args.db_path else '/root/autodl-tmp/TravelContentCreator/distribution.db'
print("-" * 60) print("-" * 60)
print(f"开始提取和渲染流程") print(f"开始提取和渲染流程")
@ -373,9 +550,14 @@ def main():
print("内容模式: 优先使用原始内容") print("内容模式: 优先使用原始内容")
else: else:
print("内容模式: 根据审核结果选择内容") print("内容模式: 根据审核结果选择内容")
if args.disable_db:
print("数据库记录: 已禁用")
else:
print(f"数据库记录: 已启用 (路径: {db_path})")
print("-" * 60) print("-" * 60)
process_result_directory(source, output, run_id, prefer_original) process_result_directory(source, output, run_id, prefer_original, db_path)
print("\n脚本执行完毕.") print("\n脚本执行完毕.")