#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import pandas as pd
import argparse
import random
import logging
import sqlite3
from datetime import datetime
import json
import smtplib
import zipfile
import time
import glob
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
from email.header import Header

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(f"content_distribution_db_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

def parse_arguments():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser(description='内容分发 (数据库版)')

    # Required arguments
    parser.add_argument('--output-dir', type=str, default='distribution_results',
                        help='分发结果输出目录')
    parser.add_argument('--email-from', type=str, required=True,
                        help='发件人邮箱地址')
    parser.add_argument('--email-password', type=str, required=True,
                        help='发件人邮箱授权码')

    # Database options
    parser.add_argument('--db-path', type=str, default='/root/autodl-tmp/TravelContentCreator/distribution.db',
                        help='数据库文件路径')

    # Content filtering options
    parser.add_argument('--product', type=str, default=None,
                        help='指定产品名称进行筛选')
    parser.add_argument('--object', type=str, default=None,
                        help='指定景点名称进行筛选')
    parser.add_argument('--undistributed-only', action='store_true',
                        help='只选择未分发的内容')

    # User filtering options
    parser.add_argument('--user-id', type=int, default=None,
                        help='指定用户ID进行分发')
    parser.add_argument('--user-email', type=str, default=None,
                        help='指定用户邮箱进行分发')
    parser.add_argument('--max-users', type=int, default=None,
                        help='最大用户数量')

    # Optional arguments
    parser.add_argument('--article-per-user', type=int, default=3,
                        help='每个用户分配的文章数量')
    parser.add_argument('--subject', type=str, default='您的旅游内容创作已就绪',
                        help='邮件主题')
    parser.add_argument('--smtp-server', type=str, default='smtp.163.com',
                        help='SMTP服务器地址')
    parser.add_argument('--smtp-port', type=int, default=25,
                        help='SMTP服务器端口')
    parser.add_argument('--use-ssl', action='store_true',
                        help='使用SSL连接SMTP服务器')
    parser.add_argument('--judge-only-success', action='store_true',
                        help='只分发审核成功的内容')
    parser.add_argument('--test-mode', action='store_true',
                        help='测试模式,不实际发送邮件')
    parser.add_argument('--test-email', type=str, default=None,
                        help='测试邮箱地址,所有邮件都会发送到这个邮箱')
    parser.add_argument('--delay', type=int, default=2,
                        help='每封邮件发送之间的延迟时间(秒)')
    parser.add_argument('--zip-filename', type=str, default=None,
                        help='指定ZIP压缩包的基本文件名(不含扩展名),如"文旅小红书带货笔记内容0512"')
    # Mandatory attachments that are added to every email
    parser.add_argument('--force-attachments', type=str, default=None,
                        help='强制添加的附件文件路径,多个文件用逗号分隔')

    return parser.parse_args()

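# Example invocations (a sketch only -- the script filename, addresses and
# credentials below are placeholders, not values taken from this repository):
#
#   Dry run against review-approved, not-yet-distributed content:
#     python distribute_content_db.py \
#         --email-from sender@example.com --email-password APP_AUTH_CODE \
#         --judge-only-success --undistributed-only --test-mode
#
#   Real send over SSL, 3 articles per user, at most 50 users:
#     python distribute_content_db.py \
#         --email-from sender@example.com --email-password APP_AUTH_CODE \
#         --article-per-user 3 --max-users 50 --use-ssl --smtp-port 465
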
def create_database_connection(db_path):
    """Create a connection to the SQLite database."""
    try:
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row  # Return rows that support dict-style access
        return conn
    except sqlite3.Error as e:
        logger.error(f"连接数据库失败: {e}")
        return None

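# The queries in this script assume a SQLite schema roughly like the sketch
# below. It is reconstructed from the column names used here; the real schema
# may have additional columns, types, or constraints.
#
#   CREATE TABLE contents (
#       id INTEGER PRIMARY KEY,
#       entry_id TEXT,
#       output_txt_path TEXT,
#       poster_path TEXT,
#       article_json_path TEXT,
#       product TEXT,
#       object TEXT,
#       date TEXT,
#       logic TEXT,
#       judge_status INTEGER,   -- 1 = passed review
#       is_distributed INTEGER  -- 0 = not yet distributed, 1 = distributed
#   );
#
#   CREATE TABLE users (
#       id INTEGER PRIMARY KEY,
#       email TEXT,
#       username TEXT
#   );
#
#   CREATE TABLE distributions (
#       content_id INTEGER,
#       user_id INTEGER,
#       distribution_date TEXT,
#       send_status TEXT,
#       batch_id TEXT
#   );
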
def query_contents_from_database(conn, product=None, object=None, judge_only_success=False, undistributed_only=False):
    """Query contents from the database."""
    try:
        cursor = conn.cursor()

        # Build the query conditions
        conditions = []
        params = []

        if product:
            conditions.append("product LIKE ?")
            params.append(f"%{product}%")

        if object:
            conditions.append("object LIKE ?")
            params.append(f"%{object}%")

        if judge_only_success:
            conditions.append("judge_status = 1")

        if undistributed_only:
            conditions.append("is_distributed = 0")

        # Build the SQL query
        sql = "SELECT * FROM contents"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)

        # Execute the query
        cursor.execute(sql, params)
        results = cursor.fetchall()

        # Convert to a DataFrame
        content_list = []
        for row in results:
            content_list.append({
                'id': row['id'],  # Internal ID, used for database joins
                'EntryID': row['entry_id'],
                'OutputTxtPath': row['output_txt_path'],
                'PosterPath': row['poster_path'],
                'ArticleJsonPath': row['article_json_path'],
                'Product': row['product'],
                'Object': row['object'],
                'Date': row['date'],
                'Logic': row['logic'],
                'JudgeStatus': row['judge_status'],
                'IsDistributed': row['is_distributed']
            })

        content_df = pd.DataFrame(content_list)

        # Log query statistics
        logger.info(f"从数据库查询到 {len(content_df)} 条内容")
        if product:
            logger.info(f"按产品筛选: {product}")
        if object:
            logger.info(f"按景点筛选: {object}")
        if judge_only_success:
            logger.info("仅显示审核通过的内容")
        if undistributed_only:
            logger.info("仅显示未分发的内容")

        return content_df

    except Exception as e:
        logger.error(f"从数据库查询内容失败: {e}")
        return None

def query_users_from_database(conn, user_id=None, user_email=None, max_users=None):
    """Query users from the database."""
    try:
        cursor = conn.cursor()

        # Build the query conditions
        conditions = []
        params = []

        if user_id:
            conditions.append("id = ?")
            params.append(user_id)

        if user_email:
            conditions.append("email LIKE ?")
            params.append(f"%{user_email}%")

        # Build the SQL query
        sql = "SELECT id, email, username FROM users"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)

        # Limit the number of users
        if max_users:
            sql += f" LIMIT {max_users}"

        # Execute the query
        cursor.execute(sql, params)
        results = cursor.fetchall()

        # Convert to a DataFrame
        user_list = []
        for row in results:
            user_list.append({
                'id': row['id'],
                'email': row['email'],
                'username': row['username'] or row['email'].split('@')[0]
            })

        user_df = pd.DataFrame(user_list)

        # Log query statistics
        logger.info(f"从数据库查询到 {len(user_df)} 个用户")
        if user_id:
            logger.info(f"按用户ID筛选: {user_id}")
        if user_email:
            logger.info(f"按用户邮箱筛选: {user_email}")
        if max_users:
            logger.info(f"限制最大用户数: {max_users}")

        return user_df

    except Exception as e:
        logger.error(f"从数据库查询用户失败: {e}")
        return None

def allocate_content_to_users(users_df, content_df, article_per_user):
    """Allocate content to users."""
    try:
        # Build the user list
        users = []
        users_with_insufficient_content = []  # Tracks users who received less content than expected

        for _, row in users_df.iterrows():
            users.append({
                'id': row['id'],
                'email': row['email'],
                'username': row['username'],
                'contents': []
            })

        # Convert to a list of records
        content_list = content_df.to_dict('records')
        if not content_list:
            logger.warning("没有可用内容进行分配")
            return [], []

        # Shuffle the content list
        random.shuffle(content_list)

        # Allocate content to each user
        content_allocated = []
        content_index = 0

        for user in users:
            user_contents = []
            expected_count = article_per_user  # Expected number of items per user

            for _ in range(article_per_user):
                if content_index >= len(content_list):
                    # Do not reuse content in a loop; stop allocating instead.
                    # The post-loop check below records this user once, so we do
                    # not also append to users_with_insufficient_content here
                    # (doing both produced duplicate records).
                    logger.warning("内容不足,只分配可用内容")
                    break

                content = content_list[content_index]
                user_contents.append(content)
                content_allocated.append(content)
                content_index += 1

            user['contents'] = user_contents

            # Check whether the user received the expected amount of content
            if len(user_contents) < expected_count:
                users_with_insufficient_content.append({
                    'user_id': user['id'],
                    'email': user['email'],
                    'username': user['username'],
                    'expected_content': expected_count,
                    'actual_content': len(user_contents)
                })

        logger.info(f"已为 {len(users)} 个用户分配 {len(content_allocated)} 条内容")
        unique_content_count = len(set([c['EntryID'] for c in content_allocated]))
        logger.info(f"分配的唯一内容条数: {unique_content_count}")

        if users_with_insufficient_content:
            logger.warning(f"有 {len(users_with_insufficient_content)} 个用户获得的内容少于预期数量")

        # Return the user list and the list of users with insufficient content
        return users, users_with_insufficient_content
    except Exception as e:
        logger.error(f"分配内容失败: {e}")
        return [], []

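# Allocation walks the shuffled list sequentially: for example (illustrative
# numbers only), 5 users with --article-per-user 3 need 15 items; if only 13
# are available, the first four users get 3 each, the fifth gets 1, and that
# fifth user appears in the insufficient-content list returned above.
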
def find_additional_images(file_path):
    """Find all additional images associated with an article."""
    if not file_path or pd.isna(file_path) or not os.path.exists(file_path):
        return []

    # Get the article directory
    article_dir = os.path.dirname(file_path)

    # Look for a possible additional-images directory
    additional_images_dir = os.path.join(article_dir, "additional_images")
    if not os.path.exists(additional_images_dir):
        # Try one level up
        parent_dir = os.path.dirname(article_dir)
        additional_images_dir = os.path.join(parent_dir, "additional_images")
        if not os.path.exists(additional_images_dir):
            return []

    # Collect all image files
    image_patterns = ["*.jpg", "*.jpeg", "*.png", "*.gif", "*.webp"]
    image_files = []

    for pattern in image_patterns:
        image_files.extend(glob.glob(os.path.join(additional_images_dir, pattern)))

    logger.info(f"找到 {len(image_files)} 张额外图片: {additional_images_dir}")
    return image_files

def prepare_distribution_csv(users_with_content, output_dir, users_with_insufficient_content=None):
    """Prepare the distribution CSV file."""
    try:
        os.makedirs(output_dir, exist_ok=True)

        # Prepare the CSV rows
        rows = []
        for user in users_with_content:
            for content in user['contents']:
                # Look up additional images
                output_txt_path = content['OutputTxtPath']
                additional_images = find_additional_images(output_txt_path)

                rows.append({
                    'user_id': user['id'],
                    'email': user['email'],
                    'username': user['username'],
                    'content_id': content['id'],
                    'entry_id': content['EntryID'],
                    'file_path': content['OutputTxtPath'],
                    'poster_path': content['PosterPath'],
                    'article_json_path': content['ArticleJsonPath'],
                    'additional_images': ';'.join(additional_images),  # Stored as a semicolon-separated string
                    'product': content.get('Product', ''),
                    'object': content.get('Object', ''),
                    'date': content.get('Date', ''),
                    'logic': content.get('Logic', '')
                })

        # Create the DataFrame and save it
        df = pd.DataFrame(rows)
        distribution_csv = os.path.join(output_dir, f"distribution_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
        df.to_csv(distribution_csv, index=False)

        logger.info(f"分发CSV已保存到: {distribution_csv}")

        # If any users received insufficient content, export them to a CSV
        if users_with_insufficient_content:
            insufficient_df = pd.DataFrame(users_with_insufficient_content)
            insufficient_csv = os.path.join(output_dir, f"insufficient_content_users_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
            insufficient_df.to_csv(insufficient_csv, index=False)
            logger.info(f"内容不足的用户列表已保存到: {insufficient_csv}")

        return distribution_csv
    except Exception as e:
        logger.error(f"准备分发CSV失败: {e}")
        return None

def create_zip_file(files, output_path):
    """Pack the given files into a ZIP archive."""
    try:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file_path in files:
                if not file_path or pd.isna(file_path):
                    continue

                if os.path.exists(file_path):
                    arcname = os.path.basename(file_path)
                    zipf.write(file_path, arcname=arcname)
                else:
                    logger.warning(f"文件不存在,跳过: {file_path}")

        logger.info(f"成功创建ZIP文件: {output_path}")
        return output_path
    except Exception as e:
        logger.error(f"创建ZIP文件失败: {e}")
        return None

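# Illustrative sketch (not called anywhere in this script): the --zip-filename
# option is parsed and passed into send_emails() but is currently unused. One
# way it could be wired in is to bundle a user's files with create_zip_file()
# and attach the single archive instead of each file individually. The helper
# below and its naming are assumptions, not part of the original flow.
def bundle_user_files(files, output_dir, zip_filename, user_id):
    """Bundle one user's files into <output_dir>/<base>_<user_id>.zip (sketch)."""
    base_name = zip_filename or f"distribution_{datetime.now().strftime('%Y%m%d')}"
    zip_path = os.path.join(output_dir, f"{base_name}_{user_id}.zip")
    # create_zip_file() skips missing paths and returns None on failure
    return create_zip_file(files, zip_path)
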
def send_single_email(from_addr, password, to_addr, subject, content, attachments,
                      smtp_server, smtp_port, use_ssl=False):
    """Send a single email."""
    try:
        # Build the message
        message = MIMEMultipart()
        message['From'] = Header(from_addr)
        message['To'] = Header(to_addr)
        message['Subject'] = Header(subject)

        # Add the body
        message.attach(MIMEText(content, 'plain', 'utf-8'))

        # Add the attachments
        for attachment in attachments:
            if not os.path.exists(attachment):
                logger.warning(f"附件不存在,跳过: {attachment}")
                continue

            with open(attachment, 'rb') as f:
                part = MIMEApplication(f.read())
                part.add_header('Content-Disposition', 'attachment',
                                filename=Header(os.path.basename(attachment), 'utf-8').encode())
                message.attach(part)

        # Connect to the SMTP server and send
        if use_ssl:
            server = smtplib.SMTP_SSL(smtp_server, smtp_port)
        else:
            server = smtplib.SMTP(smtp_server, smtp_port)

        server.login(from_addr, password)
        server.sendmail(from_addr, to_addr, message.as_string())
        server.quit()

        logger.info(f"成功发送邮件到: {to_addr}")
        return True
    except Exception as e:
        logger.error(f"发送邮件到 {to_addr} 失败: {e}")
        return False

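# Note on transport settings: --smtp-port defaults to 25 (plain SMTP), while
# SMTP over SSL normally uses port 465, so --use-ssl is typically paired with
# --smtp-port 465. Providers that require STARTTLS (commonly port 587) are not
# handled here; supporting them would mean calling server.starttls() after
# connecting with smtplib.SMTP, which this script does not do.
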
def send_emails(distribution_csv, output_dir, email_from, email_password,
                subject, smtp_server, smtp_port, use_ssl, test_mode, delay=2, zip_filename=None, test_email=None, force_attachments=None):
    """Send the emails."""
    # NOTE: zip_filename is accepted here but not currently used; every file is
    # attached individually (see the bundle_user_files() sketch above for one
    # possible way to use it).
    try:
        # Make sure the output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Read the distribution CSV
        df = pd.read_csv(distribution_csv)
        logger.info(f"从 {distribution_csv} 读取了 {len(df)} 条分发记录")

        # Announce test-mailbox mode
        if test_email:
            logger.info(f"测试邮箱模式已启用,所有邮件将发送到: {test_email}")

        # Collect the mandatory attachments
        forced_attachments = []
        if force_attachments:
            for attachment_path in force_attachments.split(','):
                attachment_path = attachment_path.strip()
                if os.path.exists(attachment_path):
                    if os.path.isdir(attachment_path):
                        # Directories: add every file underneath
                        for root, _, files in os.walk(attachment_path):
                            for file in files:
                                file_path = os.path.join(root, file)
                                forced_attachments.append(file_path)
                                logger.info(f"添加强制性附件目录文件: {file_path}")
                    else:
                        # Plain files: add directly
                        forced_attachments.append(attachment_path)
                        logger.info(f"添加强制性附件: {attachment_path}")
                else:
                    logger.warning(f"强制性附件不存在,跳过: {attachment_path}")

            logger.info(f"共添加 {len(forced_attachments)} 个强制性附件")

        # Group by user ID rather than by email address, so that users sharing
        # the same address still receive separate emails
        user_groups = {}
        for _, row in df.iterrows():
            user_id = row['user_id']
            if user_id not in user_groups:
                user_groups[user_id] = []
            user_groups[user_id].append(row)

        logger.info(f"共有 {len(user_groups)} 个用户需要发送")

        # Result bookkeeping
        results = {
            "total": len(user_groups),
            "success": 0,
            "failed": 0,
            "details": []
        }

        # Send the emails
        for user_id, rows in user_groups.items():
            # Resolve the address first so the except block below can always reference it
            email = rows[0]['email']
            try:
                logger.info(f"处理用户: {user_id}, 邮箱: {email}")

                # Collect all file paths
                files = []
                article_contents = []
                valid_article_count = 0

                for row in rows:
                    # Collect the text file and read its content
                    file_path = row.get('file_path')
                    has_article_content = False

                    if file_path and not pd.isna(file_path) and os.path.exists(file_path):
                        files.append(file_path)

                        # Read the article content
                        try:
                            with open(file_path, 'r', encoding='utf-8') as f:
                                article_content = f.read()
                                if article_content.strip():  # Make sure the content is not empty
                                    article_contents.append(f"\n\n{article_content}\n\n{'='*50}\n\n")
                                    has_article_content = True
                                    valid_article_count += 1
                        except Exception as e:
                            logger.warning(f"读取文章内容失败: {file_path}, 错误: {e}")

                    # Collect the poster image
                    poster_path = row.get('poster_path')
                    if poster_path and not pd.isna(poster_path) and os.path.exists(poster_path):
                        files.append(poster_path)

                    # Collect all additional images
                    additional_images = row.get('additional_images', '')
                    if additional_images and not pd.isna(additional_images):
                        for img_path in additional_images.split(';'):
                            if img_path.strip() and os.path.exists(img_path):
                                files.append(img_path)

                # Add the mandatory attachments
                if forced_attachments:
                    files.extend(forced_attachments)

                # Check whether there are valid files and article contents
                if not files or valid_article_count == 0:
                    logger.warning(f"用户 {user_id} (邮箱: {email}) 没有有效的文件或文章内容,尝试查找替代内容")

                    # Try to find substitute content
                    try:
                        # Connect to the database
                        # NOTE: this path is hard-coded and duplicates the --db-path
                        # default; it is not taken from the command-line arguments.
                        db_path = '/root/autodl-tmp/TravelContentCreator/distribution.db'
                        conn = sqlite3.connect(db_path)
                        conn.row_factory = sqlite3.Row

                        # Query one item that passed review but has not been distributed
                        cursor = conn.cursor()
                        cursor.execute("""
                            SELECT * FROM contents
                            WHERE judge_status = 1 AND is_distributed = 0
                            LIMIT 1
                        """)

                        alt_content = cursor.fetchone()

                        if alt_content:
                            logger.info(f"找到替代内容: {alt_content['entry_id']}")

                            # Discard whatever was collected before
                            files = []
                            article_contents = []

                            # Add the substitute content
                            alt_file_path = alt_content['output_txt_path']
                            if alt_file_path and os.path.exists(alt_file_path):
                                files.append(alt_file_path)
                                # Read the article content
                                try:
                                    with open(alt_file_path, 'r', encoding='utf-8') as f:
                                        alt_article_content = f.read()
                                        article_contents.append(f"\n\n{alt_article_content}\n\n{'='*50}\n\n")
                                        valid_article_count = 1
                                except Exception as e:
                                    logger.warning(f"读取替代内容失败: {alt_file_path}, 错误: {e}")

                            # Add the poster and any additional images
                            alt_poster_path = alt_content['poster_path']
                            if alt_poster_path and os.path.exists(alt_poster_path):
                                files.append(alt_poster_path)

                            # Look for additional images
                            alt_additional_images = find_additional_images(alt_file_path)
                            for img_path in alt_additional_images:
                                if os.path.exists(img_path):
                                    files.append(img_path)
                        else:
                            logger.warning("未找到替代内容")

                        conn.close()

                    except Exception as e:
                        logger.error(f"查找替代内容时出错: {e}")

                    # Check again whether there are valid files and content
                    if not files or valid_article_count == 0:
                        logger.warning(f"用户 {user_id} (邮箱: {email}) 没有有效的附件文件或文章内容,跳过")
                        results["details"].append({
                            "user_id": user_id,
                            "email": email,
                            "status": "skipped",
                            "reason": "没有有效的附件文件或文章内容",
                            "files": []
                        })
                        results["failed"] += 1
                        continue

                # Build the email content
                files_count = len(files)
                entries_count = valid_article_count

                # Base email body
                base_email_content = f"""
您好!请查收今日带货笔记(文案+配图),内容在邮件正文和附件中。具体挂载商品等操作流程请查看对应达人微信群内信息。

优化建议:
①所有提供的材料仅为参考,支持自行修改(您可以参考我们整理给您的<小红书爆款标题公式文件>针对标题进行修改仿写)
②标题优化和格式的规整更有利于带来小眼睛
③本测试提供的商品均为近期高佣金筛选后的优质商单,质量保证

===== 文章内容 =====

"""

                # Append the article contents
                full_email_content = base_email_content + '\n'.join(article_contents)

                # Send the email
                if test_mode:
                    logger.info(f"测试模式: 模拟发送邮件到用户 {user_id} (邮箱: {email})")
                    results["details"].append({
                        "user_id": user_id,
                        "email": email,
                        "status": "success (test mode)",
                        "files": files
                    })
                    results["success"] += 1
                else:
                    # If a test mailbox is configured, override the actual recipient
                    target_email = test_email if test_email else email

                    if test_email:
                        logger.info(f"测试邮箱模式: 邮件将发送到 {test_email} 而不是 {email}")

                    success = send_single_email(
                        email_from,
                        email_password,
                        target_email,
                        subject,
                        full_email_content,
                        files,  # Attach every collected file directly
                        smtp_server,
                        smtp_port,
                        use_ssl
                    )

                    if success:
                        results["success"] += 1
                        results["details"].append({
                            "user_id": user_id,
                            "email": email,
                            "status": "success",
                            "files": files
                        })
                    else:
                        results["failed"] += 1
                        results["details"].append({
                            "user_id": user_id,
                            "email": email,
                            "status": "failed",
                            "reason": "SMTP发送失败",
                            "files": files
                        })

                # Pause between emails to avoid sending too fast
                if not test_mode and delay > 0:
                    logger.info(f"等待 {delay} 秒后继续发送下一封邮件...")
                    time.sleep(delay)

            except Exception as e:
                logger.error(f"处理用户 {user_id} (邮箱: {email}) 时出错: {e}")
                results["failed"] += 1
                results["details"].append({
                    "user_id": user_id,
                    "email": email,
                    "status": "failed",
                    "reason": str(e),
                    "files": []
                })

        # Print a summary of the send results
        logger.info("\n===== 邮件发送结果摘要 =====")
        logger.info(f"总计: {results['total']} 个用户")
        logger.info(f"成功: {results['success']} 个用户")
        logger.info(f"失败: {results['failed']} 个用户")

        # Write the results to a CSV file
        try:
            result_df = pd.DataFrame(results["details"])
            result_csv = os.path.join(output_dir,
                                      f"email_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
            result_df.to_csv(result_csv, index=False)
            logger.info(f"发送结果已保存到: {result_csv}")
            return result_csv
        except Exception as e:
            logger.error(f"保存结果到CSV失败: {e}")
            return None

    except Exception as e:
        logger.error(f"发送邮件过程中发生错误: {e}")
        return None

def record_distribution_to_database(conn, distribution_csv, send_result_csv):
    """Record the distribution results in the database."""
    try:
        # Read the distribution CSV
        dist_df = pd.read_csv(distribution_csv)

        # Read the send-result CSV
        if send_result_csv and os.path.exists(send_result_csv):
            result_df = pd.read_csv(send_result_csv)
            # Attach the send status
            email_status = {}
            for _, row in result_df.iterrows():
                email_status[row['email']] = row['status']

            dist_df['send_status'] = dist_df['email'].map(email_status)
        else:
            dist_df['send_status'] = 'unknown'

        # Record into the distributions table
        cursor = conn.cursor()
        batch_id = f"batch_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Insert the records
        insertion_count = 0
        for _, row in dist_df.iterrows():
            try:
                user_id = row['user_id']
                content_id = row['content_id']

                # Check whether the send succeeded
                success = 'success' in str(row.get('send_status', '')).lower()

                # Insert the distribution record
                cursor.execute("""
                    INSERT INTO distributions
                    (content_id, user_id, distribution_date, send_status, batch_id)
                    VALUES (?, ?, ?, ?, ?)
                """, (
                    content_id,
                    user_id,
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    row.get('send_status', 'unknown'),
                    batch_id
                ))

                insertion_count += 1

                # If the send succeeded, mark the content as distributed
                if success:
                    cursor.execute("""
                        UPDATE contents
                        SET is_distributed = 1
                        WHERE id = ?
                    """, (content_id,))

            except Exception as e:
                logger.warning(f"记录分发结果失败: {row.get('entry_id')} -> {row.get('email')}, 错误: {e}")

        conn.commit()
        logger.info(f"已记录 {insertion_count} 条分发记录到数据库,批次ID: {batch_id}")

        return True
    except Exception as e:
        logger.error(f"记录分发结果到数据库失败: {e}")
        conn.rollback()
        return False

def generate_distribution_report(conn, distribution_csv, send_result_csv, output_dir, insufficient_content_users=None):
    """Generate the distribution report."""
    try:
        # Read the distribution CSV
        dist_df = pd.read_csv(distribution_csv)

        # Read the send-result CSV
        if send_result_csv and os.path.exists(send_result_csv):
            result_df = pd.read_csv(send_result_csv)
            # Attach the send status
            email_status = {}
            for _, row in result_df.iterrows():
                email_status[row['email']] = row['status']

            dist_df['send_status'] = dist_df['email'].map(email_status)
        else:
            dist_df['send_status'] = 'unknown'

        # Per-article distribution statistics
        article_stats = {}
        for _, row in dist_df.iterrows():
            entry_id = row['entry_id']
            if entry_id not in article_stats:
                article_stats[entry_id] = {
                    'content_id': row['content_id'],
                    'total_assigned': 0,
                    'sent_success': 0,
                    'sent_failed': 0,
                    'sent_unknown': 0,
                    'product': row.get('product', ''),
                    'object': row.get('object', ''),
                    'date': row.get('date', ''),
                    'logic': row.get('logic', '')
                }

            article_stats[entry_id]['total_assigned'] += 1

            if 'success' in str(row['send_status']).lower():
                article_stats[entry_id]['sent_success'] += 1
            elif row['send_status'] in ['failed', 'skipped']:
                article_stats[entry_id]['sent_failed'] += 1
            else:
                article_stats[entry_id]['sent_unknown'] += 1

        # Build a compact distribution summary report
        summary_data = []
        cursor = conn.cursor()

        for entry_id, stats in article_stats.items():
            # Fetch the content details from the database
            cursor.execute("SELECT * FROM contents WHERE id = ?", (stats['content_id'],))
            content = cursor.fetchone()

            if content:
                summary_data.append({
                    'entry_id': entry_id,
                    'content_id': stats['content_id'],
                    'product': stats['product'] or content['product'],
                    'object': stats['object'] or content['object'],
                    'date': stats['date'] or content['date'],
                    'logic': stats['logic'] or content['logic'],
                    'judge_status': content['judge_status'],
                    'is_distributed': content['is_distributed'],
                    'assigned': stats['total_assigned'],
                    'success': stats['sent_success'],
                    'failed': stats['sent_failed'],
                    'unknown': stats['sent_unknown']
                })
            else:
                summary_data.append({
                    'entry_id': entry_id,
                    'content_id': stats['content_id'],
                    'product': stats['product'],
                    'object': stats['object'],
                    'date': stats['date'],
                    'logic': stats['logic'],
                    'assigned': stats['total_assigned'],
                    'success': stats['sent_success'],
                    'failed': stats['sent_failed'],
                    'unknown': stats['sent_unknown']
                })

        if summary_data:
            summary_df = pd.DataFrame(summary_data)
            summary_csv = os.path.join(output_dir, f"distribution_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
            summary_df.to_csv(summary_csv, index=False)
            logger.info(f"分发摘要报告已保存到: {summary_csv}")

        # Save the per-article statistics
        # (default=str guards against pandas/NumPy scalar types that json cannot serialize)
        stats_json = os.path.join(output_dir, f"article_stats_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
        with open(stats_json, 'w') as f:
            json.dump(article_stats, f, indent=2, default=str)

        # If any users received insufficient content, export a detailed report for them
        if insufficient_content_users:
            detailed_insufficient_users = []

            # Gather what was actually sent to each recipient
            user_send_stats = {}
            for _, row in dist_df.iterrows():
                user_id = row['user_id']
                if user_id not in user_send_stats:
                    user_send_stats[user_id] = {
                        'email': row['email'],
                        'username': row['username'],
                        'sent_count': 0,
                        'success_count': 0
                    }

                user_send_stats[user_id]['sent_count'] += 1
                if 'success' in str(row.get('send_status', '')).lower():
                    user_send_stats[user_id]['success_count'] += 1

            # Merge in the actual send figures for users with insufficient content
            for user in insufficient_content_users:
                user_id = user['user_id']
                send_info = user_send_stats.get(user_id, {'sent_count': 0, 'success_count': 0})

                detailed_insufficient_users.append({
                    'user_id': user['user_id'],
                    'email': user['email'],
                    'username': user['username'],
                    'expected_content': user['expected_content'],
                    'allocated_content': user['actual_content'],
                    'sent_count': send_info['sent_count'],
                    'success_count': send_info['success_count']
                })

            # Export the detailed report of users with insufficient content
            if detailed_insufficient_users:
                detailed_df = pd.DataFrame(detailed_insufficient_users)
                detailed_csv = os.path.join(output_dir, f"insufficient_content_users_detailed_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
                detailed_df.to_csv(detailed_csv, index=False)
                logger.info(f"内容不足用户的详细报告已保存到: {detailed_csv}")

        # Summary statistics
        cursor.execute("SELECT COUNT(*) FROM contents")
        total_articles = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM contents WHERE is_distributed = 1")
        distributed_articles = cursor.fetchone()[0]

        success_sent = sum(1 for _, v in article_stats.items() if v['sent_success'] > 0)

        logger.info(f"文章统计已保存到: {stats_json}")
        logger.info("\n===== 分发统计 =====")
        logger.info(f"总文章数: {total_articles}")
        logger.info(f"已分配文章数: {distributed_articles} ({distributed_articles/total_articles*100:.2f}%)")
        logger.info(f"本次成功发送文章数: {success_sent}")

        # Users with insufficient content
        if insufficient_content_users:
            logger.info(f"内容不足的用户数: {len(insufficient_content_users)}")

        # Per-product statistics
        cursor.execute("""
            SELECT product,
                   COUNT(*) as total,
                   SUM(CASE WHEN is_distributed = 1 THEN 1 ELSE 0 END) as distributed
            FROM contents
            GROUP BY product
        """)
        product_stats = cursor.fetchall()

        if product_stats:
            logger.info("\n===== 按产品分发统计 =====")
            for row in product_stats:
                product = row['product']
                total = row['total']
                distributed = row['distributed']
                if total > 0:
                    logger.info(f"产品 '{product}': {distributed}/{total} ({distributed/total*100:.2f}%)")

        return stats_json
    except Exception as e:
        logger.error(f"生成分发报告失败: {e}")
        return None

def main():
    args = parse_arguments()

    # Create the output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # Connect to the database
    conn = create_database_connection(args.db_path)
    if not conn:
        logger.error("无法连接到数据库,程序退出")
        return

    try:
        # Query contents from the database
        content_df = query_contents_from_database(
            conn,
            product=args.product,
            object=args.object,
            judge_only_success=args.judge_only_success,
            undistributed_only=args.undistributed_only
        )

        if content_df is None or len(content_df) == 0:
            logger.error("没有找到符合条件的内容,程序退出")
            return

        # Query users from the database
        users_df = query_users_from_database(
            conn,
            user_id=args.user_id,
            user_email=args.user_email,
            max_users=args.max_users
        )

        if users_df is None or len(users_df) == 0:
            logger.error("没有找到符合条件的用户,程序退出")
            return

        # Allocate content to users
        users_with_content, insufficient_content_users = allocate_content_to_users(
            users_df, content_df, args.article_per_user
        )

        if not users_with_content:
            logger.error("内容分配失败,程序退出")
            return

        # Prepare the distribution CSV
        distribution_csv = prepare_distribution_csv(users_with_content, args.output_dir, insufficient_content_users)
        if not distribution_csv:
            logger.error("准备分发CSV失败,程序退出")
            return

        # Send the emails
        send_result_csv = send_emails(
            distribution_csv, args.output_dir, args.email_from, args.email_password,
            args.subject, args.smtp_server, args.smtp_port, args.use_ssl, args.test_mode,
            args.delay, args.zip_filename, args.test_email, args.force_attachments
        )

        # Record the distribution results in the database
        if not args.test_mode and not args.test_email:
            record_distribution_to_database(conn, distribution_csv, send_result_csv)
        else:
            if args.test_mode:
                logger.info("测试模式,不记录分发结果到数据库")
            if args.test_email:
                logger.info("测试邮箱模式,不记录分发结果到数据库")

        # Generate the distribution report
        generate_distribution_report(conn, distribution_csv, send_result_csv, args.output_dir, insufficient_content_users)

        logger.info("内容分发流程完成")

    finally:
        conn.close()

if __name__ == "__main__":
    main()