TravelContentCreator/scripts/import_users.py

106 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sqlite3
import csv
import sys
# SQLite database file that main() connects to.
db_path = "/root/autodl-tmp/TravelContentCreator/distribution.db"
# CSV file that main() reads the user rows from.
csv_file = "/root/autodl-tmp/TravelContentCreator/output/5.13.csv"
def _write_failure_report(report_path, failed_entries):
    """Write the rejected rows to *report_path* as a UTF-8 CSV file.

    Each item of *failed_entries* is a ``(row_num, row, reason)`` tuple. The
    original cells of ``row`` are re-joined with commas into one column.
    """
    with open(report_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['行号', '原始数据', '失败原因'])
        for row_num, row, error in failed_entries:
            writer.writerow([row_num, ','.join(row), error])


def main(csv_path=None, database_path=None):
    """Import users from a CSV file into the ``users`` table of a SQLite DB.

    Every CSV row must have at least four columns, read in the order
    (Xiaohongshu ID, username, follower count, email). Only ``email`` and
    ``username`` are written; extra columns are ignored. Rows that are too
    short, whose email lacks an ``@``, or that SQLite rejects are skipped,
    printed, and saved to ``import_failures.csv`` next to the input file.

    Args:
        csv_path: CSV file to read. Defaults to the module-level ``csv_file``.
        database_path: SQLite file to write. Defaults to the module-level
            ``db_path``.

    Returns:
        ``True`` when the file was processed and the transaction committed
        (even if individual rows were skipped); ``False`` when the CSV file
        is missing, the database cannot be opened, or reading/committing
        fails, in which case the transaction is rolled back.
    """
    source_csv = csv_file if csv_path is None else csv_path
    target_db = db_path if database_path is None else database_path

    if not os.path.exists(source_csv):
        print(f"错误: CSV文件不存在: {source_csv}")
        return False

    try:
        conn = sqlite3.connect(target_db)
        cursor = conn.cursor()
        print(f"已连接到数据库: {target_db}")
    except sqlite3.Error as e:
        print(f"数据库连接错误: {e}")
        return False

    success_count = 0
    error_count = 0
    # (row_num, original row, reason) for every skipped row.
    failed_entries = []

    try:
        # newline='' is what the csv module requires so quoted embedded
        # newlines are parsed correctly; utf-8-sig also accepts plain UTF-8
        # but drops the BOM that spreadsheet exports often prepend.
        with open(source_csv, 'r', newline='', encoding='utf-8-sig') as f:
            for row_num, row in enumerate(csv.reader(f), 1):
                if len(row) < 4:
                    failed_entries.append((row_num, row, "列数不足需要至少4列"))
                    print(f"警告: 跳过格式不正确的行 {row_num}: {row}")
                    error_count += 1
                    continue

                # Slice to four cells: rows with extra columns are accepted
                # by the length check above and must not break unpacking.
                _xhs_id, username, _fans, email = (
                    cell.strip() for cell in row[:4]
                )

                if '@' not in email:
                    failed_entries.append((row_num, row, "无效邮箱格式"))
                    print(f"警告: 跳过无效邮箱: {email} (行 {row_num})")
                    error_count += 1
                    continue

                try:
                    # NOTE(review): INSERT OR REPLACE removes a conflicting
                    # row before inserting, so other columns of an existing
                    # user would be reset - confirm against the users schema.
                    cursor.execute(
                        """
                        INSERT OR REPLACE INTO users (email, username)
                        VALUES (?, ?)
                        """,
                        (email, username),
                    )
                except sqlite3.Error as e:
                    failed_entries.append((row_num, row, f"数据库错误: {e}"))
                    print(f"插入行时出错 (行 {row_num}): {row}, 错误: {e}")
                    error_count += 1
                else:
                    success_count += 1

        conn.commit()
    except Exception as e:
        # Boundary handler: any read/decode/commit failure aborts the import.
        print(f"读取CSV文件时出错: {e}")
        conn.rollback()
        return False
    finally:
        conn.close()

    print(f"\n导入完成! 成功: {success_count}, 失败: {error_count}")

    if failed_entries:
        print("\n===== 导入失败的记录 =====")
        for row_num, row, error in failed_entries:
            print(f"{row_num}: {row} - 失败原因: {error}")

        failure_file = os.path.join(
            os.path.dirname(source_csv), "import_failures.csv"
        )
        # The data is already committed here, so a report that cannot be
        # written is only a warning and must not fail the import.
        try:
            _write_failure_report(failure_file, failed_entries)
        except OSError as e:
            print(f"警告: 无法保存失败记录到 {failure_file}: {e}")
        else:
            print(f"\n失败记录已保存到: {failure_file}")

    return True
if __name__ == "__main__":
    # Run the import; report the outcome and exit with status 1 on failure.
    import_ok = main()
    if not import_ok:
        print("用户数据导入失败!")
        sys.exit(1)
    print("用户数据导入成功!")