106 lines
3.8 KiB
Python
106 lines
3.8 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
|
||
import os
|
||
import sqlite3
|
||
import csv
|
||
import sys
|
||
|
||
# Path to the CSV file that holds the user records to import.
csv_file = '/root/autodl-tmp/TravelContentCreator/output/5.13.csv'

# Path to the SQLite database file that receives the records.
db_path = '/root/autodl-tmp/TravelContentCreator/distribution.db'
|
||
|
||
def main(csv_path=None, database_path=None):
    """Import users from a CSV file into the ``users`` table of a SQLite database.

    Every CSV row must provide at least four columns in the order
    (Xiaohongshu ID, username, follower count, email). Only the username and
    the email are stored, using ``INSERT OR REPLACE INTO users (email,
    username)``. The ``users`` table is not created here, so it has to exist
    already. Columns beyond the fourth are ignored.

    Rows with fewer than four columns, rows whose email contains no ``@`` and
    rows rejected by the database are skipped, counted as failures, printed,
    and written to ``import_failures.csv`` in the directory of the input CSV.

    Args:
        csv_path: CSV file to read. Defaults to the module-level ``csv_file``.
        database_path: SQLite database file to write. Defaults to the
            module-level ``db_path``.

    Returns:
        True when the file was processed and the transaction was committed
        (individual rows may still have been skipped). False when the CSV
        file does not exist, the database connection fails, or an unexpected
        error aborts the import (the transaction is rolled back then).
    """
    if csv_path is None:
        csv_path = csv_file
    if database_path is None:
        database_path = db_path

    # Bail out early when there is nothing to read.
    if not os.path.exists(csv_path):
        print(f"错误: CSV文件不存在: {csv_path}")
        return False

    # Open the database connection.
    try:
        conn = sqlite3.connect(database_path)
        cursor = conn.cursor()
        print(f"已连接到数据库: {database_path}")
    except sqlite3.Error as e:
        print(f"数据库连接错误: {e}")
        return False

    success_count = 0
    error_count = 0
    # (row number, raw row, reason) for every row that could not be imported.
    failed_entries = []

    try:
        # newline='' lets the csv module handle line endings itself, which is
        # required for quoted fields that contain embedded newlines.
        with open(csv_path, 'r', newline='', encoding='utf-8') as f:
            for row_num, row in enumerate(csv.reader(f), 1):
                if len(row) < 4:
                    failed_entries.append((row_num, row, "列数不足,需要至少4列"))
                    print(f"警告: 跳过格式不正确的行 {row_num}: {row}")
                    error_count += 1
                    continue

                # Pick the columns by position instead of unpacking the whole
                # row: rows with extra trailing columns pass the length check
                # above and must not fail with "too many values to unpack".
                username, email = row[1], row[3]

                # Minimal sanity check on the email address.
                if '@' not in email:
                    failed_entries.append((row_num, row, "无效邮箱格式"))
                    print(f"警告: 跳过无效邮箱: {email} (行 {row_num})")
                    error_count += 1
                    continue

                # Insert the user, replacing any existing conflicting row.
                try:
                    cursor.execute("""
                        INSERT OR REPLACE INTO users (email, username)
                        VALUES (?, ?)
                    """, (email, username))
                except sqlite3.Error as e:
                    # A single bad row must not abort the whole import.
                    failed_entries.append((row_num, row, f"数据库错误: {e}"))
                    print(f"插入行时出错 (行 {row_num}): {row}, 错误: {e}")
                    error_count += 1
                else:
                    success_count += 1

        # Persist all successful inserts in one transaction.
        conn.commit()
        print(f"\n导入完成! 成功: {success_count}, 失败: {error_count}")

        if failed_entries:
            # Echo every failed row to the console ...
            print("\n===== 导入失败的记录 =====")
            for row_num, row, error in failed_entries:
                print(f"行 {row_num}: {row} - 失败原因: {error}")

            # ... and keep a copy next to the input file for later inspection.
            failure_file = os.path.join(os.path.dirname(csv_path), "import_failures.csv")
            with open(failure_file, 'w', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(['行号', '原始数据', '失败原因'])
                for row_num, row, error in failed_entries:
                    writer.writerow([row_num, ','.join(row), error])

            print(f"\n失败记录已保存到: {failure_file}")

    except Exception as e:
        # Top-level boundary: report the problem, undo uncommitted work and
        # signal failure to the caller.
        print(f"读取CSV文件时出错: {e}")
        conn.rollback()
        return False
    finally:
        conn.close()

    return True
|
||
|
||
if __name__ == "__main__":
    # Run the import, report the overall outcome, and exit with a non-zero
    # status on failure so calling scripts can detect it.
    if main():
        print("用户数据导入成功!")
    else:
        print("用户数据导入失败!")
        sys.exit(1)