180 lines
5.9 KiB
Python
180 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
视频片段整合脚本
|
|
用于整合多个视频分析片段文件到一个完整的分析结果
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import glob
|
|
from datetime import datetime
|
|
|
|
def extract_json_from_file(file_path):
    """Extract and parse the JSON object embedded in a fenced block of a file.

    The file is searched for an opening ```json fence; the JSON text is taken
    from the first ``{`` after that fence up to the last ``` fence in the
    file, and parsed with ``json.loads``.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        The parsed JSON value, or ``None`` when the file cannot be read, the
        fences or the opening brace are missing, or the JSON is invalid.
        A diagnostic message is printed in every failure case.
    """
    start_marker = "```json"
    end_marker = "```"

    # Keep the try body limited to the I/O that can actually fail.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"错误: 读取文件 {file_path} 失败: {e}")
        return None

    # Locate the opening fence.
    start_pos = content.find(start_marker)
    if start_pos == -1:
        print(f"警告: 在文件 {file_path} 中未找到JSON标记")
        return None

    # Locate the first opening brace after the fence.
    json_start = content.find('{', start_pos)
    if json_start == -1:
        print(f"警告: 在文件 {file_path} 中未找到JSON对象")
        return None

    # Locate the closing fence. The opening fence itself contains the end
    # marker, so rfind never misses here; a fence that does not come after
    # the opening brace means the block was never closed.
    json_end = content.rfind(end_marker)
    if json_end <= json_start:
        print(f"警告: 在文件 {file_path} 中未找到JSON对象结束")
        return None

    json_str = content[json_start:json_end]
    print(f"json_str:{json_str}")

    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"错误: 解析文件 {file_path} 中的JSON失败: {e}")
        # NOTE: no repair is actually implemented; the message is kept for
        # output compatibility and the function gives up here.
        print("尝试手动修复JSON...")
        return None
|
|
|
|
|
|
def merge_segments(segment_files, segment_duration=30):
    """Merge the JSON payloads of several segment files into one result.

    Files are processed in natural filename order (``seg_2`` before
    ``seg_10``). For every file that parses successfully, the
    "total_Oral broadcasting" and "summary" texts are appended
    (space-separated) and the "content" items are appended with their
    "start"/"end" shifted by the segment's position and their "id" renumbered
    consecutively starting at 1.

    Args:
        segment_files: List of segment file paths. It is sorted in place.
        segment_duration: Length of one segment, in the same unit as the
            items' "start"/"end" values (presumably seconds — confirm against
            the producer of the segment files). Defaults to 30.

    Returns:
        A dict with the keys "total_Oral broadcasting", "summary" and
        "content".
    """
    import re  # local import: only needed for the natural-sort key

    def natural_key(path):
        # re.split with a capturing group yields alternating text / digit
        # runs; digit runs (odd positions) are compared numerically so that
        # "seg_10" sorts after "seg_2". The raw path breaks ties such as
        # "a01" vs "a1" deterministically.
        text = os.fspath(path)
        parts = re.split(r'([0-9]+)', text)
        return ([int(p) if idx % 2 else p for idx, p in enumerate(parts)], text)

    merged_data = {
        "total_Oral broadcasting": "",
        "summary": "",
        "content": []
    }

    # Order matters: the time offset of a segment is derived from its index.
    segment_files.sort(key=natural_key)

    for i, file_path in enumerate(segment_files):
        print(f"正在处理片段 {i+1}: {os.path.basename(file_path)}")

        json_data = extract_json_from_file(file_path)
        if json_data is None:
            # Unparseable segments are skipped but still occupy their slot,
            # so later segments keep the correct time offset.
            continue

        # Append the free-text fields, space-separated.
        for key in ("total_Oral broadcasting", "summary"):
            if key in json_data:
                if merged_data[key]:
                    merged_data[key] += " " + json_data[key]
                else:
                    merged_data[key] = json_data[key]

        if "content" in json_data:
            offset = i * segment_duration
            items = json_data["content"]

            # Shift timestamps of every segment but the first onto the
            # global timeline.
            if i >= 1:
                for item in items:
                    item["start"] = item["start"] + offset
                    item["end"] = item["end"] + offset

            # Renumber ids so they are consecutive across all segments.
            for item in items:
                item["id"] = len(merged_data["content"]) + 1
                merged_data["content"].append(item)

    return merged_data
|
|
|
|
def save_merged_result(merged_data, output_file):
    """Write *merged_data* to *output_file* as pretty-printed UTF-8 JSON.

    Args:
        merged_data: JSON-serializable object to store.
        output_file: Destination path; an existing file is overwritten.

    Returns:
        True when the file was written, False when serialization or writing
        failed (the error is printed, not raised).
    """
    try:
        # Serialize before opening the file so that a serialization error
        # cannot leave a truncated file behind or clobber an existing one.
        payload = json.dumps(merged_data, ensure_ascii=False, indent=4)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(payload)

        print(f"合并结果已保存到: {output_file}")
        return True

    except Exception as e:
        # Best-effort contract: report the failure and signal it via the
        # boolean return value instead of raising.
        print(f"错误: 保存合并结果失败: {e}")
        return False
|
|
|
|
def main(current_dir="/root/autodl-tmp/video_llm/Template/牛管家", base_name=None):
    """Merge every ``*.txt`` segment file of a directory into one JSON file.

    The merged result is written into *current_dir* as
    ``<base_name>_merged_<YYYYmmdd_HHMMSS>.json`` and a short summary is
    printed.

    Args:
        current_dir: Directory that holds the segment files and receives the
            output file.
        base_name: Prefix of the output file name. Defaults to the last
            component of *current_dir*.
    """
    if base_name is None:
        base_name = os.path.basename(os.path.normpath(current_dir))

    print(current_dir)

    # Escape the directory so glob metacharacters in its name are literal.
    pattern = os.path.join(glob.escape(current_dir), "*.txt")
    segment_files = glob.glob(pattern)

    if not segment_files:
        print("错误: 未找到任何片段文件")
        return

    print("\n开始合并片段...")
    merged_data = merge_segments(segment_files)

    if not merged_data["content"]:
        print("错误: 合并失败,没有有效内容")
        return

    # Timestamped name so repeated runs never overwrite earlier results.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = os.path.join(current_dir, f"{base_name}_merged_{timestamp}.json")

    if save_merged_result(merged_data, output_file):
        print("\n合并完成!")
        print(f"总片段数: {len(segment_files)}")
        print(f"总内容条目: {len(merged_data['content'])}")
        print(f"输出文件: {os.path.basename(output_file)}")

        print("\n统计信息:")
        print(f"- 口播内容长度: {len(merged_data['total_Oral broadcasting'])} 字符")
        print(f"- 摘要长度: {len(merged_data['summary'])} 字符")
        print(f"- 内容条目数: {len(merged_data['content'])}")
|
|
|
|
# Script entry point: run the merge only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|