From 30e8f402d9f212d0712e3c0d739adc82824db9b6 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Mon, 12 May 2025 14:00:42 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=BA=86=E4=B8=80=E4=B8=8B?= =?UTF-8?q?=E6=B8=B2=E6=9F=93=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/extract_and_render.py | 111 ++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 12 deletions(-) diff --git a/scripts/extract_and_render.py b/scripts/extract_and_render.py index 7366dae..9f34134 100644 --- a/scripts/extract_and_render.py +++ b/scripts/extract_and_render.py @@ -41,6 +41,39 @@ def convert_json_to_txt_content(json_path): print(f" - 错误: 处理 JSON 时出错: {e}") return None, f"处理 JSON 时出错: {e}" +def load_topic_data(source_dir, run_id): + """ + 加载选题数据 + + Args: + source_dir: 源目录路径 + run_id: 运行ID + + Returns: + dict: 以topic_index为键的选题数据字典 + """ + topic_file_path = os.path.join(source_dir, f"tweet_topic_{run_id}.json") + topic_data = {} + + if os.path.exists(topic_file_path): + try: + with open(topic_file_path, 'r', encoding='utf-8') as f: + topics = json.load(f) + + # 将选题数据转换为以index为键的字典 + for topic in topics: + index = topic.get("index") + if index: + topic_data[index] = topic + + print(f"成功加载选题数据,共{len(topic_data)}条") + except Exception as e: + print(f"加载选题数据时出错: {e}") + else: + print(f"警告: 未找到选题文件: {topic_file_path}") + + return topic_data + def process_result_directory(source_dir, output_dir, run_id=None): """ 处理指定的结果目录,提取内容并渲染到输出目录。 @@ -62,28 +95,44 @@ def process_result_directory(source_dir, output_dir, run_id=None): if not run_id: run_id = os.path.basename(source_dir) - # 创建CSV清单 + # 加载选题数据 + topic_data = load_topic_data(source_dir, run_id) + + # 创建CSV清单,添加选题相关字段 csv_path = os.path.join(output_dir, f"manifest_{run_id}.csv") csv_data = [ [ "EntryID", + "TopicIndex", + "VariantIndex", + "Date", + "Logic", + "Object", + "Product", + "ProductLogic", + "Style", + "StyleLogic", + "TargetAudience", + "TargetAudienceLogic", "SourcePath", "ArticleJsonPath", "OutputTxtPath", "PosterPath", "AdditionalImagesCount", "Status", - "Details" + "Details", + "JudgeStatus" ] ] # 查找所有i_j目录 - entry_pattern = re.compile(r"^\d+_\d+$") + entry_pattern = re.compile(r"^(\d+)_(\d+)$") entries = [] for item in os.listdir(source_dir): item_path = os.path.join(source_dir, item) - if os.path.isdir(item_path) and entry_pattern.match(item): + match = entry_pattern.match(item) + if os.path.isdir(item_path) and match: entries.append(item) if not entries: @@ -99,16 +148,36 @@ def process_result_directory(source_dir, output_dir, run_id=None): print(f"\n处理条目: {entry}") + # 解析topic_index和variant_index + match = entry_pattern.match(entry) + topic_index = match.group(1) + variant_index = match.group(2) + + # 获取该话题的选题信息 + topic_info = topic_data.get(topic_index, {}) + # 创建记录 record = { "EntryID": entry, + "TopicIndex": topic_index, + "VariantIndex": variant_index, + "Date": topic_info.get("date", ""), + "Logic": topic_info.get("logic", ""), + "Object": topic_info.get("object", ""), + "Product": topic_info.get("product", ""), + "ProductLogic": topic_info.get("product_logic", ""), + "Style": topic_info.get("style", ""), + "StyleLogic": topic_info.get("style_logic", ""), + "TargetAudience": topic_info.get("target_audience", ""), + "TargetAudienceLogic": topic_info.get("target_audience_logic", ""), "SourcePath": entry_path, "ArticleJsonPath": "", "OutputTxtPath": "", "PosterPath": "", "AdditionalImagesCount": 0, "Status": "Processing", - "Details": "" + "Details": "", + "JudgeStatus": "" } # 创建输出条目目录 @@ -128,6 +197,18 @@ def process_result_directory(source_dir, output_dir, run_id=None): record["OutputTxtPath"] = txt_path if os.path.exists(json_path): + # 读取article.json + try: + with open(json_path, 'r', encoding='utf-8') as f_json: + article_data = json.load(f_json) + # 提取judge_success状态 + if "judge_success" in article_data: + record["JudgeStatus"] = str(article_data["judge_success"]) + elif "judged" in article_data: + record["JudgeStatus"] = "已审核" if article_data["judged"] else "未审核" + except Exception as e: + print(f" - 错误: 读取article.json失败: {e}") + txt_content, error = convert_json_to_txt_content(json_path) if error: record["Status"] = "Partial" @@ -218,20 +299,26 @@ def process_result_directory(source_dir, output_dir, run_id=None): def main(): # parser = argparse.ArgumentParser(description="从TravelContentCreator结果目录提取内容并渲染到指定目录") - # parser.add_argument("--source", type=str, required=True, help="源目录路径") - # parser.add_argument("--output", type=str, required=True, help="输出目录路径") + # parser.add_argument("--source", type=str, help="源目录路径") + # parser.add_argument("--output", type=str, help="输出目录路径") # parser.add_argument("--run-id", type=str, help="自定义运行ID") # args = parser.parse_args() - source = "" - output = "" - run_id = datetime.now().strftime("%Y%m%d_%H%M%S") + + # # 默认值设置 + # source = args.source if args.source else "/root/autodl-tmp/TravelContentCreator/result/2025-05-11_00-26-30" + # output = args.output if args.output else "/root/autodl-tmp/TravelContentCreator/output/2025-05-11_00-26-30" + # run_id = args.run_id if args.run_id else os.path.basename(source) + + source = "/root/autodl-tmp/TravelContentCreator/result/2025-05-12_09-33-12" + output = "/root/autodl-tmp/TravelContentCreator/output/2025-05-12_09-33-12" + run_id = os.path.basename(source) + print("-" * 60) print(f"开始提取和渲染流程") print(f"源目录: {source}") print(f"输出目录: {output}") - if run_id: - print(f"运行ID: {run_id}") + print(f"运行ID: {run_id}") print("-" * 60) process_result_directory(source, output, run_id)