From 98d7e5531973693de7e69ee3b32aa06eb42ce58c Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Sun, 27 Apr 2025 19:05:56 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E4=B8=A4=E4=B8=AA?= =?UTF-8?q?=E8=84=9A=E6=9C=AC,=20=E7=94=A8=E6=9D=A5=E9=87=8D=E6=96=B0?= =?UTF-8?q?=E7=94=9F=E6=88=90=E5=9B=BE=E5=83=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/__pycache__/__init__.cpython-312.pyc | Bin 396 -> 396 bytes core/__pycache__/ai_agent.cpython-312.pyc | Bin 37504 -> 37504 bytes core/__pycache__/poster_gen.cpython-312.pyc | Bin 37588 -> 37588 bytes .../simple_collage.cpython-312.pyc | Bin 35060 -> 35060 bytes core/__pycache__/topic_parser.cpython-312.pyc | Bin 8733 -> 8733 bytes output/2025-04-24_19-44-15.json | 26 - scripts/regenerate_img.py | 338 +++++++++++++ scripts/regenerate_poster_content.py | 457 ++++++++++++++++++ utils/__pycache__/__init__.cpython-312.pyc | Bin 374 -> 374 bytes .../content_generator.cpython-312.pyc | Bin 27562 -> 27562 bytes .../output_handler.cpython-312.pyc | Bin 13191 -> 13191 bytes .../prompt_manager.cpython-312.pyc | Bin 26480 -> 26480 bytes .../resource_loader.cpython-312.pyc | Bin 6945 -> 6945 bytes .../tweet_generator.cpython-312.pyc | Bin 34533 -> 34518 bytes 14 files changed, 795 insertions(+), 26 deletions(-) delete mode 100644 output/2025-04-24_19-44-15.json create mode 100644 scripts/regenerate_img.py create mode 100644 scripts/regenerate_poster_content.py diff --git a/core/__pycache__/__init__.cpython-312.pyc b/core/__pycache__/__init__.cpython-312.pyc index c040350c99a718f115c8171478a9de7bfa31b2d2..3ce90a88ade7825f3061dba8856285e1e259a461 100644 GIT binary patch delta 20 ZcmeBS?qTLW&CAQh00g17+#9(Y838Ml1M~m@ delta 20 acmeBS?qTLW&CAQh00c1?c{g%5G6DcEM+A`o diff --git a/core/__pycache__/ai_agent.cpython-312.pyc b/core/__pycache__/ai_agent.cpython-312.pyc index dae3bf4b691a84975d843171f97632ac72f24294..ddbb6baf6e172d0747167ef938e00a6e7305237e 100644 GIT binary patch delta 22 ccmZoz%G9uwiTgA!FBbz4WZ&o5$Xz-K07)MPUH||9 delta 22 ccmZoz%G9uwiTgA!FBbz4#9ZXv$Xz-K07!cVMgRZ+ diff --git a/core/__pycache__/poster_gen.cpython-312.pyc b/core/__pycache__/poster_gen.cpython-312.pyc index 18f2c485c7c3d711d88b6abe278934b0c24866ff..6ae451a193f599151191b299774fe70c4d32d301 100644 GIT binary patch delta 21 bcmcbzl int: + """Safely gets the topic index, falling back to list index + 1.""" + topic_index = topic_item.get("index") + try: + if isinstance(topic_index, str) and topic_index.isdigit(): + topic_index = int(topic_index) + elif not isinstance(topic_index, int) or topic_index < 1: + logger.warning(f"Topic item {list_index} has invalid or missing 'index'. Using list index {list_index + 1}.") + topic_index = list_index + 1 + except Exception: + logger.warning(f"Error processing index for topic item {list_index}. Using list index {list_index + 1}.") + topic_index = list_index + 1 + return topic_index + + +def find_and_load_variant_contents(run_dir: Path, topic_index: int) -> list: + """Finds all variant directories for a topic and loads their article.json content.""" + variant_contents = [] + variant_dirs = sorted(run_dir.glob(f"{topic_index}_*"), key=lambda p: int(p.name.split('_')[-1])) # Sort by variant number + + if not variant_dirs: + logger.warning(f"No variant directories found for topic {topic_index} in {run_dir}") + return variant_contents + + logger.debug(f"Found {len(variant_dirs)} potential variant directories for topic {topic_index}: {[d.name for d in variant_dirs]}") + + for variant_dir in variant_dirs: + # Double check it's a directory with the correct format + if variant_dir.is_dir() and '_' in variant_dir.name: + parts = variant_dir.name.split('_') + if len(parts) == 2 and parts[0] == str(topic_index) and parts[1].isdigit(): + content = load_variant_content(variant_dir) + if content: # Only add if content was loaded successfully + variant_contents.append(content) + else: + logger.warning(f"Could not load valid content for variant {variant_dir.name}, it will be excluded.") + + logger.info(f"Loaded content for {len(variant_contents)} variants for topic {topic_index}.") + return variant_contents + + +def load_variant_content(variant_dir: Path): + """Loads the article content (title, content) from article.json.""" + content_file = variant_dir / "article.json" + if not content_file.is_file(): + logger.warning(f"Article content file not found: {content_file}") + return None + try: + with open(content_file, 'r', encoding='utf-8') as f: + content_data = json.load(f) + if isinstance(content_data, dict) and "title" in content_data and "content" in content_data: + logger.debug(f"Successfully loaded content from {content_file}") + return content_data + else: + logger.warning(f"Invalid format in article content file: {content_file}. Missing 'title' or 'content'.") + return None + except json.JSONDecodeError: + logger.error(f"Failed to parse article content file (JSON format error): {content_file}") + return None + except Exception as e: + logger.exception(f"Error loading content from {content_file}: {e}") + return None + +def clean_object_name(object_name_raw: str) -> str: + """Cleans the object name, similar to logic in tweet_generator.""" + if not object_name_raw: + return "" + try: + cleaned = object_name_raw.split(".")[0].replace("景点信息-", "").strip() + return cleaned + except Exception as e: + logger.warning(f"Could not fully clean object name '{object_name_raw}': {e}. Using raw.") + return object_name_raw.strip() + +def find_description_file(object_name: str, resource_dir_config: list, desc_dir: Path | None = None) -> list: + """ + Tries to find the description file path based on object name. + Prioritizes searching in desc_dir if provided. + """ + info_directory = [] # Expects a list of paths + found_description = False + + # --- Step 1: Prioritize search in the specified desc_dir --- + if desc_dir and desc_dir.is_dir(): + logger.info(f"Prioritizing description file search in specified directory: {desc_dir}") + # Simple search: find first file containing the object name (case-insensitive) + for file_path in desc_dir.iterdir(): + # Use stem for matching (filename without extension) + # Make comparison case-insensitive for robustness + if file_path.is_file() and object_name.lower() in file_path.stem.lower(): + info_directory = [str(file_path)] + logger.info(f"Found description file in specified directory: {file_path}") + found_description = True + break # Use the first match found in the specified dir + if not found_description: + logger.warning(f"No file containing '{object_name}' found in specified directory: {desc_dir}. Falling back to resource_dir config.") + elif desc_dir: + logger.warning(f"Specified description directory '{desc_dir}' not found or not a directory. Falling back to resource_dir config.") + + # --- Step 2: Fallback to searching resource_dir_config from main config --- + if not found_description: + if not object_name or not resource_dir_config: + logger.warning("Cannot find description file: Missing object name or resource_dir config.") + return info_directory # Return empty list + + logger.debug(f"Searching for description file for object '{object_name}' in resource_dir config...") + # keywords = [k for k in re.split(r'[+\s_\-]+', object_name) if len(k) > 1] # Keywords might be needed for fuzzy matching later + + # Search logic based on resource_dir_config (as before) + for dir_info in resource_dir_config: + if dir_info.get("type") == "Description": + for file_path_str in dir_info.get("file_path", []): + file_path = Path(file_path_str) + # Use stem for matching, case-insensitive + if object_name.lower() in file_path.stem.lower(): + if file_path.is_file(): + info_directory = [str(file_path)] + logger.info(f"Found potential description file in resource_dir config: {file_path}") + found_description = True + break # Take the first match based on this logic + else: + logger.warning(f"Configured description file not found at path: {file_path}") + if found_description: + break + + if not found_description: + logger.warning(f"No description file found for '{object_name}' in resource_dir config either.") + + return info_directory + + +def regenerate_topic_poster_config( + content_gen: ContentGenerator, + topic_item: dict, + topic_index: int, # Now using the determined index + run_dir: Path, + main_config: dict, + desc_dir: Path | None # Added desc_dir parameter +) -> bool: + """Regenerates the poster text config for a single topic using all its variant contents.""" + logger.info(f"--- Regenerating Poster Config for Topic {topic_index} (Object: {topic_item.get('object', 'N/A')}) ---") + + # 1. Load content from ALL variants for this topic + variant_contents_list = find_and_load_variant_contents(run_dir, topic_index) + if not variant_contents_list: + logger.error(f"No valid variant contents found for topic {topic_index}. Skipping.") + return False + + # 2. Find description file (info_directory) - Pass desc_dir here + object_name_raw = topic_item.get("object", "") + object_name_cleaned = clean_object_name(object_name_raw) + resource_config = main_config.get("resource_dir", []) + info_directory = find_description_file(object_name_cleaned, resource_config, desc_dir) # Pass desc_dir + + # 3. Get parameters for ContentGenerator.run + poster_num = len(variant_contents_list) + system_prompt_path = main_config.get("poster_content_system_prompt") + api_url = main_config.get("api_url") + model_name = main_config.get("model") + api_key = main_config.get("api_key") + timeout = main_config.get("request_timeout", 120) + + if not system_prompt_path or not Path(system_prompt_path).is_file(): + logger.error(f"Poster content system prompt file not found or not specified in config: {system_prompt_path}") + return False + + try: + with open(system_prompt_path, "r", encoding="utf-8") as f: + system_prompt = f.read() + except Exception as e: + logger.exception(f"Failed to read system prompt file {system_prompt_path}: {e}") + return False + + # Set ContentGenerator parameters + content_gen.set_temperature(main_config.get("content_temperature", 0.7)) + content_gen.set_top_p(main_config.get("content_top_p", 0.8)) + content_gen.set_presence_penalty(main_config.get("content_presence_penalty", 1.2)) + + # 4. Call ContentGenerator.run with the list of variant contents + logger.info(f"Calling ContentGenerator.run for topic {topic_index} with {poster_num} variant contents...") + logger.debug(f" - Info Dir Used: {info_directory}") # Log the final info_directory used + logger.debug(f" - Poster Num: {poster_num}") + + try: + regenerated_configs = content_gen.run( + info_directory=info_directory, # Use the potentially updated info_directory + poster_num=poster_num, + content_data=variant_contents_list, + system_prompt=system_prompt, + api_url=api_url, + model_name=model_name, + api_key=api_key, + timeout=timeout + ) + except Exception as e: + logger.exception(f"ContentGenerator.run failed for topic {topic_index}: {e}") + traceback.print_exc() + regenerated_configs = None + + # 5. Save the regenerated config (list) + if regenerated_configs is not None: + output_config_path = run_dir / f"topic_{topic_index}_poster_configs.json" + try: + if not isinstance(regenerated_configs, list): + logger.warning(f"ContentGenerator.run for topic {topic_index} did not return a list. Attempting to wrap. Result type: {type(regenerated_configs)}") + regenerated_configs = [regenerated_configs] if regenerated_configs else [] + + with open(output_config_path, 'w', encoding='utf-8') as f_out: + json.dump(regenerated_configs, f_out, ensure_ascii=False, indent=4) + logger.info(f"Successfully regenerated and saved poster configs to: {output_config_path} ({len(regenerated_configs)} configs)") + return True + except Exception as e: + logger.exception(f"Failed to save regenerated poster configs to {output_config_path}: {e}") + return False + else: + logger.error(f"ContentGenerator.run did not return valid config data for topic {topic_index}.") + return False + + +# --- Main Logic --- +def main(run_dirs_to_process, config_path, debug_mode, desc_dir_path: Path | None): + """Main processing function.""" + + if debug_mode: + logger.setLevel(logging.DEBUG) + logging.getLogger().setLevel(logging.DEBUG) + logger.info("DEBUG 日志已启用") + + main_config = load_main_config(config_path) + if main_config is None: + logger.critical("Failed to load main configuration. Aborting.") + return + + try: + content_generator = ContentGenerator(output_dir=main_config.get("output_dir")) + logger.info("ContentGenerator initialized.") + except Exception as e: + logger.critical(f"Failed to initialize ContentGenerator: {e}") + traceback.print_exc() + return + + total_success_count = 0 + total_failure_count = 0 + all_topics_data = [] + + # Phase 1: Load all topics + for run_dir_str in run_dirs_to_process: + run_directory = Path(run_dir_str) + run_id = run_directory.name + logger.info(f"\n===== Loading Topics from: {run_directory} (Run ID: {run_id}) =====") + if not run_directory.is_dir(): + logger.error(f"Directory not found: {run_directory}. Skipping.") + continue + topics_list = load_topic_data(run_directory, run_id) + if topics_list: + all_topics_data.extend(topics_list) + else: + logger.error(f"Failed to load topics for {run_id}.") + + if not all_topics_data: + logger.critical("No topics loaded from any specified run directory. Aborting.") + return + + logger.info(f"\n===== Loaded a total of {len(all_topics_data)} topics. Starting regeneration. =====") + + # Phase 2: Process all loaded topics + for i, topic_item in enumerate(all_topics_data): + run_dir_str = topic_item.get('run_dir') + if not run_dir_str: + logger.error(f"Topic item {i} is missing 'run_dir' information. Cannot process.") + total_failure_count += 1 + continue + run_directory = Path(run_dir_str) + topic_index = get_topic_index(topic_item, i) + + try: + # Pass desc_dir_path to the processing function + if regenerate_topic_poster_config( + content_generator, + topic_item, + topic_index, + run_directory, + main_config, + desc_dir_path # Pass the specified description directory path + ): + total_success_count += 1 + else: + total_failure_count += 1 + except Exception as e: + logger.exception(f"Unhandled error processing topic index {topic_index} from run {run_directory.name}:") + total_failure_count += 1 + + logger.info("=" * 30) + logger.info(f"All topics processed.") + logger.info(f"Total Configs Regenerated Successfully: {total_success_count}") + logger.info(f"Total Failed/Skipped Topics: {total_failure_count}") + logger.info("=" * 30) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="重新生成指定运行ID目录下所有主题的海报文本配置") + parser.add_argument( + "--config", + type=str, + default="poster_gen_config.json", + help="主配置文件路径 (poster_gen_config.json)" + ) + parser.add_argument( + "--desc_dir", + type=str, + default=None, + help="可选:指定一个目录优先查找描述文件 (如果提供)" + ) + parser.add_argument( + "--debug", + action='store_true', + help="启用 DEBUG 级别日志" + ) + + args = parser.parse_args() + + # --- Process desc_dir argument --- + description_directory = Path("/root/autodl-tmp/TravelContentCreator/resource/Object") + if args.desc_dir: + description_directory = Path(args.desc_dir) + if not description_directory.is_dir(): + logger.warning(f"指定的描述目录 (--desc_dir) 不是一个有效的目录: {args.desc_dir}. 将忽略此参数.") + description_directory = None # Reset if invalid + else: + logger.info(f"将优先在以下目录查找描述文件: {description_directory}") + # --- End Process desc_dir --- + + + # ================================================== + # Define the list of run directories to process here + # ================================================== + run_directories = [ + "/root/autodl-tmp/TravelContentCreator/result/安吉/2025-04-27_12-55-40", + "/root/autodl-tmp/TravelContentCreator/result/乌镇/2025-04-27_10-50-34", + "/root/autodl-tmp/TravelContentCreator/result/齐云山/2025-04-27_11-51-56", + "/root/autodl-tmp/TravelContentCreator/result/长鹿/2025-04-27_14-03-44", + "/root/autodl-tmp/TravelContentCreator/result/笔架山/2025-04-27_02-02-34", + "/root/autodl-tmp/TravelContentCreator/result/笔架山/2025-04-27_02-23-17", + "/root/autodl-tmp/TravelContentCreator/result/笔架山/2025-04-27_07-57-20", + "/root/autodl-tmp/TravelContentCreator/result/笔架山/2025-04-27_09-29-20" + ] + # ================================================== + + if not run_directories: + print("ERROR: Please edit the 'run_directories' list in the script to specify at least one run directory to process.") + sys.exit(1) + + # Call the main processing function, passing the processed description_directory path + main(run_directories, args.config, args.debug, description_directory) \ No newline at end of file diff --git a/utils/__pycache__/__init__.cpython-312.pyc b/utils/__pycache__/__init__.cpython-312.pyc index 053c7667cd10c70f53b5e2c0be0aa5d8d22be288..22cbbcf4fc3283931cd410e4f03f12e8bc9fd584 100644 GIT binary patch delta 20 acmeyy^o@!8G%qg~0}yb|X4uG`%?JQG=>)d` delta 20 acmeyy^o@!8G%qg~0}#YqNHgeS(0{}SR1$_Vj delta 19 ZcmZoqZ%^ks&CAQh00c1?H*(b)0{}Q|1#SQU diff --git a/utils/__pycache__/prompt_manager.cpython-312.pyc b/utils/__pycache__/prompt_manager.cpython-312.pyc index 51fed845adb35d3b9781d7e29d3d126482f05ed0..e75407e752cbc3d1ee3c9d9f12693d5c3eb9e732 100644 GIT binary patch delta 22 ccmexxj`71eM()$Ryj%=GpvlFzkvlaV09r5xLI3~& delta 22 ccmexxj`71eM()$Ryj%=GVDgxEBX?>#0AG#=B>(^b diff --git a/utils/__pycache__/resource_loader.cpython-312.pyc b/utils/__pycache__/resource_loader.cpython-312.pyc index 32f9cb2e45a3a73ac7c160af782b3b441846e386..84310072e55babc5c50bb7cf2470a2b4561ddae8 100644 GIT binary patch delta 20 acmZ2zw$P0GG%qg~0}ycVa&6=mlLi1W;sdn+ delta 20 acmZ2zw$P0GG%qg~0}#YqYaZ#x4A#MIX6wGE}d>Rh0TF4J_I3XQ$o6$k#0JX%AAvW z=%EDTJOt%SL=Y4~?4`#_Dly9~`r}gtl|krs7D7)#=ic-1`+dHL!}*;H_tz@-d6g@D ztJM|*TTR8KQ-gP&mi9!rb`wEPXF3+SQh_4rKSMj5Is%XwTk0cTr;dCa?nMEzHtbfH zT#)x2WN^gl?w1-&>>5duCZx%A{t=Qd1warhk{pub97yBqd@@W5o?7x?Bx4>>!BWpD zs0T01ql+AZ5q1{AnsD`_I ztPQp_9IM+ZL)-=1=1@`Gu6kz=m9g~R8{%2p@l~>#(I_D+iY#PJ;cC{t`aQFH_~r`N zu=XPm7Up!^9v>>3)1|oIx=0_3`vyF17xpb0HhkaN5<~c)N+VaZc_xStf|xm{5@IcU zPOFc(`J5ZeZWH1*KIgZ@O?=*jne%qc2IgJDY$cy>GtO#xs>RGyFU(c&)Ydd7@&yqy z7j!~GFXDpLlPJf3%W*=Vp1gLCOU+nR=n7D+!cu7U(lUkR;On6k3M-+*PpcH_z}rRj3ag>rM-4Oh44Ywuj&5pF xwrk*cH#IA?fX`2@3T@EYO=}g_$>Zz_9pGuF^$MLb8x*?0eeBRJGhtilhicXG;KCbTl1vqX4D)peX2j_S^vd1?=pVf|VyA|Y0pz9EN1T-dBN!p4LV9uZJ9=E19Frw} z!Ar&GSXW%)V~%a(LB~rQX}+YZ+$_y%(GcBnb`#oJxp?*l+P=`~{smY%jjn1w*}HDV zuf=a{sRrV2?D3DXyXuM595DdAFZ|Qa|>rXM%zSw~rA8 zY!&!>*=m(q=-tRfl{)bGS(8dV3`Chhr4f2VtXbt62*;R7WeaSGFtbVvga(+T(h9*a zYn>qb_zW9t7-Vhg>vrfHWOkJ@gd@zMvIF`DnNww_GOkNyHw1!gtxA_dw@MFq*ZuY? JEIRnl_aCy|^`!s+