331 lines
30 KiB
JSON
331 lines
30 KiB
JSON
|
|
{
|
|||
|
|
"file_path": "travel-algorithms/travel_algorithms/content_generation/topic_parser.py",
|
|||
|
|
"file_size": 6727,
|
|||
|
|
"line_count": 205,
|
|||
|
|
"functions": [
|
|||
|
|
{
|
|||
|
|
"name": "__init__",
|
|||
|
|
"line_start": 23,
|
|||
|
|
"line_end": 29,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": null,
|
|||
|
|
"docstring": "初始化解析器",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def __init__(self):\n \"\"\"初始化解析器\"\"\"\n self.json_pattern = re.compile(r'```json\\s*(.*?)\\s*```', re.DOTALL)\n self.fallback_patterns = [\n re.compile(r'\\{.*\\}', re.DOTALL), # 匹配JSON对象\n re.compile(r'\\[.*\\]', re.DOTALL), # 匹配JSON数组\n ]",
|
|||
|
|
"code_hash": "2f157d84e962e5b08cd0c5789ec1e2c4"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "parse",
|
|||
|
|
"line_start": 31,
|
|||
|
|
"line_end": 58,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "raw_content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "List[Dict[str, Any]]",
|
|||
|
|
"docstring": "解析主题内容\n\nArgs:\n raw_content: AI生成的原始内容\n\nReturns:\n 解析后的主题列表",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def parse(self, raw_content: str) -> List[Dict[str, Any]]:\n \"\"\"\n 解析主题内容\n\n Args:\n raw_content: AI生成的原始内容\n\n Returns:\n 解析后的主题列表\n \"\"\"\n if not raw_content or not raw_content.strip():\n logger.warning(\"输入内容为空\")\n return []\n\n # 尝试多种解析方法\n topics = (\n self._parse_json_block(raw_content) or\n self._parse_direct_json(raw_content) or\n self._parse_structured_text(raw_content) or\n []\n )\n\n if topics:\n logger.info(f\"成功解析出 {len(topics)} 个主题\")\n return self._validate_topics(topics)\n else:\n logger.error(\"未能解析出任何有效主题\")\n return []",
|
|||
|
|
"code_hash": "fd6289431a9b868b63cb71ea21fd73b7"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_parse_json_block",
|
|||
|
|
"line_start": 60,
|
|||
|
|
"line_end": 79,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[List[Dict[str, Any]]]",
|
|||
|
|
"docstring": "解析```json```代码块中的JSON",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _parse_json_block(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"解析```json```代码块中的JSON\"\"\"\n try:\n matches = self.json_pattern.findall(content)\n if matches:\n json_str = matches[0].strip()\n data = json.loads(json_str)\n \n if isinstance(data, list):\n return data\n elif isinstance(data, dict):\n # 如果是字典,尝试提取topics字段\n if 'topics' in data:\n return data['topics']\n else:\n return [data]\n return None\n except (json.JSONDecodeError, Exception) as e:\n logger.debug(f\"JSON代码块解析失败: {e}\")\n return None",
|
|||
|
|
"code_hash": "a70c065e9c40432ae3e59cc2e73d486a"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_parse_direct_json",
|
|||
|
|
"line_start": 81,
|
|||
|
|
"line_end": 108,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[List[Dict[str, Any]]]",
|
|||
|
|
"docstring": "直接解析JSON内容",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _parse_direct_json(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"直接解析JSON内容\"\"\"\n try:\n # 尝试直接解析整个内容\n data = json.loads(content.strip())\n \n if isinstance(data, list):\n return data\n elif isinstance(data, dict):\n if 'topics' in data:\n return data['topics']\n else:\n return [data]\n return None\n except (json.JSONDecodeError, Exception):\n # 尝试使用fallback patterns\n for pattern in self.fallback_patterns:\n matches = pattern.findall(content)\n for match in matches:\n try:\n data = json.loads(match)\n if isinstance(data, list) and len(data) > 0:\n return data\n elif isinstance(data, dict):\n return [data]\n except json.JSONDecodeError:\n continue\n return None",
|
|||
|
|
"code_hash": "bbc1f94feb212a1b0ee92114f9c95bc5"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_parse_structured_text",
|
|||
|
|
"line_start": 110,
|
|||
|
|
"line_end": 159,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[List[Dict[str, Any]]]",
|
|||
|
|
"docstring": "解析结构化文本格式",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _parse_structured_text(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"解析结构化文本格式\"\"\"\n try:\n topics = []\n lines = content.split('\\n')\n current_topic = {}\n \n for line in lines:\n line = line.strip()\n if not line:\n continue\n \n # 检测主题开始(数字编号)\n if re.match(r'^\\d+[\\.\\)]\\s*', line):\n if current_topic:\n topics.append(current_topic)\n current_topic = {}\n title = re.sub(r'^\\d+[\\.\\)]\\s*', '', line)\n current_topic['title'] = title\n \n # 检测字段\n elif ':' in line and current_topic:\n key, value = line.split(':', 1)\n key = key.strip().lower()\n value = value.strip()\n \n # 映射中文字段到英文\n field_mapping = {\n '标题': 'title',\n '主题': 'title', \n '目标': 'target',\n '目标人群': 'target',\n '内容': 'content',\n '描述': 'content',\n '关键词': 'keywords',\n '标签': 'tags'\n }\n \n mapped_key = field_mapping.get(key, key)\n current_topic[mapped_key] = value\n \n # 添加最后一个主题\n if current_topic:\n topics.append(current_topic)\n \n return topics if topics else None\n \n except Exception as e:\n logger.debug(f\"结构化文本解析失败: {e}\")\n return None",
|
|||
|
|
"code_hash": "826c5e5160fe9b910eb34e70e657c661"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_validate_topics",
|
|||
|
|
"line_start": 161,
|
|||
|
|
"line_end": 193,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "topics",
|
|||
|
|
"type_hint": "List[Dict[str, Any]]"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "List[Dict[str, Any]]",
|
|||
|
|
"docstring": "验证和标准化主题数据",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _validate_topics(self, topics: List[Dict[str, Any]]) -> List[Dict[str, Any]]:\n \"\"\"验证和标准化主题数据\"\"\"\n validated_topics = []\n \n for i, topic in enumerate(topics):\n if not isinstance(topic, dict):\n logger.warning(f\"主题 {i+1} 不是字典格式,跳过\")\n continue\n \n # 确保必需字段存在\n validated_topic = {\n 'id': topic.get('id', f\"topic_{i+1}\"),\n 'title': topic.get('title', topic.get('主题', f\"主题{i+1}\")),\n 'content': topic.get('content', topic.get('描述', topic.get('内容', ''))),\n 'target': topic.get('target', topic.get('目标人群', '通用')),\n 'keywords': topic.get('keywords', topic.get('关键词', [])),\n 'tags': topic.get('tags', topic.get('标签', [])),\n 'metadata': topic.get('metadata', {})\n }\n \n # 确保keywords和tags是列表\n if isinstance(validated_topic['keywords'], str):\n validated_topic['keywords'] = [kw.strip() for kw in validated_topic['keywords'].split(',')]\n if isinstance(validated_topic['tags'], str):\n validated_topic['tags'] = [tag.strip() for tag in validated_topic['tags'].split(',')]\n \n # 只保留有标题的主题\n if validated_topic['title']:\n validated_topics.append(validated_topic)\n else:\n logger.warning(f\"主题 {i+1} 缺少标题,跳过\")\n \n return validated_topics",
|
|||
|
|
"code_hash": "3dfeb3356b59c3101937584477962a06"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "parse_single_topic",
|
|||
|
|
"line_start": 195,
|
|||
|
|
"line_end": 206,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[Dict[str, Any]]",
|
|||
|
|
"docstring": "解析单个主题\n\nArgs:\n content: 主题内容\n\nReturns:\n 解析后的主题字典",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def parse_single_topic(self, content: str) -> Optional[Dict[str, Any]]:\n \"\"\"\n 解析单个主题\n\n Args:\n content: 主题内容\n\n Returns:\n 解析后的主题字典\n \"\"\"\n topics = self.parse(content)\n return topics[0] if topics else None ",
|
|||
|
|
"code_hash": "4b6e0b313fdf155b6f375314ae7da82b"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"classes": [
|
|||
|
|
{
|
|||
|
|
"name": "TopicParser",
|
|||
|
|
"line_start": 17,
|
|||
|
|
"line_end": 206,
|
|||
|
|
"bases": [],
|
|||
|
|
"methods": [
|
|||
|
|
{
|
|||
|
|
"name": "__init__",
|
|||
|
|
"line_start": 23,
|
|||
|
|
"line_end": 29,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": null,
|
|||
|
|
"docstring": "初始化解析器",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def __init__(self):\n \"\"\"初始化解析器\"\"\"\n self.json_pattern = re.compile(r'```json\\s*(.*?)\\s*```', re.DOTALL)\n self.fallback_patterns = [\n re.compile(r'\\{.*\\}', re.DOTALL), # 匹配JSON对象\n re.compile(r'\\[.*\\]', re.DOTALL), # 匹配JSON数组\n ]",
|
|||
|
|
"code_hash": "2f157d84e962e5b08cd0c5789ec1e2c4"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "parse",
|
|||
|
|
"line_start": 31,
|
|||
|
|
"line_end": 58,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "raw_content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "List[Dict[str, Any]]",
|
|||
|
|
"docstring": "解析主题内容\n\nArgs:\n raw_content: AI生成的原始内容\n\nReturns:\n 解析后的主题列表",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def parse(self, raw_content: str) -> List[Dict[str, Any]]:\n \"\"\"\n 解析主题内容\n\n Args:\n raw_content: AI生成的原始内容\n\n Returns:\n 解析后的主题列表\n \"\"\"\n if not raw_content or not raw_content.strip():\n logger.warning(\"输入内容为空\")\n return []\n\n # 尝试多种解析方法\n topics = (\n self._parse_json_block(raw_content) or\n self._parse_direct_json(raw_content) or\n self._parse_structured_text(raw_content) or\n []\n )\n\n if topics:\n logger.info(f\"成功解析出 {len(topics)} 个主题\")\n return self._validate_topics(topics)\n else:\n logger.error(\"未能解析出任何有效主题\")\n return []",
|
|||
|
|
"code_hash": "fd6289431a9b868b63cb71ea21fd73b7"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_parse_json_block",
|
|||
|
|
"line_start": 60,
|
|||
|
|
"line_end": 79,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[List[Dict[str, Any]]]",
|
|||
|
|
"docstring": "解析```json```代码块中的JSON",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _parse_json_block(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"解析```json```代码块中的JSON\"\"\"\n try:\n matches = self.json_pattern.findall(content)\n if matches:\n json_str = matches[0].strip()\n data = json.loads(json_str)\n \n if isinstance(data, list):\n return data\n elif isinstance(data, dict):\n # 如果是字典,尝试提取topics字段\n if 'topics' in data:\n return data['topics']\n else:\n return [data]\n return None\n except (json.JSONDecodeError, Exception) as e:\n logger.debug(f\"JSON代码块解析失败: {e}\")\n return None",
|
|||
|
|
"code_hash": "a70c065e9c40432ae3e59cc2e73d486a"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_parse_direct_json",
|
|||
|
|
"line_start": 81,
|
|||
|
|
"line_end": 108,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[List[Dict[str, Any]]]",
|
|||
|
|
"docstring": "直接解析JSON内容",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _parse_direct_json(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"直接解析JSON内容\"\"\"\n try:\n # 尝试直接解析整个内容\n data = json.loads(content.strip())\n \n if isinstance(data, list):\n return data\n elif isinstance(data, dict):\n if 'topics' in data:\n return data['topics']\n else:\n return [data]\n return None\n except (json.JSONDecodeError, Exception):\n # 尝试使用fallback patterns\n for pattern in self.fallback_patterns:\n matches = pattern.findall(content)\n for match in matches:\n try:\n data = json.loads(match)\n if isinstance(data, list) and len(data) > 0:\n return data\n elif isinstance(data, dict):\n return [data]\n except json.JSONDecodeError:\n continue\n return None",
|
|||
|
|
"code_hash": "bbc1f94feb212a1b0ee92114f9c95bc5"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_parse_structured_text",
|
|||
|
|
"line_start": 110,
|
|||
|
|
"line_end": 159,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[List[Dict[str, Any]]]",
|
|||
|
|
"docstring": "解析结构化文本格式",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _parse_structured_text(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"解析结构化文本格式\"\"\"\n try:\n topics = []\n lines = content.split('\\n')\n current_topic = {}\n \n for line in lines:\n line = line.strip()\n if not line:\n continue\n \n # 检测主题开始(数字编号)\n if re.match(r'^\\d+[\\.\\)]\\s*', line):\n if current_topic:\n topics.append(current_topic)\n current_topic = {}\n title = re.sub(r'^\\d+[\\.\\)]\\s*', '', line)\n current_topic['title'] = title\n \n # 检测字段\n elif ':' in line and current_topic:\n key, value = line.split(':', 1)\n key = key.strip().lower()\n value = value.strip()\n \n # 映射中文字段到英文\n field_mapping = {\n '标题': 'title',\n '主题': 'title', \n '目标': 'target',\n '目标人群': 'target',\n '内容': 'content',\n '描述': 'content',\n '关键词': 'keywords',\n '标签': 'tags'\n }\n \n mapped_key = field_mapping.get(key, key)\n current_topic[mapped_key] = value\n \n # 添加最后一个主题\n if current_topic:\n topics.append(current_topic)\n \n return topics if topics else None\n \n except Exception as e:\n logger.debug(f\"结构化文本解析失败: {e}\")\n return None",
|
|||
|
|
"code_hash": "826c5e5160fe9b910eb34e70e657c661"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "_validate_topics",
|
|||
|
|
"line_start": 161,
|
|||
|
|
"line_end": 193,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "topics",
|
|||
|
|
"type_hint": "List[Dict[str, Any]]"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "List[Dict[str, Any]]",
|
|||
|
|
"docstring": "验证和标准化主题数据",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def _validate_topics(self, topics: List[Dict[str, Any]]) -> List[Dict[str, Any]]:\n \"\"\"验证和标准化主题数据\"\"\"\n validated_topics = []\n \n for i, topic in enumerate(topics):\n if not isinstance(topic, dict):\n logger.warning(f\"主题 {i+1} 不是字典格式,跳过\")\n continue\n \n # 确保必需字段存在\n validated_topic = {\n 'id': topic.get('id', f\"topic_{i+1}\"),\n 'title': topic.get('title', topic.get('主题', f\"主题{i+1}\")),\n 'content': topic.get('content', topic.get('描述', topic.get('内容', ''))),\n 'target': topic.get('target', topic.get('目标人群', '通用')),\n 'keywords': topic.get('keywords', topic.get('关键词', [])),\n 'tags': topic.get('tags', topic.get('标签', [])),\n 'metadata': topic.get('metadata', {})\n }\n \n # 确保keywords和tags是列表\n if isinstance(validated_topic['keywords'], str):\n validated_topic['keywords'] = [kw.strip() for kw in validated_topic['keywords'].split(',')]\n if isinstance(validated_topic['tags'], str):\n validated_topic['tags'] = [tag.strip() for tag in validated_topic['tags'].split(',')]\n \n # 只保留有标题的主题\n if validated_topic['title']:\n validated_topics.append(validated_topic)\n else:\n logger.warning(f\"主题 {i+1} 缺少标题,跳过\")\n \n return validated_topics",
|
|||
|
|
"code_hash": "3dfeb3356b59c3101937584477962a06"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "parse_single_topic",
|
|||
|
|
"line_start": 195,
|
|||
|
|
"line_end": 206,
|
|||
|
|
"args": [
|
|||
|
|
{
|
|||
|
|
"name": "self"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "content",
|
|||
|
|
"type_hint": "str"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"return_type": "Optional[Dict[str, Any]]",
|
|||
|
|
"docstring": "解析单个主题\n\nArgs:\n content: 主题内容\n\nReturns:\n 解析后的主题字典",
|
|||
|
|
"is_async": false,
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": " def parse_single_topic(self, content: str) -> Optional[Dict[str, Any]]:\n \"\"\"\n 解析单个主题\n\n Args:\n content: 主题内容\n\n Returns:\n 解析后的主题字典\n \"\"\"\n topics = self.parse(content)\n return topics[0] if topics else None ",
|
|||
|
|
"code_hash": "4b6e0b313fdf155b6f375314ae7da82b"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"docstring": "主题解析器\n负责从AI生成的文本中解析出结构化的主题数据",
|
|||
|
|
"decorators": [],
|
|||
|
|
"code": "class TopicParser:\n \"\"\"\n 主题解析器\n 负责从AI生成的文本中解析出结构化的主题数据\n \"\"\"\n\n def __init__(self):\n \"\"\"初始化解析器\"\"\"\n self.json_pattern = re.compile(r'```json\\s*(.*?)\\s*```', re.DOTALL)\n self.fallback_patterns = [\n re.compile(r'\\{.*\\}', re.DOTALL), # 匹配JSON对象\n re.compile(r'\\[.*\\]', re.DOTALL), # 匹配JSON数组\n ]\n\n def parse(self, raw_content: str) -> List[Dict[str, Any]]:\n \"\"\"\n 解析主题内容\n\n Args:\n raw_content: AI生成的原始内容\n\n Returns:\n 解析后的主题列表\n \"\"\"\n if not raw_content or not raw_content.strip():\n logger.warning(\"输入内容为空\")\n return []\n\n # 尝试多种解析方法\n topics = (\n self._parse_json_block(raw_content) or\n self._parse_direct_json(raw_content) or\n self._parse_structured_text(raw_content) or\n []\n )\n\n if topics:\n logger.info(f\"成功解析出 {len(topics)} 个主题\")\n return self._validate_topics(topics)\n else:\n logger.error(\"未能解析出任何有效主题\")\n return []\n\n def _parse_json_block(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"解析```json```代码块中的JSON\"\"\"\n try:\n matches = self.json_pattern.findall(content)\n if matches:\n json_str = matches[0].strip()\n data = json.loads(json_str)\n \n if isinstance(data, list):\n return data\n elif isinstance(data, dict):\n # 如果是字典,尝试提取topics字段\n if 'topics' in data:\n return data['topics']\n else:\n return [data]\n return None\n except (json.JSONDecodeError, Exception) as e:\n logger.debug(f\"JSON代码块解析失败: {e}\")\n return None\n\n def _parse_direct_json(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"直接解析JSON内容\"\"\"\n try:\n # 尝试直接解析整个内容\n data = json.loads(content.strip())\n \n if isinstance(data, list):\n return data\n elif isinstance(data, dict):\n if 'topics' in data:\n return data['topics']\n else:\n return [data]\n return None\n except (json.JSONDecodeError, Exception):\n # 尝试使用fallback patterns\n for pattern in self.fallback_patterns:\n matches = pattern.findall(content)\n for match in matches:\n try:\n data = json.loads(match)\n if isinstance(data, list) and len(data) > 0:\n return data\n elif isinstance(data, dict):\n return [data]\n except json.JSONDecodeError:\n continue\n return None\n\n def _parse_structured_text(self, content: str) -> Optional[List[Dict[str, Any]]]:\n \"\"\"解析结构化文本格式\"\"\"\n try:\n topics = []\n lines = content.split('\\n')\n current_topic = {}\n \n for line in lines:\n line = line.strip()\n if not line:\n continue\n \n # 检测主题开始(数字编号)\n if re.match(r'^\\d+[\\.\\)]\\s*', line):\n if current_topic:\n topics.append(current_topic)\n current_topic = {}\n title = re.sub(r'^\\d+[\\.\\)]\\s*', '', li
|
|||
|
|
"code_hash": "e18d720056356adcfd93c009f5594b79"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"imports": [
|
|||
|
|
{
|
|||
|
|
"type": "import",
|
|||
|
|
"modules": [
|
|||
|
|
"json"
|
|||
|
|
],
|
|||
|
|
"aliases": []
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"type": "import",
|
|||
|
|
"modules": [
|
|||
|
|
"re"
|
|||
|
|
],
|
|||
|
|
"aliases": []
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"type": "import",
|
|||
|
|
"modules": [
|
|||
|
|
"logging"
|
|||
|
|
],
|
|||
|
|
"aliases": []
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"type": "from_import",
|
|||
|
|
"module": "typing",
|
|||
|
|
"names": [
|
|||
|
|
"List",
|
|||
|
|
"Dict",
|
|||
|
|
"Any",
|
|||
|
|
"Optional"
|
|||
|
|
],
|
|||
|
|
"aliases": [],
|
|||
|
|
"level": 0
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"constants": [],
|
|||
|
|
"docstring": "Topic Parser\n主题解析器 - 从原项目迁移,解析AI生成的主题内容",
|
|||
|
|
"content_hash": "d74c0a64f33726e70ed1151d33e7d937"
|
|||
|
|
}
|