#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Festival calendar data source.

Festivals come from a local YAML config file or from built-in defaults;
nothing is crawled over the network.
"""

import logging
from typing import List, Optional
from datetime import datetime, timedelta
from pathlib import Path

import yaml

from .base import BaseCrawler
from ..models import HotTopic, HotTopicSource, HotTopicCategory

logger = logging.getLogger(__name__)


class CalendarCrawler(BaseCrawler):
    """Festival calendar data source.

    Turns a list of recurring ``MM-DD`` festival entries into ``HotTopic``
    objects for the entries that fall inside a window around "now"
    (``LOOKBACK_DAYS`` calendar days in the past through ``LOOKAHEAD_DAYS``
    calendar days in the future).
    """

    source = HotTopicSource.CALENDAR
    name = "节日日历"

    # Built-in festival data, used when no config file is given or when
    # loading the config file fails.
    DEFAULT_FESTIVALS = [
        # Public holidays and popular fixed-date events
        {"date": "01-01", "name": "元旦", "tags": ["节假日", "跨年"]},
        {"date": "02-14", "name": "情人节", "tags": ["情侣", "约会"]},
        {"date": "03-08", "name": "妇女节", "tags": ["女性", "购物"]},
        {"date": "04-05", "name": "清明节", "tags": ["节假日", "踏青", "祭祖"]},
        {"date": "05-01", "name": "劳动节", "tags": ["节假日", "五一", "出游"]},
        {"date": "05-04", "name": "青年节", "tags": ["青年"]},
        {"date": "06-01", "name": "儿童节", "tags": ["亲子", "儿童"]},
        {"date": "06-18", "name": "618购物节", "tags": ["购物", "促销"]},
        {"date": "08-01", "name": "建军节", "tags": ["红色旅游"]},
        {"date": "09-10", "name": "教师节", "tags": ["教师"]},
        {"date": "10-01", "name": "国庆节", "tags": ["节假日", "十一", "黄金周"]},
        {"date": "11-11", "name": "双十一", "tags": ["购物", "促销", "光棍节"]},
        {"date": "12-12", "name": "双十二", "tags": ["购物", "促销"]},
        {"date": "12-25", "name": "圣诞节", "tags": ["圣诞", "约会"]},

        # Solar terms (simplified: the real dates shift slightly each year)
        {"date": "02-04", "name": "立春", "tags": ["节气", "春季"]},
        {"date": "03-21", "name": "春分", "tags": ["节气", "春季"]},
        {"date": "04-20", "name": "谷雨", "tags": ["节气", "春季"]},
        {"date": "05-06", "name": "立夏", "tags": ["节气", "夏季"]},
        {"date": "06-21", "name": "夏至", "tags": ["节气", "夏季"]},
        {"date": "08-07", "name": "立秋", "tags": ["节气", "秋季"]},
        {"date": "09-23", "name": "秋分", "tags": ["节气", "秋季"]},
        {"date": "11-07", "name": "立冬", "tags": ["节气", "冬季"]},
        {"date": "12-22", "name": "冬至", "tags": ["节气", "冬季"]},

        # Peak travel season
        {"date": "07-01", "name": "暑假开始", "tags": ["暑假", "亲子游", "旺季"]},
        {"date": "08-31", "name": "暑假结束", "tags": ["暑假", "开学季"]},
    ]

    # A festival stays listed for this many calendar days after its date.
    LOOKBACK_DAYS = 7
    # A festival is listed once it is at most this many calendar days away.
    LOOKAHEAD_DAYS = 30

    def __init__(self, config_path: Optional[str] = None):
        """Create the crawler.

        Args:
            config_path: Optional path to a YAML file whose top-level mapping
                has a ``festivals`` list. When omitted (or unusable) the
                built-in ``DEFAULT_FESTIVALS`` are used.
        """
        super().__init__()
        self.config_path = config_path
        # Lazily populated cache of festival entries; None means "not loaded".
        self._festivals: Optional[List[dict]] = None

    async def fetch(self) -> List[HotTopic]:
        """Return topics for festivals that fall inside the current window.

        Returns:
            Topics sorted by ``expires_at`` ascending (topics without an
            expiry sort last). Errors are logged rather than raised, and
            whatever was collected before the error is returned.
        """
        topics: List[HotTopic] = []

        try:
            now = datetime.now()

            for festival in self._load_festivals():
                topic = self._check_festival(festival, now)
                if topic:
                    topics.append(topic)

            # Soonest-expiring first.
            topics.sort(key=lambda x: x.expires_at or datetime.max)

            self.logger.info(f"获取到 {len(topics)} 个近期节日")

        except Exception as e:
            # Best-effort data source: report the failure, never propagate it.
            self.logger.error(f"获取节日数据失败: {e}")

        return topics

    def _load_festivals(self) -> List[dict]:
        """Load festival entries, caching the result on the instance.

        Tries ``self.config_path`` first. The file must parse to a mapping
        whose ``festivals`` value is a list (a missing key yields an empty
        list). Any read/parse error or unexpected structure is logged as a
        warning and the built-in defaults are used instead.

        Returns:
            The list of festival entries; never ``None``.
        """
        if self._festivals is not None:
            return self._festivals

        if self.config_path:
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    data = yaml.safe_load(f)

                # safe_load returns None for an empty document and may return
                # any YAML type, so validate the shape before trusting it.
                if not isinstance(data, dict):
                    raise ValueError("top-level YAML value must be a mapping")
                festivals = data.get('festivals', [])
                if not isinstance(festivals, list):
                    raise ValueError("'festivals' must be a list")

                self._festivals = festivals
                return self._festivals
            except Exception as e:
                self.logger.warning(f"加载节日配置失败: {e}")

        # Shallow copy so the cached list is not the shared class-level list.
        self._festivals = list(self.DEFAULT_FESTIVALS)
        return self._festivals

    @staticmethod
    def _occurrence(year: int, month: int, day: int) -> Optional[datetime]:
        """Return midnight of ``year-month-day``, or None if it does not exist in that year."""
        try:
            return datetime(year, month, day)
        except ValueError:
            return None

    def _check_festival(self, festival: dict, now: datetime) -> Optional[HotTopic]:
        """Build a topic for ``festival`` if it falls inside the window around ``now``.

        Distances are measured in whole calendar days (time of day in ``now``
        is ignored), so a festival that is tomorrow is always 1 day away.

        Args:
            festival: Entry with a ``date`` in ``MM-DD`` form and optional
                ``name`` and ``tags``.
            now: Reference moment; only its year, month and day are used for
                the window, and it is stored as the topic's ``fetched_at``.

        Returns:
            A ``HotTopic``, or ``None`` when the entry has no date, is outside
            the window, does not occur in the relevant year (e.g. "02-29"),
            or cannot be parsed (a warning is logged in that last case).
        """
        try:
            date_str = festival.get('date', '')
            if not date_str:
                return None

            # Parse the MM-DD date.
            month, day = map(int, date_str.split('-'))

            # Validate against a leap year so a genuinely invalid date such
            # as "02-30" or "13-01" still raises and is reported below, while
            # "02-29" is accepted as a date that exists only in some years.
            datetime(2000, month, day)

            today = datetime(now.year, now.month, now.day)

            # Prefer this year's occurrence; once it is further back than the
            # look-back window (or does not exist this year), use next year's.
            # Next year's date is resolved only here, so it cannot break
            # entries that are valid this year.
            target_date = self._occurrence(now.year, month, day)
            if target_date is None or (today - target_date).days > self.LOOKBACK_DAYS:
                target_date = self._occurrence(now.year + 1, month, day)
            if target_date is None:
                return None

            days_until = (target_date - today).days
            if days_until < -self.LOOKBACK_DAYS or days_until > self.LOOKAHEAD_DAYS:
                return None

            name = festival.get('name', '')
            tags = festival.get('tags', [])

            # Heat grows as the festival approaches.
            if days_until <= 0:
                heat = 100000  # today, or within the look-back window
            elif days_until <= 3:
                heat = 80000  # imminent
            elif days_until <= 7:
                heat = 50000  # within a week
            else:
                heat = 30000  # within the look-ahead window

            upcoming = days_until > 0

            # NOTE(review): for festivals already past (days_until < 0) the
            # topic is still reported as in progress while expires_at lies in
            # the past; this matches the previous behaviour. Confirm that it
            # is intended.
            return HotTopic(
                title=f"{name}{'倒计时' if upcoming else ''}",
                source=self.source,
                rank=days_until if upcoming else 0,
                heat=heat,
                category=HotTopicCategory.FESTIVAL,
                description=f"距离{name}还有{days_until}天" if upcoming else f"{name}进行中",
                tags=tags,
                fetched_at=now,
                expires_at=target_date + timedelta(days=1),
                extra={
                    'date': date_str,
                    'days_until': days_until,
                    'festival_name': name,
                }
            )

        except Exception as e:
            # One malformed entry must not prevent the others from loading.
            self.logger.warning(f"解析节日失败: {e}")
            return None