170 lines
6.4 KiB
Python
170 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
节日日历数据源
|
||
|
||
本地 YAML 配置,无需爬取
|
||
"""
|
||
|
||
import logging
|
||
from typing import List
|
||
from datetime import datetime, timedelta
|
||
from pathlib import Path
|
||
import yaml
|
||
|
||
from .base import BaseCrawler
|
||
from ..models import HotTopic, HotTopicSource, HotTopicCategory
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class CalendarCrawler(BaseCrawler):
    """Festival-calendar data source.

    Turns a list of recurring ``MM-DD`` festival definitions into
    ``HotTopic`` entries for the festivals that fall close to the current
    date. Definitions are read from an optional YAML file, falling back to
    ``DEFAULT_FESTIVALS``; nothing is fetched over the network.
    """

    source = HotTopicSource.CALENDAR
    name = "节日日历"

    # Reporting window, in whole calendar days relative to today: a festival
    # is reported from FUTURE_WINDOW_DAYS before it until PAST_WINDOW_DAYS
    # after it.
    PAST_WINDOW_DAYS = 7
    FUTURE_WINDOW_DAYS = 30

    # Built-in festival data, used when no config file is given or loadable.
    DEFAULT_FESTIVALS = [
        # Public holidays, observances and shopping events
        {"date": "01-01", "name": "元旦", "tags": ["节假日", "跨年"]},
        {"date": "02-14", "name": "情人节", "tags": ["情侣", "约会"]},
        {"date": "03-08", "name": "妇女节", "tags": ["女性", "购物"]},
        {"date": "04-05", "name": "清明节", "tags": ["节假日", "踏青", "祭祖"]},
        {"date": "05-01", "name": "劳动节", "tags": ["节假日", "五一", "出游"]},
        {"date": "05-04", "name": "青年节", "tags": ["青年"]},
        {"date": "06-01", "name": "儿童节", "tags": ["亲子", "儿童"]},
        {"date": "06-18", "name": "618购物节", "tags": ["购物", "促销"]},
        {"date": "08-01", "name": "建军节", "tags": ["红色旅游"]},
        {"date": "09-10", "name": "教师节", "tags": ["教师"]},
        {"date": "10-01", "name": "国庆节", "tags": ["节假日", "十一", "黄金周"]},
        {"date": "11-11", "name": "双十一", "tags": ["购物", "促销", "光棍节"]},
        {"date": "12-12", "name": "双十二", "tags": ["购物", "促销"]},
        {"date": "12-25", "name": "圣诞节", "tags": ["圣诞", "约会"]},

        # Solar terms (simplified: the real dates shift slightly every year)
        {"date": "02-04", "name": "立春", "tags": ["节气", "春季"]},
        {"date": "03-21", "name": "春分", "tags": ["节气", "春季"]},
        {"date": "04-20", "name": "谷雨", "tags": ["节气", "春季"]},
        {"date": "05-06", "name": "立夏", "tags": ["节气", "夏季"]},
        {"date": "06-21", "name": "夏至", "tags": ["节气", "夏季"]},
        {"date": "08-07", "name": "立秋", "tags": ["节气", "秋季"]},
        {"date": "09-23", "name": "秋分", "tags": ["节气", "秋季"]},
        {"date": "11-07", "name": "立冬", "tags": ["节气", "冬季"]},
        {"date": "12-22", "name": "冬至", "tags": ["节气", "冬季"]},

        # Peak travel season
        {"date": "07-01", "name": "暑假开始", "tags": ["暑假", "亲子游", "旺季"]},
        {"date": "08-31", "name": "暑假结束", "tags": ["暑假", "开学季"]},
    ]

    def __init__(self, config_path: str = None):
        """Create the crawler.

        Args:
            config_path: Optional path to a YAML file whose top-level
                ``festivals`` key holds a list of festival dicts. When it is
                ``None`` (or the file cannot be used), ``DEFAULT_FESTIVALS``
                is used instead.
        """
        super().__init__()
        self.config_path = config_path
        # Lazily populated by _load_festivals(); None means "not loaded yet".
        self._festivals = None

    async def fetch(self) -> List[HotTopic]:
        """Return topics for festivals inside the reporting window.

        Never raises: any failure is logged and whatever topics were
        collected up to that point are returned.

        Returns:
            Topics sorted by ``expires_at`` (earliest first).
        """
        topics = []

        try:
            festivals = self._load_festivals()
            now = datetime.now()

            # Keep only festivals that are close enough to today.
            for festival in festivals:
                topic = self._check_festival(festival, now)
                if topic:
                    topics.append(topic)

            # Order chronologically; topics without an expiry go last.
            topics.sort(key=lambda x: x.expires_at or datetime.max)

            self.logger.info(f"获取到 {len(topics)} 个近期节日")

        except Exception as e:
            self.logger.error(f"获取节日数据失败: {e}")

        return topics

    def _load_festivals(self) -> List[dict]:
        """Load (and cache) the festival definitions.

        The config file is tried first when ``config_path`` is set. If it
        cannot be read or parsed, or does not have the expected shape, a
        warning is logged and ``DEFAULT_FESTIVALS`` is used.

        Returns:
            The list of festival dicts; cached on the instance after the
            first successful call.
        """
        if self._festivals is not None:
            return self._festivals

        if self.config_path:
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    data = yaml.safe_load(f)

                # An empty file parses to None and a scalar/sequence file has
                # no "festivals" key: treat both as an invalid config.
                if not isinstance(data, dict):
                    raise ValueError("配置文件顶层必须是字典")

                festivals = data.get('festivals', [])
                # "festivals: null" (or any non-list) must not be cached: it
                # would make fetch() fail while iterating.
                if not isinstance(festivals, list):
                    raise ValueError("'festivals' 必须是列表")
            except Exception as e:
                # Best effort: a broken config must not disable the source.
                self.logger.warning(f"加载节日配置失败: {e}")
            else:
                self._festivals = festivals
                return self._festivals

        # Fall back to the built-in data.
        self._festivals = self.DEFAULT_FESTIVALS
        return self._festivals

    def _check_festival(self, festival: dict, now: datetime) -> HotTopic:
        """Build a topic for ``festival`` if it is near ``now``.

        Distances are measured in whole calendar days between the festival
        date and ``now``'s date, so the time of day never shifts the result
        (a festival happening tomorrow is always one day away).

        Args:
            festival: Dict with ``date`` (``"MM-DD"``), ``name`` and
                optional ``tags``.
            now: Reference moment.

        Returns:
            A ``HotTopic`` when the nearest occurrence lies between
            ``PAST_WINDOW_DAYS`` days ago and ``FUTURE_WINDOW_DAYS`` days
            ahead; otherwise ``None``. ``None`` is also returned (with a
            warning logged) when the entry cannot be parsed.
        """
        try:
            date_str = festival.get('date', '')
            if not date_str:
                return None

            # Parse the recurring date (MM-DD).
            month, day = map(int, date_str.split('-'))

            # Validate against a leap year so that a malformed date still
            # raises here (and is logged below), while 02-29 is accepted.
            datetime(2000, month, day)

            today = now.date()
            target_date = None
            days_until = None

            # Look at last year's, this year's and next year's occurrence so
            # the window also works across the New Year boundary. The window
            # is much shorter than a year, so at most one occurrence matches.
            for year in (now.year - 1, now.year, now.year + 1):
                try:
                    candidate = datetime(year, month, day)
                except ValueError:
                    # 02-29 does not exist in this (non-leap) year.
                    continue

                delta = (candidate.date() - today).days
                if -self.PAST_WINDOW_DAYS <= delta <= self.FUTURE_WINDOW_DAYS:
                    target_date = candidate
                    days_until = delta
                    break

            if target_date is None:
                return None

            name = festival.get('name', '')
            tags = festival.get('tags', [])

            # The closer the festival, the hotter the topic.
            if days_until <= 0:
                heat = 100000  # today or just passed
            elif days_until <= 3:
                heat = 80000  # imminent
            elif days_until <= 7:
                heat = 50000  # within a week
            else:
                heat = 30000  # within the month

            return HotTopic(
                title=f"{name}{'倒计时' if days_until > 0 else ''}",
                source=self.source,
                rank=days_until if days_until > 0 else 0,
                heat=heat,
                category=HotTopicCategory.FESTIVAL,
                description=f"距离{name}还有{days_until}天" if days_until > 0 else f"{name}进行中",
                tags=tags,
                fetched_at=now,
                expires_at=target_date + timedelta(days=1),
                extra={
                    'date': date_str,
                    'days_until': days_until,
                    'festival_name': name,
                }
            )

        except Exception as e:
            self.logger.warning(f"解析节日失败: {e}")
            return None
|