#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Bing hot-search crawler.

Data source: the Bing search-suggestion endpoint
(``api.bing.com/qsonhs.aspx``), queried with travel-related seed keywords.
"""

import json
import logging
import re
from datetime import datetime, timedelta
from typing import List, Optional
from urllib.parse import quote

import aiohttp

from .base import BaseCrawler
from ..models import HotTopic, HotTopicSource, HotTopicCategory

logger = logging.getLogger(__name__)

# Grabs the outermost {...} span of the response. DOTALL lets the match
# cross line breaks when the payload is pretty-printed.
_JSON_OBJECT_RE = re.compile(r"\{.*\}", re.DOTALL)


class BingCrawler(BaseCrawler):
    """Crawler that turns Bing search suggestions into travel hot topics."""

    source = HotTopicSource.BING
    name = "Bing热搜"

    # Bing endpoints (China region). The three URL constants below are not
    # referenced by the methods of this class; they are kept for callers
    # that may read them.
    API_URL_CN = "https://cn.bing.com/HPImageArchive.aspx?format=js&idx=0&n=1&mkt=zh-CN"

    # Trending-search page.
    TRENDING_URL = "https://cn.bing.com/search?q=热门搜索&form=QBLH"

    # Bing search-suggestion API (can be used to obtain popular terms).
    SUGGEST_API = "https://api.bing.com/qsonhs.aspx?type=cb&q={keyword}&cb=callback"

    # Default travel-related seed keywords.
    TRAVEL_KEYWORDS = [
        "旅游攻略", "景点推荐", "酒店预订", "机票",
        "自驾游", "周边游", "亲子游", "温泉度假",
        "滑雪场", "海岛游", "三亚旅游", "云南旅游",
        "成都旅游", "杭州旅游", "西安旅游",
    ]

    def __init__(self, keywords: Optional[List[str]] = None):
        """Create the crawler.

        Args:
            keywords: Seed keywords to expand into suggestions. Falls back
                to ``TRAVEL_KEYWORDS`` when omitted or empty.
        """
        super().__init__()
        self.keywords = keywords or self.TRAVEL_KEYWORDS

    @staticmethod
    def _search_url(query: str) -> str:
        """Return the cn.bing.com search URL for *query*, percent-encoded."""
        return f"https://cn.bing.com/search?q={quote(query, safe='')}"

    @staticmethod
    def _parse_suggestions(text: str, keyword: str) -> List[str]:
        """Extract suggestion strings from a raw suggestion-API response.

        The payload is expected to wrap a JSON object shaped like
        ``{"AS": {"Results": [{"Suggests": [{"Txt": ...}, ...]}]}}``.
        Empty texts and texts equal to *keyword* are skipped.

        Raises:
            ValueError: If the extracted span is not valid JSON
                (``json.JSONDecodeError`` is a subclass).
        """
        match = _JSON_OBJECT_RE.search(text)
        if not match:
            return []

        data = json.loads(match.group())
        results = data.get('AS', {}).get('Results', [])
        return [
            txt
            for result in results
            for suggest in result.get('Suggests', [])
            if (txt := suggest.get('Txt', '')) and txt != keyword
        ]

    async def fetch(self) -> List[HotTopic]:
        """Collect hot topics from the suggestions of the seed keywords.

        Uses at most the first 10 keywords and the first 3 suggestions of
        each. Duplicate titles are dropped as they are encountered, so
        ranks are contiguous and a partial result returned after an error
        is already de-duplicated.

        Returns:
            The topics gathered so far; possibly empty or partial if an
            error occurred (the error is logged, not raised).
        """
        topics: List[HotTopic] = []
        seen_titles = set()

        try:
            session = await self._get_session()

            for keyword in self.keywords[:10]:  # cap at 10 keywords
                suggestions = await self._get_suggestions(session, keyword)

                for suggestion in suggestions[:3]:  # 3 suggestions per keyword
                    if suggestion in seen_titles:
                        continue
                    seen_titles.add(suggestion)

                    # One timestamp so expires_at is exactly fetched_at + TTL.
                    now = datetime.now()
                    topics.append(
                        HotTopic(
                            title=suggestion,
                            source=self.source,
                            rank=len(topics) + 1,
                            # Synthetic, decreasing heat: earlier hits rank hotter.
                            heat=50000 - (len(topics) * 1000),
                            category=HotTopicCategory.TRAVEL,
                            url=self._search_url(suggestion),
                            tags=[keyword],
                            fetched_at=now,
                            expires_at=now + timedelta(hours=4),
                            extra={'source_keyword': keyword, 'engine': 'bing'},
                        )
                    )

            self.logger.info(f"获取到 {len(topics)} 条 Bing 搜索建议")

        except Exception as e:
            # Best-effort crawler: log and fall through with what we have.
            self.logger.error(f"获取 Bing 热搜失败: {e}")

        return topics

    async def _get_suggestions(self, session, keyword: str) -> List[str]:
        """Fetch Bing search suggestions for *keyword*.

        Args:
            session: HTTP session used as ``async with session.get(url)``
                (presumably an ``aiohttp.ClientSession`` from
                ``_get_session`` — confirm against ``BaseCrawler``).
            keyword: Query text; percent-encoded before being sent.

        Returns:
            Suggestion strings, excluding *keyword* itself. Empty on a
            non-200 response or on any error (logged at debug level).
        """
        try:
            # Sent without the `cb` parameter, unlike SUGGEST_API.
            url = (
                "https://api.bing.com/qsonhs.aspx"
                f"?type=cb&q={quote(keyword, safe='')}"
            )

            async with session.get(url) as response:
                if response.status != 200:
                    return []
                text = await response.text()

            return self._parse_suggestions(text, keyword)

        except Exception as e:
            # Deliberately broad: a failed keyword must not abort the crawl.
            self.logger.debug(f"获取建议失败 [{keyword}]: {e}")
            return []

    async def search_travel(self, query: str) -> List[HotTopic]:
        """Return the suggestions for *query* as travel hot topics.

        Topics are ranked in the order the suggestions are returned, with a
        synthetic heat that decreases by 2000 per rank and never drops
        below 0. Errors are logged and yield the topics built so far.
        """
        topics: List[HotTopic] = []

        try:
            session = await self._get_session()
            suggestions = await self._get_suggestions(session, query)

            for idx, suggestion in enumerate(suggestions):
                now = datetime.now()
                topics.append(
                    HotTopic(
                        title=suggestion,
                        source=self.source,
                        rank=idx + 1,
                        # Clamp so long suggestion lists never go negative.
                        heat=max(0, 50000 - (idx * 2000)),
                        category=HotTopicCategory.TRAVEL,
                        url=self._search_url(suggestion),
                        fetched_at=now,
                        expires_at=now + timedelta(hours=2),
                        extra={'query': query, 'engine': 'bing'},
                    )
                )

        except Exception as e:
            self.logger.error(f"搜索失败: {e}")

        return topics