增加了元数据清除, 感知哈希干扰和颜色直方图扰动三种处理

2025-05-06 15:19:37 +08:00 · 2025-05-06 15:19:37 +08:00 · d828165547
commit d828165547
parent 45579c7b87
1 changed files with 154 additions and 3 deletions
--- a/utils/poster_notes_creator.py
+++ b/utils/poster_notes_creator.py
@ -9,6 +9,7 @@ import concurrent.futures
 import numpy as np
 from PIL import ImageEnhance, ImageFilter
 from .output_handler import OutputHandler
+import io

 logger = logging.getLogger(__name__)

@ -347,6 +348,133 @@ class PosterNotesCreator:
            logger.error(traceback.format_exc())
            return None
    
+    def add_phash_noise(self, image: Image.Image, intensity: float = 0.05) -> Image.Image:
+        """
+        添加扰动以对抗感知哈希算法(pHash)
+        通过在频域添加低频扰动实现
+        
+        Args:
+            image: 输入图像
+            intensity: 扰动强度(0-1)
+            
+        Returns:
+            添加扰动后的图像
+        """
+        # 灰度化处理
+        gray_image = image.convert('L')
+        width, height = gray_image.size
+        
+        # 确保宽高是8的倍数(DCT通常用8x8块)
+        new_width = (width // 8) * 8
+        new_height = (height // 8) * 8
+        if new_width != width or new_height != height:
+            gray_image = gray_image.resize((new_width, new_height))
+        
+        # 转为numpy数组
+        img_array = np.array(gray_image)
+        
+        # 简化版DCT域扰动
+        # 分块处理图像
+        for y in range(0, new_height, 8):
+            for x in range(0, new_width, 8):
+                block = img_array[y:y+8, x:x+8].astype(float)
+                
+                # 简单DCT - 对块应用频域变化
+                # 这里使用简单方法模拟DCT效果
+                # 真正的DCT需要使用scipy.fftpack
+                avg = np.mean(block)
+                # 修改低频区块(除直流分量外)
+                noise_value = random.uniform(-intensity * 10, intensity * 10)
+                
+                # 扰动左上角的低频系数(类似于DCT中的低频区域)
+                block[1:3, 1:3] += noise_value
+                
+                # 应用回原图
+                img_array[y:y+8, x:x+8] = np.clip(block, 0, 255)
+        
+        # 转回PIL图像
+        modified_image = Image.fromarray(img_array.astype(np.uint8))
+        
+        # 调整回原始大小
+        if new_width != width or new_height != height:
+            modified_image = modified_image.resize((width, height), Image.LANCZOS)
+        
+        # 将修改后的灰度通道应用到原彩色图像
+        if image.mode == 'RGB':
+            r, g, b = image.split()
+            # 混合原始图像与修改过的灰度图
+            blend_factor = 0.2 # 混合强度
+            r = Image.blend(r, modified_image, blend_factor)
+            g = Image.blend(g, modified_image, blend_factor)
+            b = Image.blend(b, modified_image, blend_factor)
+            return Image.merge('RGB', (r, g, b))
+        else:
+            return modified_image
+    
+    def perturb_color_histogram(self, image: Image.Image, strength: float = 0.03) -> Image.Image:
+        """
+        扰动图像的颜色直方图，对抗基于颜色统计的图像匹配
+        
+        Args:
+            image: 输入图像
+            strength: 扰动强度(0-1)
+            
+        Returns:
+            处理后的图像
+        """
+        # 确保为RGB模式
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        
+        # 转为numpy数组
+        img_array = np.array(image)
+        height, width, channels = img_array.shape
+        
+        # 对每个通道分别处理
+        for channel in range(channels):
+            # 计算当前通道的直方图
+            hist, _ = np.histogram(img_array[:,:,channel].flatten(), bins=64, range=(0, 256))
+            
+            # 找出主要颜色区间 (频率高的区间)
+            threshold = np.percentile(hist, 70)  # 取前30%的颜色块
+            significant_bins = np.where(hist > threshold)[0]
+            
+            if len(significant_bins) > 0:
+                for bin_idx in significant_bins:
+                    # 计算当前bin对应的颜色范围
+                    bin_width = 256 // 64
+                    color_low = bin_idx * bin_width
+                    color_high = (bin_idx + 1) * bin_width
+                    
+                    # 创建颜色范围掩码
+                    mask = (img_array[:,:,channel] >= color_low) & (img_array[:,:,channel] < color_high)
+                    
+                    if np.any(mask):
+                        # 生成随机偏移值
+                        offset = int(strength * bin_width * (random.random() - 0.5) * 2)
+                        
+                        # 应用偏移，确保在0-255范围内
+                        img_array[:,:,channel][mask] = np.clip(
+                            img_array[:,:,channel][mask] + offset, 0, 255).astype(np.uint8)
+        
+        # 转回PIL图像
+        return Image.fromarray(img_array)
+    
+    def strip_metadata(self, image: Image.Image) -> Image.Image:
+        """
+        移除图像中的所有元数据
+        
+        Args:
+            image: 输入图像
+            
+        Returns:
+            无元数据的图像
+        """
+        # 创建无元数据的副本
+        data = io.BytesIO()
+        image.save(data, format=image.format if image.format else 'PNG')
+        return Image.open(data)
+    
    def optimized_process_image(
        self, 
        image: Image.Image, 
@ -356,7 +484,7 @@ class PosterNotesCreator:
        variation_strength: str = "medium",
        extra_effects: bool = True
    ) -> Image.Image:
-        """优化后的图像处理方法，使用更高效的算法"""
+        """优化后的图像处理方法，使用更高效的算法，添加反查重技术"""
        # 设置随机种子
        if seed is not None:
            random.seed(seed)
@ -419,12 +547,14 @@ class PosterNotesCreator:
            result = resized_image.crop((0, crop_y1, resized_width, crop_y2))
        
        # 如果不需要变化或是低强度且禁用额外效果
-        if not add_variation or (variation_strength == "low" and not use_extra):
+        if not add_variation:
            # 重置随机种子
            if seed is not None:
                random.seed()
                np.random.seed()
-            return result
+                
+            # 清除元数据后返回
+            return self.strip_metadata(result)
        
        # 高效应用基本变化
        processed_image = result.convert('RGB')
@ -450,6 +580,24 @@ class PosterNotesCreator:
            if abs(rotation_angle) > 0.1:  # 只有当角度足够大时才旋转
                processed_image = processed_image.rotate(rotation_angle, resample=Image.BICUBIC, expand=False)
        
+        # 5. 新增 - 应用反查重技术
+        # 根据变化强度选择性应用
+        if use_extra:
+            # 随机决定应用哪些反查重技术
+            apply_phash = random.random() < 0.7
+            apply_color = random.random() < 0.7
+            
+            # 感知哈希干扰 (在中高强度变化时应用)
+            if apply_phash and variation_strength != "low":
+                phash_intensity = 0.05 if variation_strength == "medium" else 0.08
+                processed_image = self.add_phash_noise(processed_image, phash_intensity)
+            
+            # 颜色直方图扰动
+            if apply_color:
+                color_strength = 0.02 if variation_strength == "low" else \
+                                0.04 if variation_strength == "medium" else 0.06
+                processed_image = self.perturb_color_histogram(processed_image, color_strength)
+        
        # 应用额外效果 (只在需要时)
        if use_extra:
            # 根据强度决定是否应用特定效果
@ -483,6 +631,9 @@ class PosterNotesCreator:
                offset_y = random.randint(0, border_size*2)
                processed_image = bordered.crop((offset_x, offset_y, offset_x + w, offset_y + h))
        
+        # 6. 始终清除元数据 - 最后一步
+        processed_image = self.strip_metadata(processed_image)
+        
        # 重置随机种子
        if seed is not None:
            random.seed()