增加了元数据清除, 感知哈希干扰和颜色直方图扰动三种处理

This commit is contained in:
jinye_huang 2025-05-06 15:19:37 +08:00
parent 45579c7b87
commit d828165547

View File

@ -9,6 +9,7 @@ import concurrent.futures
import numpy as np
from PIL import ImageEnhance, ImageFilter
from .output_handler import OutputHandler
import io
logger = logging.getLogger(__name__)
@ -347,6 +348,133 @@ class PosterNotesCreator:
logger.error(traceback.format_exc())
return None
def add_phash_noise(self, image: Image.Image, intensity: float = 0.05) -> Image.Image:
    """
    Perturb an image so that its perceptual hash (pHash) drifts from the original.

    The method does NOT compute a real DCT. It works on a grayscale copy,
    splits it into 8x8 tiles and adds one random offset per tile to the 2x2
    patch at rows/columns 1-2 of that tile. The perturbed grayscale layer is
    then blended back into the colour channels at a fixed 20% weight.

    Args:
        image: Input image.
        intensity: Perturbation strength (0-1). Each tile's offset is drawn
            uniformly from [-intensity * 10, intensity * 10] gray levels.

    Returns:
        A new image of the same size. RGB and RGBA inputs keep their mode
        (alpha is passed through untouched); any other mode returns the
        perturbed grayscale ('L') image. Images smaller than one 8x8 tile
        are returned as an unmodified copy.
    """
    tile = 8
    blend_factor = 0.2  # weight of the perturbed gray layer in each colour band

    width, height = image.size
    # Round the working size down to a whole number of tiles.
    new_width = (width // tile) * tile
    new_height = (height // tile) * tile
    if new_width == 0 or new_height == 0:
        # Too small to hold a single tile; resizing to a zero dimension would fail.
        return image.copy()

    gray_image = image.convert('L')
    needs_resize = (new_width, new_height) != (width, height)
    if needs_resize:
        gray_image = gray_image.resize((new_width, new_height))

    work = np.array(gray_image, dtype=float)
    tiles_y = new_height // tile
    tiles_x = new_width // tile

    # One offset per tile, drawn row-major (top-to-bottom, left-to-right) so a
    # seeded `random` produces the same sequence as a nested y/x loop would.
    amplitude = intensity * 10
    noise = np.array(
        [random.uniform(-amplitude, amplitude) for _ in range(tiles_y * tiles_x)]
    ).reshape(tiles_y, tiles_x)

    # View the image as (tile_row, row_in_tile, tile_col, col_in_tile) and add
    # each tile's offset to its [1:3, 1:3] patch in a single broadcast.
    tiled = work.reshape(tiles_y, tile, tiles_x, tile)
    tiled[:, 1:3, :, 1:3] += noise[:, None, :, None]
    work = tiled.reshape(new_height, new_width)

    modified_image = Image.fromarray(np.clip(work, 0, 255).astype(np.uint8))

    # Scale back up if the working copy was trimmed to a tile multiple.
    if needs_resize:
        modified_image = modified_image.resize((width, height), Image.LANCZOS)

    if image.mode in ('RGB', 'RGBA'):
        bands = image.split()
        # Blend only the colour bands; an alpha band (if any) is kept as-is.
        blended = [
            Image.blend(band, modified_image, blend_factor)
            for band in bands[:3]
        ]
        return Image.merge(image.mode, blended + list(bands[3:]))

    return modified_image
def perturb_color_histogram(self, image: Image.Image, strength: float = 0.03) -> Image.Image:
    """
    Shift the dominant colour ranges of an image to disturb its colour histogram.

    For each RGB channel a 64-bin histogram is built, the bins whose count
    exceeds the 70th percentile of bin counts are selected, and every pixel
    in a selected bin is moved by one random integer offset chosen per bin.

    Args:
        image: Input image; converted to RGB when it is in another mode.
        strength: Perturbation strength (0-1). The raw offset is drawn from
            [-strength * bin_width, strength * bin_width] with bin_width = 4.
            Because pixel values are integers, any non-zero raw offset is
            rounded away from zero to at least one level; otherwise small
            strengths would truncate to 0 and leave the image unchanged.

    Returns:
        A new RGB image with the shifted channel values, clipped to 0-255.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')

    pixels = np.array(image)
    num_bins = 64
    bin_width = 256 // num_bins

    for channel in range(pixels.shape[2]):
        # Snapshot the channel in a wider signed type: masks are taken from
        # the untouched values (so no pixel is shifted twice), and adding a
        # negative or overflowing offset cannot wrap or raise as it would
        # on a uint8 array.
        original = pixels[:, :, channel].astype(np.int16)
        shifted = original.copy()

        hist, _ = np.histogram(original, bins=num_bins, range=(0, 256))
        # Bins above the 70th percentile of counts are treated as dominant.
        threshold = np.percentile(hist, 70)

        for bin_idx in np.where(hist > threshold)[0]:
            color_low = int(bin_idx) * bin_width
            color_high = color_low + bin_width
            mask = (original >= color_low) & (original < color_high)

            raw_offset = strength * bin_width * (random.random() - 0.5) * 2
            offset = int(round(raw_offset))
            if offset == 0 and raw_offset != 0:
                # Smallest representable shift in the direction that was drawn.
                offset = 1 if raw_offset > 0 else -1

            shifted[mask] += offset

        pixels[:, :, channel] = np.clip(shifted, 0, 255).astype(np.uint8)

    return Image.fromarray(pixels)
def strip_metadata(self, image: Image.Image) -> Image.Image:
    """
    Return a copy of the image that carries pixel data but no metadata.

    The pixels are copied in memory and the copy's ``info`` dictionary
    (where Pillow keeps EXIF, ICC profile, DPI, text chunks, etc.) is
    emptied. No encode/decode round trip is performed, so the pixels are
    never recompressed and any image mode is accepted.

    Args:
        image: Input image.

    Returns:
        A new, fully loaded image with the same mode, size and pixels.
        Only the ``transparency`` entry of ``info`` is retained, because it
        affects how palette/greyscale pixels are rendered rather than
        describing the file.
    """
    clean = image.copy()
    transparency = clean.info.get('transparency')
    # Replace rather than clear(), so the source image's dict is never touched.
    clean.info = {}
    if transparency is not None:
        clean.info['transparency'] = transparency
    return clean
def optimized_process_image(
self,
image: Image.Image,
@ -356,7 +484,7 @@ class PosterNotesCreator:
variation_strength: str = "medium",
extra_effects: bool = True
) -> Image.Image:
"""优化后的图像处理方法,使用更高效的算法"""
"""优化后的图像处理方法,使用更高效的算法,添加反查重技术"""
# 设置随机种子
if seed is not None:
random.seed(seed)
@ -419,12 +547,14 @@ class PosterNotesCreator:
result = resized_image.crop((0, crop_y1, resized_width, crop_y2))
# 如果不需要变化或是低强度且禁用额外效果
if not add_variation or (variation_strength == "low" and not use_extra):
if not add_variation:
# 重置随机种子
if seed is not None:
random.seed()
np.random.seed()
return result
# 清除元数据后返回
return self.strip_metadata(result)
# 高效应用基本变化
processed_image = result.convert('RGB')
@ -450,6 +580,24 @@ class PosterNotesCreator:
if abs(rotation_angle) > 0.1: # 只有当角度足够大时才旋转
processed_image = processed_image.rotate(rotation_angle, resample=Image.BICUBIC, expand=False)
# 5. 新增 - 应用反查重技术
# 根据变化强度选择性应用
if use_extra:
# 随机决定应用哪些反查重技术
apply_phash = random.random() < 0.7
apply_color = random.random() < 0.7
# 感知哈希干扰 (在中高强度变化时应用)
if apply_phash and variation_strength != "low":
phash_intensity = 0.05 if variation_strength == "medium" else 0.08
processed_image = self.add_phash_noise(processed_image, phash_intensity)
# 颜色直方图扰动
if apply_color:
color_strength = 0.02 if variation_strength == "low" else \
0.04 if variation_strength == "medium" else 0.06
processed_image = self.perturb_color_histogram(processed_image, color_strength)
# 应用额外效果 (只在需要时)
if use_extra:
# 根据强度决定是否应用特定效果
@ -483,6 +631,9 @@ class PosterNotesCreator:
offset_y = random.randint(0, border_size*2)
processed_image = bordered.crop((offset_x, offset_y, offset_x + w, offset_y + h))
# 6. 始终清除元数据 - 最后一步
processed_image = self.strip_metadata(processed_image)
# 重置随机种子
if seed is not None:
random.seed()