前几天有一份作业是 PDF 文件,答案已经直接用红色填写在上面。为了打印出空白作业,需要将红色内容全部删除。
求助Linux.do的大佬,得到了解决办法。
以下是 Python 代码:

import fitz
import os
from PIL import Image
import numpy as np


def is_red(color):
    """Report whether an RGB colour is dominated by its red channel.

    A colour counts as red when its red component exceeds the larger of
    its green and blue components by more than 10.

    Args:
        color: A three-item iterable ``(r, g, b)`` of channel values.

    Returns:
        True if the colour is considered red, otherwise False.
    """
    red, green, blue = color
    strongest_other = max(green, blue)
    return bool(red > strongest_other + 10)

def replace_red_color(color, surrounding_colors):
    """Return a replacement for a red colour based on its non-red neighbours.

    Args:
        color: The ``(r, g, b)`` colour of the pixel under consideration.
        surrounding_colors: Iterable of ``(r, g, b)`` colours of the
            neighbouring pixels.

    Returns:
        ``color`` unchanged when it is not red (per ``is_red``) or when every
        neighbour is red as well. Otherwise a tuple holding the per-channel
        mean of the non-red neighbours, converted with ``astype(int)``.
    """
    if not is_red(color):
        return color

    candidates = [
        neighbour for neighbour in surrounding_colors if not is_red(neighbour)
    ]
    if not candidates:
        # Nothing usable to blend from; leave the pixel as it is.
        return color

    mean_color = np.mean(candidates, axis=0)
    return tuple(mean_color.astype(int))

def process_image(pix):
    """Replace red pixels of an image with colours taken from their neighbours.

    Every interior pixel is tested with ``is_red``; a red pixel is handed to
    ``replace_red_color`` together with its eight neighbours and the result
    is written back. The outermost one-pixel frame is never modified because
    its 3x3 neighbourhood would extend outside the image.

    The image is edited in place while scanning row by row, left to right,
    so the neighbours above and to the left of a pixel may already contain
    replaced colours when they are read.

    Args:
        pix: Image object exposing ``size``, ``getpixel`` and ``putpixel``
            whose pixel values are ``(r, g, b)`` tuples.

    Returns:
        The same image object that was passed in, after modification.
    """
    width, height = pix.size
    # Same neighbour order as a row-major walk of the 3x3 window, centre excluded.
    neighbour_offsets = (
        (-1, -1), (0, -1), (1, -1),
        (-1, 0), (1, 0),
        (-1, 1), (0, 1), (1, 1),
    )

    for y in range(1, height - 1):
        for x in range(1, width - 1):
            color = pix.getpixel((x, y))
            if not is_red(color):
                # Non-red pixels are never altered, so skip the eight
                # neighbour reads and the redundant write-back.
                continue
            surrounding_colors = [
                pix.getpixel((x + dx, y + dy)) for dx, dy in neighbour_offsets
            ]
            pix.putpixel((x, y), replace_red_color(color, surrounding_colors))
    return pix

def process_pdf(input_file, output_file):
    """Rasterise a PDF, strip red content from each page, and save a new PDF.

    Each page of ``input_file`` is rendered with a 300/72 scale matrix,
    written to PNG files inside the ``temp_images`` directory (created in the
    current working directory if missing), run through ``process_image``,
    and inserted as a full-page image into a new page of the same size.

    For every page three PNG files are left in ``temp_images``:
    ``before_page_N.png`` (untouched render), ``page_N.png`` and
    ``after_page_N.png`` (both the processed render). They are not removed.

    Args:
        input_file: Path of the PDF to read.
        output_file: Path the resulting PDF is saved to.
    """
    temp_dir = "temp_images"
    os.makedirs(temp_dir, exist_ok=True)

    # Context managers guarantee both documents are closed even when
    # rendering, image processing, or saving raises.
    with fitz.open(input_file) as doc, fitz.open() as output_doc:
        for page_num in range(len(doc)):
            page = doc[page_num]
            pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))

            # Keep an untouched copy of the render for comparison.
            before_png = os.path.join(temp_dir, f"before_page_{page_num+1}.png")
            pix.save(before_png)

            # Working copy: written, processed, then overwritten in place.
            temp_png = os.path.join(temp_dir, f"page_{page_num+1}.png")
            pix.save(temp_png)
            img = Image.open(temp_png)
            processed_img = process_image(img)
            processed_img.save(temp_png)

            after_png = os.path.join(temp_dir, f"after_page_{page_num+1}.png")
            processed_img.save(after_png)

            output_page = output_doc.new_page(
                width=page.rect.width, height=page.rect.height
            )
            output_pix = fitz.Pixmap(temp_png)
            output_page.insert_image(
                output_page.rect, pixmap=output_pix, overlay=False
            )

            # Drop the references so the large pixel buffers can be freed
            # before the next page is rendered.
            pix = None
            output_pix = None

        output_doc.save(output_file)

# When run as a script, process "input.pdf" from the current working
# directory with process_pdf and write the result to "output.pdf".
if __name__ == "__main__":
    process_pdf("input.pdf", "output.pdf")

运行前需要先安装以下依赖库:

pip install PyMuPDF Pillow numpy

来源:https://linux.do/t/topic/111219/29?u=myseil Linux.do 上有很多技术高超又热心的大佬!需要注册的可以留言,目前 2 级账户还可以邀请注册。

另外也有大佬使用pitstop进行去除,在这里一并作为感谢。不过这个软件好像是收费的。

标签: PDF去标红

添加新评论