PDF分割？有了这把魔法剪，PDF任你裁剪（PyPDF2）-

Posted 2021-06-27 Tisfy

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了PDF分割？有了这把魔法剪，PDF任你裁剪（PyPDF2）-相关的知识，希望对你有一定的参考价值。

传送门

前情提要

在阅读这篇文章之前，推荐先看一下前两篇文章

上篇也提到了，裁剪出的PDF带有黑边，影响美观。改他！
并且这样一次只能修改一个PDF，能一次修改多个PDF文件吗，改他！

黑边处理

其实，既然在第一篇中知道了如何定义裁剪范围，那么我们在裁剪的时候，把上下两条边各往里收一点就是了。

定义一个height_diff_per变量，表示每次裁剪时上下边缘各向内收缩（少保留）的高度

import PyPDF2
from sys import argv

# Source PDF that the script reads and destination PDF that it writes;
# edit both to point at your own files.
# NOTE(review): the doubled path separators look like an escaping artifact — confirm.
input_file_path = 'C:\\\\Users\\\\LetMeFly\\\\Desktop\\\\lec1_1.pdf'
output_file_path = 'C:\\\\Users\\\\LetMeFly\\\\Desktop\\\\lec1_1-sp.pdf'


def split(page, tup):
    """Crop *page* in place to the rectangle given by *tup*.

    *tup* is read by index as ``(left, bottom, right, top)``. Each of the
    four corner attributes of ``page.mediaBox`` is overwritten with the
    matching ``(x, y)`` pair.
    """
    # corner attribute -> (index of x in tup, index of y in tup)
    corner_indices = (
        ("lowerLeft", 0, 1),
        ("lowerRight", 2, 1),
        ("upperLeft", 0, 3),
        ("upperRight", 2, 3),
    )
    for corner, x_at, y_at in corner_indices:
        setattr(page.mediaBox, corner, (tup[x_at], tup[y_at]))


# Cut every page of the source PDF into four quadrant pages.
#
# The source is opened once per quadrant because split() rewrites a page's
# mediaBox in place; presumably adding one shared page object four times
# would leave all four output pages with the last crop applied.
input_handles = []
try:
    for _ in range(4):
        input_handles.append(open(input_file_path, 'rb'))
    input_file_list = [PyPDF2.PdfFileReader(handle) for handle in input_handles]
    output_file = PyPDF2.PdfFileWriter()

    # Page size is taken from the first page and reused for every page.
    page_info = input_file_list[0].getPage(0)
    width = float(page_info.mediaBox.getWidth())
    height = float(page_info.mediaBox.getHeight())
    # Amount shaved off the bottom and top edges so the black border is
    # left out of the crop; raise it for files with a thicker border.
    height_diff_per = 4
    page_count = input_file_list[0].getNumPages()
    # One (left, bottom, right, top) rectangle per quadrant, in the order
    # lower-left, upper-left, lower-right, upper-right.
    to_split_XYs = [
        (0, 0 + height_diff_per, width / 2, height / 2),
        (0, height / 2, width / 2, height - height_diff_per),
        (width / 2, 0 + height_diff_per, width, height / 2),
        (width / 2, height / 2, width, height - height_diff_per),
    ]

    for page_num in range(page_count):
        for i in range(4):
            this_page = input_file_list[i].getPage(page_num)
            split(this_page, to_split_XYs[i])
            output_file.addPage(this_page)

    # The inputs must still be open here: they are only closed in the
    # ``finally`` below, after the writer has produced the output.
    with open(output_file_path, 'wb') as output_handle:
        output_file.write(output_handle)
finally:
    for handle in input_handles:
        handle.close()

没有黑边

这样就没有黑边了！如果有些文件的黑边比较大，只需要把height_diff_per的值改大一点即可。

多文件一次性选择

为了方便保存，决定采用一次处理一个文件夹的方式。同时，为了制作成有图形界面的窗口，决定采用tkinter。

import tkinter as tk
from tkinter import filedialog
import PyPDF2
import os

# Build the fixed-size main window of the cutter tool.
window = tk.Tk()
window.title("LetMeFly-Cutter")
window.resizable(0, 0)
# window.attributes("-toolwindow", 2)
window.geometry("200x395+685+205")
try:
    window.iconbitmap('Fly.ico')
except tk.TclError:
    # The icon is cosmetic only: keep the default window icon when Fly.ico
    # is missing or cannot be loaded instead of aborting start-up.
    pass

# Folders picked through the dialogs; an empty string means "not chosen yet".
input_file_path, output_file_path = '', ''


def choose_input_file_path():
    """Ask for the folder containing the PDFs to cut and remember it.

    On a confirmed choice the module-level ``input_file_path`` is updated
    and the button is relabelled and re-placed. When the dialog is
    cancelled nothing changes, so an earlier choice is not silently wiped
    while the button still reads "已选择".
    """
    global input_file_path
    chosen = filedialog.askdirectory(title="请选择")
    if not chosen:
        return
    input_file_path = chosen
    button_input_file_path.config(text="已选择")
    # The shorter label needs a different x offset to stay centred.
    button_input_file_path.place(x=67, y=5)


def choose_output_file_path():
    """Ask for the folder that receives the cut PDFs and remember it.

    On a confirmed choice the module-level ``output_file_path`` is updated
    and the button is relabelled and re-placed. When the dialog is
    cancelled nothing changes, so an earlier choice is not silently wiped
    while the button still reads "已选择".
    """
    global output_file_path
    chosen = filedialog.askdirectory(title="请选择")
    if not chosen:
        return
    output_file_path = chosen
    button_output_file_path.config(text="已选择")
    # The shorter label needs a different x offset to stay centred.
    button_output_file_path.place(x=67, y=40)


def split_one_arc(page, tup):
    """Crop *page* in place to the rectangle given by *tup*.

    *tup* is read by index as ``(left, bottom, right, top)``. Each of the
    four corner attributes of ``page.mediaBox`` is overwritten with the
    matching ``(x, y)`` pair.
    """
    # corner attribute -> (index of x in tup, index of y in tup)
    corner_indices = (
        ("lowerLeft", 0, 1),
        ("lowerRight", 2, 1),
        ("upperLeft", 0, 3),
        ("upperRight", 2, 3),
    )
    for corner, x_at, y_at in corner_indices:
        setattr(page.mediaBox, corner, (tup[x_at], tup[y_at]))


def split_one_file(input_file_name, output_file_name, height_diff_per=4):
    """Cut every page of one PDF into four quadrant pages and save the result.

    Args:
        input_file_name: Path of the PDF to read.
        output_file_name: Path of the PDF to write; it is only opened once
            all pages have been cropped.
        height_diff_per: Amount shaved off the bottom and top edges of the
            crop so the black border is left out. Raise it for files with
            a thicker border.

    The page size is measured on the first page and reused for all pages.
    All file handles are closed before returning, also on failure.
    """
    input_handles = []
    try:
        # One reader per quadrant: split_one_arc() rewrites the page's
        # mediaBox in place, so each quadrant gets its own page object.
        for _ in range(4):
            input_handles.append(open(input_file_name, 'rb'))
        readers = [PyPDF2.PdfFileReader(handle) for handle in input_handles]

        first_box = readers[0].getPage(0).mediaBox
        width = float(first_box.getWidth())
        height = float(first_box.getHeight())
        # (left, bottom, right, top) per quadrant, in the order
        # lower-left, upper-left, lower-right, upper-right.
        quadrants = [
            (0, 0 + height_diff_per, width / 2, height / 2),
            (0, height / 2, width / 2, height - height_diff_per),
            (width / 2, 0 + height_diff_per, width, height / 2),
            (width / 2, height / 2, width, height - height_diff_per),
        ]

        output_file = PyPDF2.PdfFileWriter()
        for page_num in range(readers[0].getNumPages()):
            for reader, quadrant in zip(readers, quadrants):
                this_page = reader.getPage(page_num)
                split_one_arc(this_page, quadrant)
                output_file.addPage(this_page)

        # The inputs stay open until the writer has finished; they are
        # closed in the ``finally`` below.
        with open(output_file_name, 'wb') as output_handle:
            output_file.write(output_handle)
    finally:
        for handle in input_handles:
            handle.close()


def _append_log(message):
    """Append *message* to the read-only log box."""
    text_area.config(state='normal')
    text_area.insert(tk.END, message)
    text_area.config(state='disabled')


def _set_busy(busy):
    """Disable the three buttons while a batch runs, or re-enable them."""
    state, cursor = ('disabled', 'watch') if busy else ('normal', 'dotbox')
    for button in (button_input_file_path, button_output_file_path, button_begin_split):
        button.config(state=state)
        button.config(cursor=cursor)


def split():
    """Cut every PDF in the chosen input folder into the chosen output folder.

    The log box is cleared first. If either folder has not been chosen, a
    hint is logged and nothing else happens. Otherwise the buttons are
    locked, each regular file with a ``.pdf`` extension (case-insensitive)
    is passed to ``split_one_file`` and written as ``LP-<name>``, and the
    buttons are restored afterwards. The first failure stops the batch and
    logs the failure message; the completion message is logged only when
    the whole batch succeeded.
    """
    text_area.config(state='normal')
    text_area.delete('1.0', tk.END)
    text_area.config(state='disabled')
    if not input_file_path:
        _append_log("请先选择要裁剪的PDF所在的文件夹\n")
        return
    if not output_file_path:
        _append_log("请先选择要保存到的文件夹（不建议在同一个文件夹下）\n")
        return

    _set_busy(True)
    button_begin_split.config(text="正在裁剪")
    try:
        for this in os.listdir(input_file_path):
            source_path = os.path.join(input_file_path, this)
            if not os.path.isfile(source_path):
                continue
            # splitext avoids treating a file literally named "pdf" as a PDF.
            if os.path.splitext(this)[1].lower() != '.pdf':
                continue
            _append_log(f"正在裁剪{this}\n")
            # Repaint now: the work below blocks the event loop.
            text_area.update()
            split_one_file(source_path, os.path.join(output_file_path, 'LP-' + this))
    except Exception:
        # Top-level UI boundary: report the failure instead of crashing,
        # but let KeyboardInterrupt/SystemExit propagate.
        _append_log("Sorry,裁剪失败！\n\n代码及原理请见：https://letmefly.blog.csdn.net/article/details/117638672\n")
        text_area.update()
    else:
        _append_log("裁剪完毕！\n")
    finally:
        # Always unlock the UI and restore the original labels/positions.
        _set_busy(False)
        button_input_file_path.config(text="选择要裁剪的PDF文件夹")
        button_input_file_path.place(x=27.5, y=5)
        button_output_file_path.config(text="选择你要保存到的文件夹")
        button_output_file_path.place(x=26, y=40)
        button_begin_split.config(text="开始裁剪")


# Button that opens the dialog for the folder of PDFs to cut.
button_input_file_path = tk.Button(window, text="选择要裁剪的PDF文件夹", command=choose_input_file_path)
button_input_file_path.place(x=27.5, y=5)
button_input_file_path.config(cursor="dotbox")

# Button that opens the dialog for the folder receiving the results.
button_output_file_path = tk.Button(window, text='选择你要保存到的文件夹', command=choose_output_file_path)
button_output_file_path.place(x=26, y=40)
button_output_file_path.config(cursor="dotbox")

# Log box; it is kept disabled so the user cannot type into it.
text_area = tk.Text(window, height=20, width=25)
text_area.place(x=9, y=80)
# Real newlines here: an escaped backslash would show up as a literal "\n".
text_area.insert(tk.END, 'Author: LetMeFly[https://letmefly666.github.io/various/]\n\n')
text_area.insert(tk.END, '更多详情请访问: https://letmefly.blog.csdn.net/article/details/117638672\n')
text_area.config(state='disabled')
text_area.config(cursor="star")

# Button that starts the batch.
button_begin_split = tk.Button(window, text="开始裁剪", command=split)
button_begin_split.place(x=65, y=350)
button_begin_split.config(cursor="dotbox")

# Hand control to the tkinter event loop; returns when the window closes.
window.mainloop()

就是在前面的基础上，增加了可视化窗口，以及选择文件夹的功能。

效果如下：
main
选择文件夹

选择完毕后点击裁剪即可

打开选择的保存文件夹，里面全是裁剪好的PDF
效果图

原创不易，转载请附上原文链接哦~
Tisfy：https://letmefly.blog.csdn.net/article/details/117638672

以上是关于PDF分割？有了这把魔法剪，PDF任你裁剪（PyPDF2）-的主要内容，如果未能解决你的问题，请参考以下文章

PDF分割？有了这把魔法剪，PDF任你裁剪（PyPDF2）-

如何使用pyPdf沿中间分割/裁剪pdf

iText - 裁剪出 pdf 文件的一部分

PDF怎么裁剪页面，PDF裁剪页面的操作步骤

如何修改PDF文件，PDF页面怎么裁剪