python压缩pdf

有个需求是压缩pdf,没有wps,又不想泄露源文件,用python写吧。代码是抄了然后改的

  1. pip install PyMuPDF(代码里 import 的模块名是 fitz;注意不要安装 PyPI 上那个名为 fitz 的同名包,它不是这个库)
  2. 运行代码
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    import fitz
    import os

    def covert2pic(zoom):
        """Render every page of the module-level ``doc`` to a JPEG in ``.pdf/``.

        Relies on two names set up by the script entry point: ``doc`` (the
        opened source document) and ``totaling`` (its page count). Pages are
        written as ``.pdf/1.jpg``, ``.pdf/2.jpg``, ... and ``doc`` is closed
        afterwards, even if rendering fails part-way.

        Args:
            zoom: Scale percentage applied to both axes; 100 means a scale
                factor of 1.0, larger values give a higher-resolution image.
        """
        # The page images are saved into this scratch directory, so it has to
        # exist before the first save; nothing else in the script creates it.
        os.makedirs('.pdf', exist_ok=True)

        # The transform does not depend on the page, so build it once.
        scale = int(zoom) / 100.0
        trans = fitz.Matrix(scale, scale)

        try:
            for pg in range(totaling):
                page = doc[pg]
                print(page)
                pm = page.get_pixmap(matrix=trans, alpha=False)
                pm.save('.pdf/%s.jpg' % str(pg + 1))
        finally:
            doc.close()

    def pic2pdf(obj):
        """Assemble the rendered page images into one PDF saved at ``obj``.

        Reads ``.pdf/1.jpg`` .. ``.pdf/<totaling>.jpg`` (``totaling`` is a
        module-level name set by the script entry point), turns each image
        into a single-page PDF, appends it to a new document and deletes the
        image file once it has been consumed.

        Args:
            obj: Path of the PDF to write. An existing file at that path is
                removed before saving.
        """
        merged = fitz.open()  # new, empty PDF that collects all pages
        try:
            for pg in range(totaling):
                img = '.pdf/%s.jpg' % str(pg + 1)

                imgdoc = fitz.open(img)  # open the image as a document
                try:
                    # One-page PDF, as bytes, built from the image.
                    pdfbytes = imgdoc.convert_to_pdf()
                finally:
                    # Release the handle before deleting the file; removing a
                    # file that is still open fails on some platforms.
                    imgdoc.close()
                os.remove(img)

                imgpdf = fitz.open("pdf", pdfbytes)
                try:
                    merged.insert_pdf(imgpdf)  # append this page
                finally:
                    imgpdf.close()

            if os.path.exists(obj):  # drop a stale output file first
                os.remove(obj)
            merged.save(obj)
        finally:
            merged.close()


    def pdfz(sor, obj, zoom):
        """Run the two conversion stages: pages to images, then images to PDF.

        Args:
            sor: Source PDF path. Not referenced in this body; the stages read
                the already-opened module-level document instead.
            obj: Path handed to ``pic2pdf`` as the output PDF.
            zoom: Scale percentage handed to ``covert2pic``.
        """
        # Stage 1: render each page to an image file.
        covert2pic(zoom)
        # Stage 2: rebuild a PDF from those image files.
        pic2pdf(obj)

    if __name__ == "__main__":
        # Scale percentage passed to the renderer (controls sharpness).
        zoom = 200

        sor = "7.pdf"  # PDF file to be compressed
        obj = f"new{sor}"  # output file name: source name prefixed with "new"

        # covert2pic and pic2pdf read these two module-level names directly.
        doc = fitz.open(sor)
        totaling = doc.page_count

        pdfz(sor, obj, zoom)

        # Remove the scratch directory that held the page images.
        os.removedirs('.pdf')

暂时成功了,以后版本升级之后可能有bug吧。不过思路没错