Python 下载html 为PDF, 再转成PNG
HTML to PDF: https://www.jb51.net/article/160638.htm
PDF to PNG: https://cloud.tencent.com/developer/article/1481641
import pdfkit, sys import sys, fitz import os import datetime
# Base directory for every path used below: the first entry of sys.path
# (normally the directory of the script being run).
PATH = sys.path[0]
print(PATH)

# Ask pdfkit to render the web page at this URL into PATH/out.pdf.
pdfkit.from_url(
    'https://www.nature.com/subjects/biological-sciences',
    PATH + '/out.pdf',
)
def pyMuPDF2_fitz(pdfPath, imagePath):
    """Render a clipped region of every page of a PDF to PNG files.

    Each page of the document at ``pdfPath`` is rendered with a zoom factor
    of 1.33333333 on both axes and no rotation, restricted to a clip
    rectangle that ends at the page's bottom-right corner. The result is
    written to ``<imagePath>/psReport_<page index>.png`` (page indices start
    at 0). ``imagePath`` is created if it does not exist yet.

    Args:
        pdfPath: Path of the PDF file, passed to ``fitz.open``.
        imagePath: Directory that receives the PNG files.
    """
    zoom_x = 1.33333333
    zoom_y = 1.33333333
    rotate = 0
    # The transform is identical for every page, so build it once.
    # NOTE(review): preRotate / pageCount / getPixmap / writePNG are the
    # camelCase PyMuPDF spellings used by the original code; newer PyMuPDF
    # releases use snake_case names - confirm against the installed version.
    mat = fitz.Matrix(zoom_x, zoom_y).preRotate(rotate)

    pdfDoc = fitz.open(pdfPath)
    try:
        # Create the output directory once instead of checking on every
        # page; exist_ok avoids the check-then-create race.
        os.makedirs(imagePath, exist_ok=True)

        for pg in range(pdfDoc.pageCount):
            page = pdfDoc[pg]
            rect = page.rect
            # Top-left corner of the clip: tl + bl shifted up by
            # 1224 / zoom_x page units. If the page rect starts at (0, 0)
            # this keeps the bottom 1224 output pixels of the page
            # (presumably the intent - verify for pages with another origin).
            mp = rect.tl + (rect.bl - (0, 1224 / zoom_x))
            clip = fitz.Rect(mp, rect.br)
            pix = page.getPixmap(matrix=mat, alpha=False, clip=clip)
            pix.writePNG(os.path.join(imagePath, 'psReport_%s.png' % pg))
    finally:
        # Always release the document, even if rendering a page fails.
        pdfDoc.close()
if __name__ == "__main__":
    # Convert PATH/out.pdf into PNG files stored under PATH/../Nature.
    pdfPath, imagePath = PATH + '/out.pdf', PATH + '/../Nature'
    pyMuPDF2_fitz(pdfPath=pdfPath, imagePath=imagePath)
|