合并PDF文档并添加目录
python
import os
from glob import glob
from PyPDF2 import PdfReader, PdfMerger
def merge_pdfs(path_title_list, output_title):
    """
    Merge a list of PDFs into a single PDF with one outline entry per input.

    Inputs are added in list order, each after the pages merged so far, and
    each gets an outline (bookmark) item labelled with its 'title'. The
    result is written to '<output_title>.pdf'.

    Args:
        path_title_list (list): List of dictionaries containing 'path' and 'title' of each PDF.
        output_title (str): Title for the merged output PDF; '.pdf' is appended to form the file name.
    """
    file_merger = PdfMerger()
    try:
        for item in path_title_list:
            pdf_path = item["path"]
            pdf_title = item["title"]
            print(f'[+] Merging {pdf_path}')
            with open(pdf_path, "rb") as pdf_file:
                # append() inserts at the current end of the merged document,
                # so there is no need to parse every input a second time with
                # PdfReader just to maintain a running page count.
                file_merger.append(pdf_file, outline_item=pdf_title)
        output_path = f'{output_title}.pdf'
        with open(output_path, 'wb') as fp:
            file_merger.write(fp)
    finally:
        # Always release what the merger holds, even if an input fails to load.
        file_merger.close()
    print(f'[+] Merged PDF saved as {output_path}')
def get_pdf_list(pdfs_path):
    """
    Retrieve the PDF files directly inside a directory, ordered by ctime.

    Files are sorted by os.path.getctime (creation time on Windows, last
    metadata change on Unix); files with equal timestamps are ordered by
    path so the result is deterministic.

    Args:
        pdfs_path (str): Path to the directory containing PDF files.
    Returns:
        list: List of dictionaries containing 'path' and 'title' for each PDF file,
            where 'title' is the file name without its extension.
    """
    # Local import: only `glob.glob` is imported at file level.
    from glob import escape

    # Escape the directory part so characters such as '[' or '*' in its name
    # are matched literally instead of being treated as glob wildcards.
    pattern = os.path.join(escape(os.fspath(pdfs_path)), '*.pdf')
    pdf_files = sorted(glob(pattern), key=lambda path: (os.path.getctime(path), path))
    # splitext strips only the final extension; str.replace('.pdf', '') would
    # also delete any '.pdf' occurring in the middle of the file name.
    return [
        {'path': path, 'title': os.path.splitext(os.path.basename(path))[0]}
        for path in pdf_files
    ]
def make_merge_pdfs(pdfs_path, output_title):
    """
    Collect the PDFs found in a directory and merge them into one output PDF.

    Args:
        pdfs_path (str): Directory to collect the PDF files from.
        output_title (str): Title handed to merge_pdfs for the merged output PDF.
    """
    merge_pdfs(get_pdf_list(pdfs_path), output_title)
toml
dependencies = [
"PyPDF2>=3.0.1",
]
requires-python = "==3.9.*"