Skip to content

合并PDF文档并添加目录

python
import os
from glob import glob
from PyPDF2 import PdfReader, PdfMerger

def merge_pdfs(path_title_list, output_title):
    """
    Merges a list of PDFs into a single PDF with the given title.

    The PDFs are appended in list order. Each one's title is passed to
    PyPDF2 as the ``outline_item`` for that file, so the merged document
    gets one outline (bookmark) entry per input PDF.

    Args:
        path_title_list (list): List of dictionaries containing 'path' and 'title' of each PDF.
        output_title (str): Title for the merged output PDF. The result is
            written to ``<output_title>.pdf``.
    """
    file_merger = PdfMerger()
    try:
        for item in path_title_list:
            pdf_path = item["path"]
            pdf_title = item["title"]
            print(f'[+] Merging {pdf_path}')

            # append() inserts after the current last page, so there is no
            # need to keep a running page count or to parse each input a
            # second time just to learn how many pages it has.
            file_merger.append(pdf_path, outline_item=pdf_title)

        output_path = f'{output_title}.pdf'
        with open(output_path, 'wb') as fp:
            file_merger.write(fp)
    finally:
        # Release the merger's buffered input streams even when a merge or
        # the final write fails.
        file_merger.close()
    print(f'[+] Merged PDF saved as {output_path}')


def get_pdf_list(pdfs_path):
    """
    Retrieves and sorts a list of PDF files from the specified directory.

    Files are matched with the glob pattern ``*.pdf`` (non-recursive) and
    ordered by ``os.path.getctime``. Each title is the file name with only
    its final extension removed, so a name such as ``a.pdf.v2.pdf`` yields
    the title ``a.pdf.v2``.

    Args:
        pdfs_path (str): Path to the directory containing PDF files.

    Returns:
        list: List of dictionaries containing 'path' and 'title' for each PDF file.
            Empty when the directory has no matching files or does not exist.
    """
    pdf_files = sorted(glob(os.path.join(pdfs_path, '*.pdf')), key=os.path.getctime)

    # splitext drops only the trailing extension; str.replace('.pdf', '')
    # would also delete any ".pdf" occurring in the middle of the name.
    return [
        {'path': file, 'title': os.path.splitext(os.path.basename(file))[0]}
        for file in pdf_files
    ]


def make_merge_pdfs(pdfs_path, output_title):
    """
    Collects the PDFs found in a directory and merges them into one output PDF.

    Args:
        pdfs_path (str): Directory that holds the PDF files to combine.
        output_title (str): Title handed to merge_pdfs for the merged output PDF.
    """
    # Discovery (get_pdf_list) and merging (merge_pdfs) are both delegated.
    merge_pdfs(get_pdf_list(pdfs_path), output_title)
toml
dependencies = [
    "PyPDF2>=3.0.1",
]
requires-python = "==3.9.*"

Released under the MIT License.