[Java知识库] 使用java开源框架pdfbox添加书签

开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

-> Java知识库 -> 使用java开源框架pdfbox添加书签 -> 正文阅读

[Java知识库]使用java开源框架pdfbox添加书签

gitee代码

参考博客

pdfBox操作pdf

java操作PDF文件，可支持分页、合并、图片转PDF等

使用 Apache PDFBox 在 Java 中为 PDF 文档创建书签

PDFbox-PDF解析（坐标定位，分页读取）

功能实现

实现思路

通过正则表达式匹配标题
根据标题及其所在页码添加书签（书签只能定位到标题所在的页，不能定位到页内的具体位置）

实现效果

pdfbox给pdf添加书签

java操作PDF，有一个很好用的工具——pdfbox。只需要引入依赖，即可使用。

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-app</artifactId>
    <version>2.0.21</version>
</dependency>

利用这个工具，可以实现很多的功能，我这里示例了以下几种：

加载PDF文档
创建一个单页的PDF空文档
获取PDF文档总页数
获取pdf文档的所有分页对象
给整个PDF文件分页，形成多个pdf单页文件
合并多个单页PDF文件，输出一个合并后的PDF文档
图片转PDF
获取pdf单页分辨率

代码如下：

package com.bridge.pdf.utils;

import com.bridge.enums.UtilsEnums;
import com.bridge.pdf.model.PdfBoxData;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitWidthDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.text.PDFTextStripper;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * @author bridge
 * @Date 2022/05/08/18:14
 */
@Slf4j
public class PDFBoxUtils {
?
 ? ?public static void main(String[] args) throws IOException {
 ? ? ? ?String savePath = "C:\\Users\\Administrator\\Desktop\\tmp\\pdf\\添加书签-" + System.currentTimeMillis() + ".pdf";
 ? ? ? ?File file = new File("C:\\Users\\Administrator\\Desktop\\tmp\\k8s尚硅谷\\03_尚硅谷大数据技术之实时项目-需求一日活.pdf");
 ? ? ? ?PDDocument pdDocument = PDFBoxUtils.load(file);
 ? ? ? ?if (pdDocument == null) {
 ? ? ? ? ? ?return;
 ? ? ?  }
 ? ? ? ?List<PdfBoxData> allBookList = new ArrayList<>();
 ? ? ? ?int numberOfPages = pdDocument.getNumberOfPages();
 ? ? ? ?for (int i = 1; i <= numberOfPages; i++) {
 ? ? ? ? ? ?allBookList.addAll(PDFBoxUtils.getPdfBoxTextList(pdDocument, i));
 ? ? ?  }
 ? ? ? ?addMarkBook(pdDocument,allBookList, savePath);
 ? ? ? ?PDFBoxUtils.close(pdDocument);
 ?  }
?
 ? ?public static void addMarkBook(PDDocument document, List<PdfBoxData> allBookList, String savePath) throws IOException {
 ? ? ? ?for (int i = 0; i < 10; i++) {
 ? ? ? ? ? ?document.addPage(new PDPage());
 ? ? ?  }
?
 ? ? ? ?PDDocumentOutline documentOutline = new PDDocumentOutline();
 ? ? ? ?document.getDocumentCatalog().setDocumentOutline(documentOutline);
 ? ? ? ?PDOutlineItem pagesOutline = new PDOutlineItem();
 ? ? ? ?pagesOutline.setTitle("All Pages");
 ? ? ? ?documentOutline.addLast(pagesOutline);
?
 ? ? ? ?for (PdfBoxData pdfBoxData : allBookList) {
 ? ? ? ? ? ?PDPageDestination pageDestination = new PDPageFitWidthDestination();
 ? ? ? ? ? ?pageDestination.setPage(document.getPage(pdfBoxData.getPage()-1));
 ? ? ? ? ? ?PDOutlineItem bookmark = new PDOutlineItem();
 ? ? ? ? ? ?bookmark.setDestination(pageDestination);
 ? ? ? ? ? ?bookmark.setTitle(pdfBoxData.getTitle());
 ? ? ? ? ? ?pagesOutline.addLast(bookmark);
 ? ? ?  }
 ? ? ? ?pagesOutline.openNode();
 ? ? ? ?documentOutline.openNode();
 ? ? ? ?document.getDocumentCatalog().setPageMode(PageMode.USE_OUTLINES);
 ? ? ? ?document.save(savePath);
 ?  }
?
 ? ?public static List<PdfBoxData> getPdfBoxTextList(PDDocument document, int page) throws IOException {
 ? ? ? ?//文本剥离器
 ? ? ? ?PDFTextStripper stripper = new PDFTextStripper();
 ? ? ? ?//按页进行读取，页码从1开始
 ? ? ? ?stripper.setStartPage(page);
 ? ? ? ?stripper.setEndPage(page);
 ? ? ? ?//按位置进行排序
 ? ? ? ?stripper.setSortByPosition(true);
 ? ? ? ?//获取文本
 ? ? ? ?String text = stripper.getText(document);
 ? ? ? ?String[] dataArr = text.split("\r\n");
 ? ? ? ?List<PdfBoxData> pdfBoxDataList = new ArrayList<>();
 ? ? ? ?for (String data : dataArr) {
 ? ? ? ? ? ?if (data.matches(UtilsEnums.CHAPTER_TITLE_REGEX.getCode()) ||
 ? ? ? ? ? ? ? ? ? ?data.matches(UtilsEnums.FIRST_TITLE_REGEX.getCode())) {
 ? ? ? ? ? ? ? ?pdfBoxDataList.add(new PdfBoxData(data, page));
 ? ? ? ? ?  }
?
 ? ? ?  }
 ? ? ? ?return pdfBoxDataList;
 ?  }
?
?
 ? ?/**
 ? ? * 从文件中加载pdf
 ? ? *
 ? ? * @param file 文件
 ? ? * @return
 ? ? * @throws IOException
 ? ? */
 ? ?public static PDDocument load(File file) throws IOException {
 ? ? ? ?if (!file.exists() || file.isDirectory()) {
 ? ? ? ? ? ?return null;
 ? ? ?  }
 ? ? ? ?return PDDocument.load(file);
 ?  }
?
 ? ?/**
 ? ? * 从文件流中加载pdf
 ? ? *
 ? ? * @param inputStream 文件输入流
 ? ? * @return
 ? ? * @throws IOException
 ? ? */
 ? ?public static PDDocument load(InputStream inputStream) throws IOException {
 ? ? ? ?if (inputStream == null || inputStream.available() == 0) {
 ? ? ? ? ? ?return null;
 ? ? ?  }
 ? ? ? ?return PDDocument.load(inputStream);
 ?  }
?
 ? ?/**
 ? ? * 创建一个单页的PDF空文档
 ? ? *
 ? ? * @param outputFile
 ? ? * @return
 ? ? * @throws IOException
 ? ? */
 ? ?public static PDDocument getBlankPDF(File outputFile) throws IOException {
 ? ? ? ?//首先创建pdf文档类
 ? ? ? ?PDDocument pdf = null;
 ? ? ? ?pdf = new PDDocument();
 ? ? ? ?//实例化pdf页对象
 ? ? ? ?PDPage blankPage = new PDPage();
 ? ? ? ?//插入文档类
 ? ? ? ?pdf.addPage(blankPage);
 ? ? ? ?//保存
 ? ? ? ?pdf.save(outputFile);
 ? ? ? ?return pdf;
 ?  }
?
 ? ?/**
 ? ? * 获取pdf总页数
 ? ? *
 ? ? * @param pdf
 ? ? * @return
 ? ? */
 ? ?public static int pageCount(PDDocument pdf) {
 ? ? ? ?return pdf.getNumberOfPages();
 ?  }
?
 ? ?/**
 ? ? * 获取pdf文档的所有分页对象
 ? ? *
 ? ? * @param pdf
 ? ? * @return 返回的list集合
 ? ? */
 ? ?public static List<PDPage> getPageList(PDDocument pdf) {
 ? ? ? ?int count = pageCount(pdf);
 ? ? ? ?List<PDPage> pages = new ArrayList<>(64);
 ? ? ? ?PDPageTree pdPages = pdf.getPages();
 ? ? ? ?for (int i = 0; i < count; i++) {
 ? ? ? ? ? ?PDPage pdPage = pdPages.get(i);
 ? ? ? ? ? ?pages.add(pdPage);
 ? ? ?  }
 ? ? ? ?return pages;
 ?  }
?
?
 ? ?/**
 ? ? * 给整个PDF文件分页，形成多个pdf单页文件
 ? ? *
 ? ? * @param inputStream  pdf文件流
 ? ? * @param outputParent 输出文件的父目录
 ? ? * @throws IOException
 ? ? */
 ? ?public static Integer pageSpilt(InputStream inputStream, File outputParent) throws IOException {
 ? ? ? ?if (!outputParent.exists() || !outputParent.isDirectory()) {
 ? ? ? ? ? ?throw new RuntimeException("输出文件的父目录不存在");
 ? ? ?  }
?
 ? ? ? ?PDDocument pdf = load(inputStream);
 ? ? ? ?try {
 ? ? ? ? ? ?int numberOfPages = pageCount(pdf);
 ? ? ? ? ? ?for (int i = 0; i < numberOfPages; i++) {
 ? ? ? ? ? ? ? ?PDDocument document = new PDDocument();
 ? ? ? ? ? ? ? ?document.addPage(pdf.getPage(i));
 ? ? ? ? ? ? ? ?document.save(new File(outputParent, i + 1 + ".pdf"));
 ? ? ? ? ? ? ? ?close(document);
 ? ? ? ? ?  }
 ? ? ? ? ? ?return numberOfPages;
 ? ? ?  } finally {
 ? ? ? ? ? ?close(pdf);
 ? ? ? ? ? ?close(inputStream);
 ? ? ?  }
 ?  }
?
?
 ? ?/**
 ? ? * 合并多个单页PDF文件，输出一个合并后的PDF文档
 ? ? *
 ? ? * @param inputParent
 ? ? * @param outputFile
 ? ? * @param sortor
 ? ? * @throws IOException
 ? ? */
 ? ?public static void combine(File inputParent, String outputFile, FileSortor sortor) throws IOException {
 ? ? ? ?if (!inputParent.exists() || !inputParent.isDirectory()) {
 ? ? ? ? ? ?throw new RuntimeException("输入文件的父目录不存在");
 ? ? ?  }
 ? ? ? ?if (new File(outputFile).exists()) {
 ? ? ? ? ? ?throw new RuntimeException("输出文件已存在");
 ? ? ?  }
 ? ? ? ?File[] files = inputParent.listFiles();
 ? ? ? ?if (sortor != null) {
 ? ? ? ? ? ?sortor.sort(files);
 ? ? ?  }
 ? ? ? ?PDFMergerUtility merger = new PDFMergerUtility();
 ? ? ? ?//输出目标路径
 ? ? ? ?merger.setDestinationFileName(outputFile);
 ? ? ? ?for (int i = 0; i < files.length; i++) {
 ? ? ? ? ? ?if (files[i].getName().toLowerCase().endsWith(".pdf")) {
 ? ? ? ? ? ? ? ?merger.addSource(files[i]);
 ? ? ? ? ?  }
 ? ? ?  }
 ? ? ? ?merger.mergeDocuments(null);
 ?  }
?
 ? ?/**
 ? ? * 获取pdf单页分辨率
 ? ? *
 ? ? * @param page
 ? ? * @return
 ? ? */
 ? ?public static String getResolution(PDPage page) {
 ? ? ? ?PDRectangle rectangle = page.getArtBox();
 ? ? ? ?double width = Math.ceil(rectangle.getWidth());
 ? ? ? ?double height = Math.ceil(rectangle.getHeight());
 ? ? ? ?return (int) width + "*" + (int) height;
 ?  }
?
 ? ?/**
 ? ? * 图片转PDF
 ? ? *
 ? ? * @param inputFile  图片路径
 ? ? * @param outputFile 生成pdf的文件路径
 ? ? * @throws IOException
 ? ? */
 ? ?public static void convertImgToPDF(String inputFile, String outputFile) throws IOException {
 ? ? ? ?if (!new File(inputFile).exists()) {
 ? ? ? ? ? ?throw new RuntimeException("输入文件不存在");
 ? ? ?  }
 ? ? ? ?if (!outputFile.toLowerCase().endsWith(".pdf")) {
 ? ? ? ? ? ?throw new RuntimeException("只能转成pdf文件");
 ? ? ?  }
 ? ? ? ?PDDocument document = new PDDocument();
 ? ? ? ?InputStream inputStream = new FileInputStream(inputFile);
 ? ? ? ?BufferedImage bimg = ImageIO.read(inputStream);
 ? ? ? ?float width = bimg.getWidth();
 ? ? ? ?float height = bimg.getHeight();
 ? ? ? ?PDPage page = new PDPage(new PDRectangle(width, height));
 ? ? ? ?document.addPage(page);
 ? ? ? ?PDImageXObject img = PDImageXObject.createFromFile(inputFile, document);
 ? ? ? ?PDPageContentStream contentStream = new PDPageContentStream(document, page);
 ? ? ? ?contentStream.drawImage(img, 0, 0, width, height);
 ? ? ? ?contentStream.close();
 ? ? ? ?close(inputStream);
 ? ? ? ?document.save(outputFile);
 ? ? ? ?close(document);
 ?  }
?
?
 ? ?public static void close(InputStream inputStream) {
 ? ? ? ?try {
 ? ? ? ? ? ?if (inputStream != null) {
 ? ? ? ? ? ? ? ?inputStream.close();
 ? ? ? ? ?  }
 ? ? ?  } catch (IOException e) {
 ? ? ? ? ? ?log.error(e.getMessage(), e);
 ? ? ?  }
 ?  }
?
 ? ?public static void close(PDDocument pdf) {
 ? ? ? ?try {
 ? ? ? ? ? ?if (pdf != null) {
 ? ? ? ? ? ? ? ?pdf.close();
 ? ? ? ? ?  }
 ? ? ?  } catch (IOException e) {
 ? ? ? ? ? ?log.error(e.getMessage(), e);
 ? ? ?  }
 ?  }
?
 ? ?/**
 ? ? * 文件排序器
 ? ? */
 ? ?public interface FileSortor {
 ? ? ? ?/**
 ? ? ? ? * 源文件组
 ? ? ? ? *
 ? ? ? ? * @param sources
 ? ? ? ? */
 ? ? ? ?void sort(File[] sources);
 ?  }
}


?