I often download e-books in PDF format, and some otherwise good books come without bookmarks, which makes them awkward to read. So I decided to write a small tool myself that parses text in a specific format into bookmarks and then saves the result as a PDF.
The overall idea is to copy the table of contents from the book's introduction page on Douban, JD.com, Dangdang, or Amazon. Take the "HTTP Authoritative Guide" as an example:
The structure of the table of contents is as follows:
The code copy is as follows:
Chapter 1 HTTP Overview 3
1.1 HTTP-Internet's Multimedia Messenger 4
1.2 Web Client and Server 4
1.3 Resource 5
1.3.1 Media Type 6
1.3.2 URI 7
1.3.3 URL 7
1.3.4 URN 8
1.4 Transaction 9
1.4.1 Method 9
1.4.2 Status Code 10
1.4.3 Web pages can contain multiple objects 10
1.5 Message 11
1.6 Connection 13
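Each line ends with its page number, which is separated from the title by a space. The nesting level of a line is determined by the number of dots in its leading section number (for example, "1.3.1" is two levels below "Chapter 1").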
After processing, the result is:
The main logic is:
The code copy is as follows:
package org.fra.pdf.bussiness;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Stack;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.IntHashtable;
import com.itextpdf.text.pdf.PdfArray;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfIndirectReference;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfString;
import com.itextpdf.text.pdf.SimpleBookmark;
/**
 * Adds bookmarks (outlines) to an existing PDF from a plain-text table of contents.
 *
 * <p>Every non-blank input line has the form {@code <title> <page>}, for example
 * {@code 1.3.1 Media Type 6}. The nesting level of a line is the number of dots in its first
 * token: {@code Chapter 1 ...} is level 0, {@code 1.3 ...} is level 1 and {@code 1.3.1 ...} is
 * level 2. A line whose last token is not a number is treated as a title pointing at page 1.
 *
 * <p>Bookmarks are built as maps with the keys {@code "Title"}, {@code "Action"}, {@code "Page"}
 * and {@code "Kids"} and handed to {@link PdfStamper#setOutlines(List)}.
 */
public class AddPdfOutLineFromTxt {

    /** Map key under which a bookmark stores the list of its child bookmarks. */
    private static final String KIDS_KEY = "Kids";

    /** Page used for lines that do not end with a page number. */
    private static final String DEFAULT_PAGE = "1";

    /**
     * Copies {@code sourcePdf} to {@code destPdf}, replacing its outline tree with the bookmarks
     * described by {@code bufRead}.
     *
     * @param destPdf   path of the PDF file to write
     * @param sourcePdf path of the PDF file to read
     * @param bufRead   reader positioned at the start of the table-of-contents text; it is read
     *                  to the end but not closed by this method
     * @param pattern   one of {@code AddBookmarkConstants.RESERVED_OLD_OUTLINE} (keep all existing
     *                  bookmarks in front of the new ones),
     *                  {@code AddBookmarkConstants.RESERVED_FIRST_OUTLINE} (keep only the first
     *                  existing bookmark) or {@code AddBookmarkConstants.RESERVED_NONE} (keep
     *                  none); for any other value the method returns without doing anything
     * @throws IOException       if the source cannot be read or the destination cannot be written
     * @throws DocumentException if iText fails to stamp the document
     */
    public void createPdf(String destPdf, String sourcePdf,
            BufferedReader bufRead, int pattern) throws IOException,
            DocumentException {
        if (pattern != AddBookmarkConstants.RESERVED_OLD_OUTLINE
                && pattern != AddBookmarkConstants.RESERVED_NONE
                && pattern != AddBookmarkConstants.RESERVED_FIRST_OUTLINE) {
            return;
        }

        PdfReader reader = new PdfReader(sourcePdf);
        try {
            List<HashMap<String, Object>> outlines = new ArrayList<HashMap<String, Object>>();
            if (pattern == AddBookmarkConstants.RESERVED_OLD_OUTLINE) {
                // getBookmark yields null for a document without outlines; addAll(null) would throw.
                List<HashMap<String, Object>> existing = SimpleBookmark.getBookmark(reader);
                if (existing != null) {
                    outlines.addAll(existing);
                }
            } else if (pattern == AddBookmarkConstants.RESERVED_FIRST_OUTLINE) {
                addFirstOutlineReservedPdf(outlines, reader);
            }

            // Parse the text before opening the destination so that a read failure
            // does not leave a truncated output file behind.
            addBookmarks(bufRead, outlines);

            FileOutputStream out = new FileOutputStream(destPdf);
            try {
                PdfStamper stamper = new PdfStamper(reader, out);
                stamper.setOutlines(outlines);
                stamper.close();
            } finally {
                // Closing again after a successful stamper.close() is harmless; this
                // releases the file handle when stamping fails part-way.
                out.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Reads the table-of-contents text line by line and appends the resulting bookmark tree to
     * {@code outlines}.
     *
     * <p>The tree is built in a single pass with a stack of the currently open ancestors, so no
     * {@code mark}/{@code reset} on the reader and no per-line recursion is needed. A line becomes
     * a child of the nearest preceding line with a strictly smaller level; if there is none it is
     * added to {@code outlines} directly.
     *
     * @param bufRead  source of the formatted lines; blank lines are skipped
     * @param outlines top-level bookmark list that receives the level-0 entries
     * @throws IOException if reading from {@code bufRead} fails
     */
    private void addBookmarks(BufferedReader bufRead,
            List<HashMap<String, Object>> outlines) throws IOException {
        Deque<OutlineNode> ancestors = new ArrayDeque<OutlineNode>();
        String contentFormatLine;
        while ((contentFormatLine = bufRead.readLine()) != null) {
            if (contentFormatLine.trim().isEmpty()) {
                continue;
            }
            FormattedBookmark bookmark = parseFormattedText(contentFormatLine);
            HashMap<String, Object> entry = parseBookmarkToHashMap(bookmark);
            int level = bookmark.getLevel();

            // Close every open entry that is a sibling of, or deeper than, this line.
            while (!ancestors.isEmpty() && ancestors.peek().level >= level) {
                ancestors.pop();
            }
            if (ancestors.isEmpty()) {
                outlines.add(entry);
            } else {
                ancestors.peek().kids().add(entry);
            }
            ancestors.push(new OutlineNode(level, entry));
        }
    }

    /**
     * Converts a parsed line into the map representation used for outlines.
     *
     * @param bookmark parsed title and page
     * @return a map with {@code "Title"}, {@code "Action"} ({@code "GoTo"}) and {@code "Page"}
     */
    private HashMap<String, Object> parseBookmarkToHashMap(
            FormattedBookmark bookmark) {
        HashMap<String, Object> map = new HashMap<String, Object>();
        map.put("Title", bookmark.getTitle());
        map.put("Action", "GoTo");
        // Page number and fit type are space-separated, the same layout that
        // makeBookmarkParam produces and addFirstOutlineReservedPdf splits on.
        map.put("Page", bookmark.getPage() + " Fit");
        return map;
    }

    /**
     * Parses one table-of-contents line into title, page and nesting level.
     *
     * <p>The last whitespace-separated token is taken as the page when it consists only of
     * digits; otherwise the whole line is the title and the page defaults to 1 (typically the
     * book name on the first line). The level is the number of dots in the first token and is
     * never negative.
     *
     * @param contentFormatLine raw input line
     * @return the parsed bookmark
     */
    private FormattedBookmark parseFormattedText(String contentFormatLine) {
        String line = contentFormatLine.trim();
        String[] tokens = line.split("\\s+");
        String lastToken = tokens[tokens.length - 1];

        String title = line;
        String destPage = DEFAULT_PAGE;
        if (tokens.length > 1 && isPageNumber(lastToken)) {
            title = line.substring(0, line.length() - lastToken.length()).trim();
            destPage = lastToken;
        }

        // "1.3.1" -> ["1", "3", "1"] -> level 2; a token without dots -> level 0.
        // A token made only of dots splits into an empty array, hence the clamp.
        int dotCount = Math.max(0, tokens[0].split("\\.").length - 1);

        FormattedBookmark bookmark = new FormattedBookmark();
        bookmark.setLevel(dotCount);
        bookmark.setPage(destPage);
        bookmark.setTitle(title);
        return bookmark;
    }

    /** Returns whether {@code token} is a non-empty run of ASCII digits. */
    private static boolean isPageNumber(String token) {
        if (token.isEmpty()) {
            return false;
        }
        for (int i = 0; i < token.length(); i++) {
            char c = token.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Appends a copy of the document's first existing bookmark (title and target page, always
     * with fit type {@code Fit}) to {@code outlines}.
     *
     * <p>Nothing is added when the document has no outline tree, when the first bookmark has no
     * title, or when its target cannot be resolved to a page number from an explicit destination
     * array (either {@code /Dest} or the {@code /D} entry of a {@code GoTo} action).
     *
     * @param outlines list that receives the reconstructed bookmark
     * @param reader   the open source document
     */
    private void addFirstOutlineReservedPdf(
            List<HashMap<String, Object>> outlines, PdfReader reader) {
        PdfDictionary catalog = reader.getCatalog();
        PdfObject root = PdfReader.getPdfObjectRelease(catalog
                .get(PdfName.OUTLINES));
        if (root == null || !root.isDictionary()) {
            return; // no bookmarks at all
        }
        PdfObject first = PdfReader.getPdfObjectRelease(((PdfDictionary) root)
                .get(PdfName.FIRST));
        if (first == null || !first.isDictionary()) {
            return; // outline root without a first entry
        }
        PdfDictionary firstOutline = (PdfDictionary) first;

        PdfString titleObj = firstOutline.getAsString(PdfName.TITLE);
        if (titleObj == null) {
            return;
        }
        String title = titleObj.toUnicodeString();

        PdfArray dest = firstOutline.getAsArray(PdfName.DEST);
        if (dest == null) {
            // No direct destination: look for one inside a GoTo action.
            PdfObject action = PdfReader.getPdfObjectRelease(firstOutline
                    .get(PdfName.A));
            if (action != null && action.isDictionary()) {
                PdfDictionary actionDict = (PdfDictionary) action;
                if (PdfName.GOTO.equals(PdfReader
                        .getPdfObjectRelease(actionDict.get(PdfName.S)))) {
                    PdfObject target = PdfReader
                            .getPdfObjectRelease(actionDict.get(PdfName.D));
                    if (target != null && target.isArray()) {
                        dest = (PdfArray) target;
                    }
                }
            }
        }

        String destStr = parseDestString(dest, reader);
        int num;
        try {
            num = Integer.parseInt(destStr.split(" ")[0]);
        } catch (NumberFormatException ignored) {
            // Destination missing or not resolvable to a page: nothing to preserve.
            return;
        }
        if (num < 1) {
            return; // page reference not found in this document
        }

        HashMap<String, Object> map = new HashMap<String, Object>();
        map.put("Title", title);
        map.put("Action", "GoTo");
        map.put("Page", num + " Fit");
        outlines.add(map);
    }

    /**
     * Renders an explicit destination array as {@code "<page> <fit> [args...]"}.
     *
     * @param dest   destination array, may be {@code null}
     * @param reader document used to map page object numbers to 1-based page numbers
     * @return the rendered destination, or an empty string when {@code dest} is {@code null} or
     *         empty
     */
    private String parseDestString(PdfArray dest, PdfReader reader) {
        if (dest == null || dest.size() == 0) {
            return "";
        }
        // Lookup table: object number of each page -> 1-based page number.
        IntHashtable pages = new IntHashtable();
        int numPages = reader.getNumberOfPages();
        for (int k = 1; k <= numPages; ++k) {
            pages.put(reader.getPageOrigRef(k).getNumber(), k);
            reader.releasePage(k);
        }
        return makeBookmarkParam(dest, pages);
    }

    /**
     * Builds the space-separated destination string for a non-empty destination array.
     *
     * @param dest  destination array whose first element is a page number or page reference
     * @param pages mapping from page object number to 1-based page number
     * @return {@code "<page> <fit> [args...]"}; the page is 0 when it cannot be resolved
     */
    private String makeBookmarkParam(PdfArray dest, IntHashtable pages) {
        StringBuilder s = new StringBuilder();
        PdfObject target = dest.getPdfObject(0);
        if (target.isNumber()) {
            // A numeric first element is incremented to give the 1-based page number.
            s.append(((PdfNumber) target).intValue() + 1);
        } else if (target.isIndirect()) {
            s.append(pages.get(getNumber((PdfIndirectReference) target)));
        } else {
            s.append(0);
        }
        if (dest.size() > 1) {
            // substring(1) drops the leading '/' of the fit-type name.
            s.append(' ').append(dest.getPdfObject(1).toString().substring(1));
        }
        for (int k = 2; k < dest.size(); ++k) {
            s.append(' ').append(dest.getPdfObject(k).toString());
        }
        return s.toString();
    }

    /**
     * Returns the object number to look up for a destination's page reference. When the
     * reference points at a page-tree node ({@code /Type /Pages}) instead of a page, the node's
     * first kid is used.
     *
     * @param indirect reference taken from a destination array
     * @return object number of the referenced page (or of the node's first kid)
     */
    private int getNumber(PdfIndirectReference indirect) {
        PdfObject resolved = PdfReader.getPdfObjectRelease(indirect);
        if (resolved != null && resolved.isDictionary()) {
            PdfDictionary dict = (PdfDictionary) resolved;
            if (PdfName.PAGES.equals(dict.get(PdfName.TYPE))) {
                PdfArray kids = dict.getAsArray(PdfName.KIDS);
                if (kids != null && kids.size() > 0
                        && kids.getPdfObject(0).isIndirect()) {
                    indirect = (PdfIndirectReference) kids.getPdfObject(0);
                }
            }
        }
        return indirect.getNumber();
    }

    /** A bookmark that is still open as a potential parent while the text is being read. */
    private static final class OutlineNode {
        private final int level;
        private final HashMap<String, Object> entry;
        private List<HashMap<String, Object>> kids;

        OutlineNode(int level, HashMap<String, Object> entry) {
            this.level = level;
            this.entry = entry;
        }

        /** Returns the child list, creating and attaching it on first use. */
        List<HashMap<String, Object>> kids() {
            if (kids == null) {
                kids = new ArrayList<HashMap<String, Object>>();
                entry.put(KIDS_KEY, kids);
            }
            return kids;
        }
    }
}