non crashing pdf font

This commit is contained in:
Benjamin Gamard 2018-03-10 10:44:40 +01:00
parent f7b84238df
commit 5cdbe9338b
4 changed files with 330 additions and 14 deletions

View File

@ -19,7 +19,7 @@ import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.DocsPDType1Font;
import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
@ -193,9 +193,9 @@ public class PdfUtil {
if (metadata) {
PDPage page = new PDPage();
doc.addPage(page);
try (PdfPage pdfPage = new PdfPage(doc, page, margin * mmPerInch, PDType1Font.HELVETICA, 12)) {
try (PdfPage pdfPage = new PdfPage(doc, page, margin * mmPerInch, DocsPDType1Font.HELVETICA, 12)) {
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
pdfPage.addText(documentDto.getTitle(), true, PDType1Font.HELVETICA_BOLD, 16)
pdfPage.addText(documentDto.getTitle(), true, DocsPDType1Font.HELVETICA_BOLD, 16)
.newLine()
.addText("Created by " + documentDto.getCreator()
+ " on " + dateFormat.format(new Date(documentDto.getCreateTimestamp())), true)
@ -228,7 +228,7 @@ public class PdfUtil {
}
pdfPage.addText("Language: " + documentDto.getLanguage())
.newLine()
.addText("Files in this document : " + fileList.size(), false, PDType1Font.HELVETICA_BOLD, 12);
.addText("Files in this document : " + fileList.size(), false, DocsPDType1Font.HELVETICA_BOLD, 12);
}
}

View File

@ -28,7 +28,7 @@ public class PdfPage implements Closeable {
* @param margin Margin
* @param defaultFont Default font
* @param defaultFontSize Default fond size
* @throws IOException
* @throws IOException e
*/
public PdfPage(PDDocument pdDoc, PDPage pdPage, float margin, PDFont defaultFont, int defaultFontSize) throws IOException {
this.pdPage = pdPage;
@ -45,7 +45,7 @@ public class PdfPage implements Closeable {
* Write a text with default font.
*
* @param text Text
* @throws IOException
* @throws IOException e
*/
public PdfPage addText(String text) throws IOException {
drawText(pdPage.getMediaBox().getWidth() - 2 * margin, defaultFont, defaultFontSize, text, false);
@ -57,7 +57,7 @@ public class PdfPage implements Closeable {
*
* @param text Text
* @param centered If true, the text will be centered in the page
* @throws IOException
* @throws IOException e
*/
public PdfPage addText(String text, boolean centered) throws IOException {
drawText(pdPage.getMediaBox().getWidth() - 2 * margin, defaultFont, defaultFontSize, text, centered);
@ -71,7 +71,7 @@ public class PdfPage implements Closeable {
* @param centered If true, the text will be centered in the page
* @param font Font
* @param fontSize Font size
* @throws IOException
* @throws IOException e
*/
public PdfPage addText(String text, boolean centered, PDFont font, int fontSize) throws IOException {
drawText(pdPage.getMediaBox().getWidth() - 2 * margin, font, fontSize, text, centered);
@ -81,7 +81,7 @@ public class PdfPage implements Closeable {
/**
* Create a new line.
*
* @throws IOException
* @throws IOException e
*/
public PdfPage newLine() throws IOException {
pdContent.newLineAtOffset(0, - defaultFont.getFontDescriptor().getFontBoundingBox().getHeight() / 1000 * defaultFontSize);
@ -96,16 +96,13 @@ public class PdfPage implements Closeable {
* @param fontSize Font size
* @param text Text
* @param centered If true, the text will be centered in the paragraph
* @throws IOException
* @throws IOException e
*/
private void drawText(float paragraphWidth, PDFont font, int fontSize, String text, boolean centered) throws IOException {
if (text == null) {
return;
}
// Remove \r\n non breakable space
text = text.replaceAll("[\r\n]", "").replace("\u00A0", " ");
pdContent.setFont(font, fontSize);
int start = 0;
int end = 0;

View File

@ -0,0 +1,319 @@
package org.apache.pdfbox.pdmodel.font;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.EncodedFont;
import org.apache.fontbox.FontBoxFont;
import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.encoding.*;
import org.apache.pdfbox.util.Matrix;
import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.awt.geom.Point2D;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import static org.apache.pdfbox.pdmodel.font.UniUtil.getUniNameOfCodePoint;
/**
* Safe non-crashing font even if no glyph are present.
* Will replace unknown glyphs by a space.
*
* @author bgamard
*/
public class DocsPDType1Font extends PDSimpleFont {
private static final Log LOG = LogFactory.getLog(DocsPDType1Font.class);
// alternative names for glyphs which are commonly encountered
private static final Map<String, String> ALT_NAMES = new HashMap<>();
static {
ALT_NAMES.put("ff", "f_f");
ALT_NAMES.put("ffi", "f_f_i");
ALT_NAMES.put("ffl", "f_f_l");
ALT_NAMES.put("fi", "f_i");
ALT_NAMES.put("fl", "f_l");
ALT_NAMES.put("st", "s_t");
ALT_NAMES.put("IJ", "I_J");
ALT_NAMES.put("ij", "i_j");
ALT_NAMES.put("ellipsis", "elipsis"); // misspelled in ArialMT
}
public static final DocsPDType1Font HELVETICA = new DocsPDType1Font("Helvetica");
public static final DocsPDType1Font HELVETICA_BOLD = new DocsPDType1Font("Helvetica-Bold");
/**
* embedded or system font for rendering.
*/
private final FontBoxFont genericFont;
private final boolean isEmbedded;
private final boolean isDamaged;
private Matrix fontMatrix;
private final AffineTransform fontMatrixTransform;
private BoundingBox fontBBox;
/**
* to improve encoding speed.
*/
private final Map<Integer, byte[]> codeToBytesMap;
/**
* Creates a Type 1 standard 14 font for embedding.
*
* @param baseFont One of the standard 14 PostScript names
*/
private DocsPDType1Font(String baseFont) {
super(baseFont);
dict.setItem(COSName.SUBTYPE, COSName.TYPE1);
dict.setName(COSName.BASE_FONT, baseFont);
if ("ZapfDingbats".equals(baseFont)) {
encoding = ZapfDingbatsEncoding.INSTANCE;
} else if ("Symbol".equals(baseFont)) {
encoding = SymbolEncoding.INSTANCE;
} else {
encoding = WinAnsiEncoding.INSTANCE;
dict.setItem(COSName.ENCODING, COSName.WIN_ANSI_ENCODING);
}
// standard 14 fonts may be accessed concurrently, as they are singletons
codeToBytesMap = new ConcurrentHashMap<>();
FontMapping<FontBoxFont> mapping = FontMappers.instance()
.getFontBoxFont(getBaseFont(),
getFontDescriptor());
genericFont = mapping.getFont();
if (mapping.isFallback()) {
String fontName;
try {
fontName = genericFont.getName();
} catch (IOException e) {
fontName = "?";
}
LOG.warn("Using fallback font " + fontName + " for base font " + getBaseFont());
}
isEmbedded = false;
isDamaged = false;
fontMatrixTransform = new AffineTransform();
}
/**
* Returns the PostScript name of the font.
*/
private String getBaseFont() {
return dict.getNameAsString(COSName.BASE_FONT);
}
@Override
public float getHeight(int code) throws IOException {
String name = codeToName(code);
if (getStandard14AFM() != null) {
String afmName = getEncoding().getName(code);
return getStandard14AFM().getCharacterHeight(afmName);
} else {
return (float) genericFont.getPath(name).getBounds().getHeight();
}
}
@Override
protected byte[] encode(int unicode) throws IOException {
byte[] bytes = codeToBytesMap.get(unicode);
if (bytes != null) {
return bytes;
}
String name = getGlyphList().codePointToName(unicode);
if (isStandard14()) {
// genericFont not needed, thus simplified code
// this is important on systems with no installed fonts
if (!encoding.contains(name)) {
return " ".getBytes();
}
if (".notdef".equals(name)) {
return " ".getBytes();
}
} else {
if (!encoding.contains(name)) {
return " ".getBytes();
}
String nameInFont = getNameInFont(name);
if (nameInFont.equals(".notdef") || !genericFont.hasGlyph(nameInFont)) {
return " ".getBytes();
}
}
Map<String, Integer> inverted = encoding.getNameToCodeMap();
int code = inverted.get(name);
bytes = new byte[]{(byte) code};
codeToBytesMap.put(code, bytes);
return bytes;
}
@Override
public float getWidthFromFont(int code) throws IOException {
String name = codeToName(code);
// width of .notdef is ignored for substitutes, see PDFBOX-1900
if (!isEmbedded && ".notdef".equals(name)) {
return 250;
}
float width = genericFont.getWidth(name);
Point2D p = new Point2D.Float(width, 0);
fontMatrixTransform.transform(p, p);
return (float) p.getX();
}
@Override
public boolean isEmbedded() {
return isEmbedded;
}
@Override
public float getAverageFontWidth() {
if (getStandard14AFM() != null) {
return getStandard14AFM().getAverageCharacterWidth();
} else {
return super.getAverageFontWidth();
}
}
@Override
public int readCode(InputStream in) throws IOException {
return in.read();
}
@Override
protected Encoding readEncodingFromFont() throws IOException {
if (!isEmbedded() && getStandard14AFM() != null) {
// read from AFM
return new Type1Encoding(getStandard14AFM());
} else {
// extract from Type1 font/substitute
if (genericFont instanceof EncodedFont) {
return Type1Encoding.fromFontBox(((EncodedFont) genericFont).getEncoding());
} else {
// default (only happens with TTFs)
return StandardEncoding.INSTANCE;
}
}
}
@Override
public FontBoxFont getFontBoxFont() {
return genericFont;
}
@Override
public String getName() {
return getBaseFont();
}
@Override
public BoundingBox getBoundingBox() throws IOException {
if (fontBBox == null) {
fontBBox = generateBoundingBox();
}
return fontBBox;
}
private BoundingBox generateBoundingBox() throws IOException {
if (getFontDescriptor() != null) {
PDRectangle bbox = getFontDescriptor().getFontBoundingBox();
if (bbox != null &&
(bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 ||
bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0)) {
return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(),
bbox.getUpperRightX(), bbox.getUpperRightY());
}
}
return genericFont.getFontBBox();
}
private String codeToName(int code) throws IOException {
String name = getEncoding().getName(code);
return getNameInFont(name);
}
/**
* Maps a PostScript glyph name to the name in the underlying font, for example when
* using a TTF font we might map "W" to "uni0057".
*/
private String getNameInFont(String name) throws IOException {
if (isEmbedded() || genericFont.hasGlyph(name)) {
return name;
} else {
// try alternative name
String altName = ALT_NAMES.get(name);
if (altName != null && !name.equals(".notdef") && genericFont.hasGlyph(altName)) {
return altName;
} else {
// try unicode name
String unicodes = getGlyphList().toUnicode(name);
if (unicodes != null && unicodes.length() == 1) {
String uniName = getUniNameOfCodePoint(unicodes.codePointAt(0));
if (genericFont.hasGlyph(uniName)) {
return uniName;
}
}
}
}
return ".notdef";
}
@Override
public GeneralPath getPath(String name) throws IOException {
// Acrobat does not draw .notdef for Type 1 fonts, see PDFBOX-2421
// I suspect that it does do this for embedded fonts though, but this is untested
if (name.equals(".notdef") && !isEmbedded) {
return new GeneralPath();
} else {
return genericFont.getPath(getNameInFont(name));
}
}
@Override
public boolean hasGlyph(String name) throws IOException {
return genericFont.hasGlyph(getNameInFont(name));
}
@Override
public final Matrix getFontMatrix() {
if (fontMatrix == null) {
// PDF specified that Type 1 fonts use a 1000upem matrix, but some fonts specify
// their own custom matrix anyway, for example PDFBOX-2298
List<Number> numbers = null;
try {
numbers = genericFont.getFontMatrix();
} catch (IOException e) {
fontMatrix = DEFAULT_FONT_MATRIX;
}
if (numbers != null && numbers.size() == 6) {
fontMatrix = new Matrix(
numbers.get(0).floatValue(), numbers.get(1).floatValue(),
numbers.get(2).floatValue(), numbers.get(3).floatValue(),
numbers.get(4).floatValue(), numbers.get(5).floatValue());
} else {
return super.getFontMatrix();
}
}
return fontMatrix;
}
@Override
public boolean isDamaged() {
return isDamaged;
}
}

View File

@ -263,7 +263,7 @@ public class TestDocumentResource extends BaseJerseyTest {
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document1Token)
.post(Entity.form(new Form()
.param("title", "My new super document 1")
.param("description", "My new super description for document\r\n\u00A0 1")
.param("description", "My new super description for document\r\n\u00A0\u0009 1")
.param("subject", "My new subject for document 1")
.param("identifier", "My new identifier for document 1")
.param("publisher", "My new publisher for document 1")