From 5cdbe9338b23dbdb3ad4b7a015c153f75dd43f82 Mon Sep 17 00:00:00 2001 From: Benjamin Gamard Date: Sat, 10 Mar 2018 10:44:40 +0100 Subject: [PATCH] non crashing pdf font --- .../com/sismics/docs/core/util/PdfUtil.java | 8 +- .../sismics/docs/core/util/pdf/PdfPage.java | 15 +- .../pdfbox/pdmodel/font/DocsPDType1Font.java | 319 ++++++++++++++++++ .../docs/rest/TestDocumentResource.java | 2 +- 4 files changed, 330 insertions(+), 14 deletions(-) create mode 100644 docs-core/src/main/java/org/apache/pdfbox/pdmodel/font/DocsPDType1Font.java diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/PdfUtil.java b/docs-core/src/main/java/com/sismics/docs/core/util/PdfUtil.java index 1f3a79e6..445de127 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/util/PdfUtil.java +++ b/docs-core/src/main/java/com/sismics/docs/core/util/PdfUtil.java @@ -19,7 +19,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.DocsPDType1Font; import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory; import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; @@ -193,9 +193,9 @@ public class PdfUtil { if (metadata) { PDPage page = new PDPage(); doc.addPage(page); - try (PdfPage pdfPage = new PdfPage(doc, page, margin * mmPerInch, PDType1Font.HELVETICA, 12)) { + try (PdfPage pdfPage = new PdfPage(doc, page, margin * mmPerInch, DocsPDType1Font.HELVETICA, 12)) { SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - pdfPage.addText(documentDto.getTitle(), true, PDType1Font.HELVETICA_BOLD, 16) + pdfPage.addText(documentDto.getTitle(), true, DocsPDType1Font.HELVETICA_BOLD, 16) .newLine() .addText("Created by " + documentDto.getCreator() + " on " + dateFormat.format(new 
Date(documentDto.getCreateTimestamp())), true) @@ -228,7 +228,7 @@ public class PdfUtil { } pdfPage.addText("Language: " + documentDto.getLanguage()) .newLine() - .addText("Files in this document : " + fileList.size(), false, PDType1Font.HELVETICA_BOLD, 12); + .addText("Files in this document : " + fileList.size(), false, DocsPDType1Font.HELVETICA_BOLD, 12); } } diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/pdf/PdfPage.java b/docs-core/src/main/java/com/sismics/docs/core/util/pdf/PdfPage.java index cb651cc3..b5098bc3 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/util/pdf/PdfPage.java +++ b/docs-core/src/main/java/com/sismics/docs/core/util/pdf/PdfPage.java @@ -28,7 +28,7 @@ public class PdfPage implements Closeable { * @param margin Margin * @param defaultFont Default font * @param defaultFontSize Default fond size - * @throws IOException + * @throws IOException e */ public PdfPage(PDDocument pdDoc, PDPage pdPage, float margin, PDFont defaultFont, int defaultFontSize) throws IOException { this.pdPage = pdPage; @@ -45,7 +45,7 @@ public class PdfPage implements Closeable { * Write a text with default font. 
* * @param text Text - * @throws IOException + * @throws IOException e */ public PdfPage addText(String text) throws IOException { drawText(pdPage.getMediaBox().getWidth() - 2 * margin, defaultFont, defaultFontSize, text, false); @@ -57,7 +57,7 @@ public class PdfPage implements Closeable { * * @param text Text * @param centered If true, the text will be centered in the page - * @throws IOException + * @throws IOException e */ public PdfPage addText(String text, boolean centered) throws IOException { drawText(pdPage.getMediaBox().getWidth() - 2 * margin, defaultFont, defaultFontSize, text, centered); @@ -71,7 +71,7 @@ public class PdfPage implements Closeable { * @param centered If true, the text will be centered in the page * @param font Font * @param fontSize Font size - * @throws IOException + * @throws IOException e */ public PdfPage addText(String text, boolean centered, PDFont font, int fontSize) throws IOException { drawText(pdPage.getMediaBox().getWidth() - 2 * margin, font, fontSize, text, centered); @@ -81,7 +81,7 @@ public class PdfPage implements Closeable { /** * Create a new line. 
* - * @throws IOException + * @throws IOException e */ public PdfPage newLine() throws IOException { pdContent.newLineAtOffset(0, - defaultFont.getFontDescriptor().getFontBoundingBox().getHeight() / 1000 * defaultFontSize); @@ -96,16 +96,13 @@ public class PdfPage implements Closeable { * @param fontSize Font size * @param text Text * @param centered If true, the text will be centered in the paragraph - * @throws IOException + * @throws IOException e */ private void drawText(float paragraphWidth, PDFont font, int fontSize, String text, boolean centered) throws IOException { if (text == null) { return; } - // Remove \r\n non breakable space - text = text.replaceAll("[\r\n]", "").replace("\u00A0", " "); - pdContent.setFont(font, fontSize); int start = 0; int end = 0; diff --git a/docs-core/src/main/java/org/apache/pdfbox/pdmodel/font/DocsPDType1Font.java b/docs-core/src/main/java/org/apache/pdfbox/pdmodel/font/DocsPDType1Font.java new file mode 100644 index 00000000..fa1c09c9 --- /dev/null +++ b/docs-core/src/main/java/org/apache/pdfbox/pdmodel/font/DocsPDType1Font.java @@ -0,0 +1,319 @@ +package org.apache.pdfbox.pdmodel.font; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.EncodedFont; +import org.apache.fontbox.FontBoxFont; +import org.apache.fontbox.util.BoundingBox; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.encoding.*; +import org.apache.pdfbox.util.Matrix; + +import java.awt.geom.AffineTransform; +import java.awt.geom.GeneralPath; +import java.awt.geom.Point2D; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import static org.apache.pdfbox.pdmodel.font.UniUtil.getUniNameOfCodePoint; + +/** + * Safe non-crashing font even if no glyph are present. 
+ * Will replace unknown glyphs by a space. + * + * @author bgamard + */ +public class DocsPDType1Font extends PDSimpleFont { + private static final Log LOG = LogFactory.getLog(DocsPDType1Font.class); + + // alternative names for glyphs which are commonly encountered + private static final Map ALT_NAMES = new HashMap<>(); + + static { + ALT_NAMES.put("ff", "f_f"); + ALT_NAMES.put("ffi", "f_f_i"); + ALT_NAMES.put("ffl", "f_f_l"); + ALT_NAMES.put("fi", "f_i"); + ALT_NAMES.put("fl", "f_l"); + ALT_NAMES.put("st", "s_t"); + ALT_NAMES.put("IJ", "I_J"); + ALT_NAMES.put("ij", "i_j"); + ALT_NAMES.put("ellipsis", "elipsis"); // misspelled in ArialMT + } + + public static final DocsPDType1Font HELVETICA = new DocsPDType1Font("Helvetica"); + public static final DocsPDType1Font HELVETICA_BOLD = new DocsPDType1Font("Helvetica-Bold"); + + /** + * embedded or system font for rendering. + */ + private final FontBoxFont genericFont; + + private final boolean isEmbedded; + private final boolean isDamaged; + private Matrix fontMatrix; + private final AffineTransform fontMatrixTransform; + private BoundingBox fontBBox; + + /** + * to improve encoding speed. + */ + private final Map codeToBytesMap; + + /** + * Creates a Type 1 standard 14 font for embedding. 
+ * + * @param baseFont One of the standard 14 PostScript names + */ + private DocsPDType1Font(String baseFont) { + super(baseFont); + + dict.setItem(COSName.SUBTYPE, COSName.TYPE1); + dict.setName(COSName.BASE_FONT, baseFont); + if ("ZapfDingbats".equals(baseFont)) { + encoding = ZapfDingbatsEncoding.INSTANCE; + } else if ("Symbol".equals(baseFont)) { + encoding = SymbolEncoding.INSTANCE; + } else { + encoding = WinAnsiEncoding.INSTANCE; + dict.setItem(COSName.ENCODING, COSName.WIN_ANSI_ENCODING); + } + + // standard 14 fonts may be accessed concurrently, as they are singletons + codeToBytesMap = new ConcurrentHashMap<>(); + + FontMapping mapping = FontMappers.instance() + .getFontBoxFont(getBaseFont(), + getFontDescriptor()); + genericFont = mapping.getFont(); + + if (mapping.isFallback()) { + String fontName; + try { + fontName = genericFont.getName(); + } catch (IOException e) { + fontName = "?"; + } + LOG.warn("Using fallback font " + fontName + " for base font " + getBaseFont()); + } + isEmbedded = false; + isDamaged = false; + fontMatrixTransform = new AffineTransform(); + } + + /** + * Returns the PostScript name of the font. 
+ */ + private String getBaseFont() { + return dict.getNameAsString(COSName.BASE_FONT); + } + + @Override + public float getHeight(int code) throws IOException { + String name = codeToName(code); + if (getStandard14AFM() != null) { + String afmName = getEncoding().getName(code); + return getStandard14AFM().getCharacterHeight(afmName); + } else { + return (float) genericFont.getPath(name).getBounds().getHeight(); + } + } + + @Override + protected byte[] encode(int unicode) throws IOException { // returns the single-byte code for a code point; anything the encoding or font cannot represent becomes a space + byte[] bytes = codeToBytesMap.get(unicode); + if (bytes != null) { + return bytes; + } + + String name = getGlyphList().codePointToName(unicode); + if (isStandard14()) { + // genericFont not needed, thus simplified code + // this is important on systems with no installed fonts + if (!encoding.contains(name)) { + return new byte[]{' '}; // explicit 0x20 so the result does not depend on the platform default charset + } + if (".notdef".equals(name)) { + return new byte[]{' '}; + } + } else { + if (!encoding.contains(name)) { + return new byte[]{' '}; + } + + String nameInFont = getNameInFont(name); + + if (nameInFont.equals(".notdef") || !genericFont.hasGlyph(nameInFont)) { + return new byte[]{' '}; + } + } + + Map<String, Integer> inverted = encoding.getNameToCodeMap(); + int code = inverted.get(name); + bytes = new byte[]{(byte) code}; + codeToBytesMap.put(unicode, bytes); // cache under the code point, the same key the lookup at the top of this method uses + return bytes; + } + + @Override + public float getWidthFromFont(int code) throws IOException { + String name = codeToName(code); + + // width of .notdef is ignored for substitutes, see PDFBOX-1900 + if (!isEmbedded && ".notdef".equals(name)) { + return 250; + } + float width = genericFont.getWidth(name); + + Point2D p = new Point2D.Float(width, 0); + fontMatrixTransform.transform(p, p); + return (float) p.getX(); + } + + @Override + public boolean isEmbedded() { + return isEmbedded; + } + + @Override + public float getAverageFontWidth() { + if (getStandard14AFM() != null) { + return getStandard14AFM().getAverageCharacterWidth(); + } else { + return super.getAverageFontWidth(); + } + } + + @Override +
public int readCode(InputStream in) throws IOException { + return in.read(); + } + + @Override + protected Encoding readEncodingFromFont() throws IOException { + if (!isEmbedded() && getStandard14AFM() != null) { + // read from AFM + return new Type1Encoding(getStandard14AFM()); + } else { + // extract from Type1 font/substitute + if (genericFont instanceof EncodedFont) { + return Type1Encoding.fromFontBox(((EncodedFont) genericFont).getEncoding()); + } else { + // default (only happens with TTFs) + return StandardEncoding.INSTANCE; + } + } + } + + @Override + public FontBoxFont getFontBoxFont() { + return genericFont; + } + + @Override + public String getName() { + return getBaseFont(); + } + + @Override + public BoundingBox getBoundingBox() throws IOException { + if (fontBBox == null) { + fontBBox = generateBoundingBox(); + } + return fontBBox; + } + + private BoundingBox generateBoundingBox() throws IOException { + if (getFontDescriptor() != null) { + PDRectangle bbox = getFontDescriptor().getFontBoundingBox(); + if (bbox != null && + (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 || + bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0)) { + return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), + bbox.getUpperRightX(), bbox.getUpperRightY()); + } + } + return genericFont.getFontBBox(); + } + + private String codeToName(int code) throws IOException { + String name = getEncoding().getName(code); + return getNameInFont(name); + } + + /** + * Maps a PostScript glyph name to the name in the underlying font, for example when + * using a TTF font we might map "W" to "uni0057". 
+ */ + private String getNameInFont(String name) throws IOException { + if (isEmbedded() || genericFont.hasGlyph(name)) { + return name; + } else { + // try alternative name + String altName = ALT_NAMES.get(name); + if (altName != null && !name.equals(".notdef") && genericFont.hasGlyph(altName)) { + return altName; + } else { + // try unicode name + String unicodes = getGlyphList().toUnicode(name); + if (unicodes != null && unicodes.length() == 1) { + String uniName = getUniNameOfCodePoint(unicodes.codePointAt(0)); + if (genericFont.hasGlyph(uniName)) { + return uniName; + } + } + } + } + return ".notdef"; + } + + @Override + public GeneralPath getPath(String name) throws IOException { + // Acrobat does not draw .notdef for Type 1 fonts, see PDFBOX-2421 + // I suspect that it does do this for embedded fonts though, but this is untested + if (name.equals(".notdef") && !isEmbedded) { + return new GeneralPath(); + } else { + return genericFont.getPath(getNameInFont(name)); + } + } + + @Override + public boolean hasGlyph(String name) throws IOException { + return genericFont.hasGlyph(getNameInFont(name)); + } + + @Override + public final Matrix getFontMatrix() { + if (fontMatrix == null) { + // PDF specified that Type 1 fonts use a 1000upem matrix, but some fonts specify + // their own custom matrix anyway, for example PDFBOX-2298 + List numbers = null; + try { + numbers = genericFont.getFontMatrix(); + } catch (IOException e) { + fontMatrix = DEFAULT_FONT_MATRIX; + } + + if (numbers != null && numbers.size() == 6) { + fontMatrix = new Matrix( + numbers.get(0).floatValue(), numbers.get(1).floatValue(), + numbers.get(2).floatValue(), numbers.get(3).floatValue(), + numbers.get(4).floatValue(), numbers.get(5).floatValue()); + } else { + return super.getFontMatrix(); + } + } + return fontMatrix; + } + + @Override + public boolean isDamaged() { + return isDamaged; + } +} diff --git a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java 
b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java index 3ce2ceac..071051ad 100644 --- a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java +++ b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java @@ -263,7 +263,7 @@ public class TestDocumentResource extends BaseJerseyTest { .cookie(TokenBasedSecurityFilter.COOKIE_NAME, document1Token) .post(Entity.form(new Form() .param("title", "My new super document 1") - .param("description", "My new super description for document\r\n\u00A0 1") + .param("description", "My new super description for document\r\n\u00A0\u0009 1") .param("subject", "My new subject for document 1") .param("identifier", "My new identifier for document 1") .param("publisher", "My new publisher for document 1")