1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.imaging.formats.jpeg.iptc;
19
20 import java.io.ByteArrayInputStream;
21 import java.io.ByteArrayOutputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.nio.ByteOrder;
25 import java.nio.charset.Charset;
26 import java.nio.charset.StandardCharsets;
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 import java.util.Comparator;
30 import java.util.List;
31 import java.util.Objects;
32 import java.util.logging.Level;
33 import java.util.logging.Logger;
34
35 import org.apache.commons.imaging.ImagingConstants;
36 import org.apache.commons.imaging.ImagingException;
37 import org.apache.commons.imaging.ImagingParameters;
38 import org.apache.commons.imaging.common.AbstractBinaryOutputStream;
39 import org.apache.commons.imaging.common.Allocator;
40 import org.apache.commons.imaging.common.BinaryFileParser;
41 import org.apache.commons.imaging.common.BinaryFunctions;
42 import org.apache.commons.imaging.common.ByteConversions;
43 import org.apache.commons.imaging.formats.jpeg.JpegConstants;
44 import org.apache.commons.imaging.formats.jpeg.JpegImagingParameters;
45 import org.apache.commons.imaging.internal.Debug;
46
47 public class IptcParser extends BinaryFileParser {
48
49 private static final Logger LOGGER = Logger.getLogger(IptcParser.class.getName());
50
51 private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN;
52
53
54
55
56
57
58
59
60 private static final List<Integer> PHOTOSHOP_IGNORED_BLOCK_TYPE = Arrays.asList(1084, 1085, 1086, 1087);
61
62 private static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
63 private static final int ENV_TAG_CODED_CHARACTER_SET = 90;
64 private static final byte[] CHARACTER_ESCAPE_SEQUENCE = { '\u001B', '%', 'G' };
65
66
67
68
/**
 * Creates a parser. Nothing beyond the superclass's own construction is initialised here.
 */
public IptcParser() {
    super();
}
72
73 private Charset findCharset(final byte[] codedCharset) {
74 final String codedCharsetString = new String(codedCharset, StandardCharsets.ISO_8859_1);
75 try {
76 if (Charset.isSupported(codedCharsetString)) {
77 return Charset.forName(codedCharsetString);
78 }
79 } catch (final IllegalArgumentException ignored) {
80
81 }
82
83
84 final byte[] codedCharsetNormalized = Allocator.byteArray(codedCharset.length);
85 int j = 0;
86 for (final byte element : codedCharset) {
87 if (element != ' ') {
88 codedCharsetNormalized[j++] = element;
89 }
90 }
91
92 if (Objects.deepEquals(codedCharsetNormalized, CHARACTER_ESCAPE_SEQUENCE)) {
93 return StandardCharsets.UTF_8;
94 }
95 return DEFAULT_CHARSET;
96 }
97
98 public boolean isPhotoshopJpegSegment(final byte[] segmentData) {
99 if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.isStartOf(segmentData)) {
100 return false;
101 }
102
103 final int index = JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size();
104 return index + 4 <= segmentData.length && ByteConversions.toInt(segmentData, index, APP13_BYTE_ORDER) == JpegConstants.CONST_8BIM;
105 }
106
107 protected List<IptcBlock> parseAllBlocks(final byte[] bytes, final boolean strict) throws ImagingException, IOException {
108 final List<IptcBlock> blocks = new ArrayList<>();
109
110 try (InputStream bis = new ByteArrayInputStream(bytes)) {
111
112
113
114
115 final byte[] idString = BinaryFunctions.readBytes("", bis, JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(),
116 "App13 Segment missing identification string");
117 if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.equals(idString)) {
118 throw new ImagingException("Not a Photoshop App13 Segment");
119 }
120
121
122
123 while (true) {
124 final int imageResourceBlockSignature;
125 try {
126 imageResourceBlockSignature = BinaryFunctions.read4Bytes("", bis, "Image Resource Block missing identification string", APP13_BYTE_ORDER);
127 } catch (final IOException ioEx) {
128 break;
129 }
130 if (imageResourceBlockSignature != JpegConstants.CONST_8BIM) {
131 throw new ImagingException("Invalid Image Resource Block Signature");
132 }
133
134 final int blockType = BinaryFunctions.read2Bytes("", bis, "Image Resource Block missing type", APP13_BYTE_ORDER);
135 Debug.debug("blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
136
137
138 if (PHOTOSHOP_IGNORED_BLOCK_TYPE.contains(blockType)) {
139 Debug.debug("Skipping blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
140
141
142
143 BinaryFunctions.searchQuad(JpegConstants.CONST_8BIM, bis);
144 continue;
145 }
146
147 final int blockNameLength = BinaryFunctions.readByte("Name length", bis, "Image Resource Block missing name length");
148 if (blockNameLength > 0) {
149 Debug.debug("blockNameLength: " + blockNameLength + " (0x" + Integer.toHexString(blockNameLength) + ")");
150 }
151 final byte[] blockNameBytes;
152 if (blockNameLength == 0) {
153 BinaryFunctions.readByte("Block name bytes", bis, "Image Resource Block has invalid name");
154 blockNameBytes = ImagingConstants.EMPTY_BYTE_ARRAY;
155 } else {
156 try {
157 blockNameBytes = BinaryFunctions.readBytes("", bis, blockNameLength, "Invalid Image Resource Block name");
158 } catch (final IOException ioEx) {
159 if (strict) {
160 throw ioEx;
161 }
162 break;
163 }
164
165 if (blockNameLength % 2 == 0) {
166 BinaryFunctions.readByte("Padding byte", bis, "Image Resource Block missing padding byte");
167 }
168 }
169
170 final int blockSize = BinaryFunctions.read4Bytes("", bis, "Image Resource Block missing size", APP13_BYTE_ORDER);
171 Debug.debug("blockSize: " + blockSize + " (0x" + Integer.toHexString(blockSize) + ")");
172
173
174
175
176 if (blockSize > bytes.length) {
177 throw new ImagingException("Invalid Block Size : " + blockSize + " > " + bytes.length);
178 }
179
180 final byte[] blockData;
181 try {
182 blockData = BinaryFunctions.readBytes("", bis, blockSize, "Invalid Image Resource Block data");
183 } catch (final IOException ioEx) {
184 if (strict) {
185 throw ioEx;
186 }
187 break;
188 }
189
190 blocks.add(new IptcBlock(blockType, blockNameBytes, blockData));
191
192 if (blockSize % 2 != 0) {
193 BinaryFunctions.readByte("Padding byte", bis, "Image Resource Block missing padding byte");
194 }
195 }
196
197 return blocks;
198 }
199 }
200
201 protected List<IptcRecord> parseIptcBlock(final byte[] bytes) {
202 Charset charset = DEFAULT_CHARSET;
203 final List<IptcRecord> elements = new ArrayList<>();
204
205 int index = 0;
206
207 while (index + 1 < bytes.length) {
208 final int tagMarker = 0xff & bytes[index++];
209 Debug.debug("tagMarker: " + tagMarker + " (0x" + Integer.toHexString(tagMarker) + ")");
210
211 if (tagMarker != IptcConstants.IPTC_RECORD_TAG_MARKER) {
212 if (LOGGER.isLoggable(Level.FINE)) {
213 LOGGER.fine("Unexpected record tag marker in IPTC data.");
214 }
215 return elements;
216 }
217
218 final int recordNumber = 0xff & bytes[index++];
219 Debug.debug("recordNumber: " + recordNumber + " (0x" + Integer.toHexString(recordNumber) + ")");
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239 final int recordType = 0xff & bytes[index];
240 Debug.debug("recordType: " + recordType + " (0x" + Integer.toHexString(recordType) + ")");
241 index++;
242
243 final int recordSize = ByteConversions.toUInt16(bytes, index, getByteOrder());
244 index += 2;
245
246 final boolean extendedDataset = recordSize > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE;
247 final int dataFieldCountLength = recordSize & 0x7fff;
248 if (extendedDataset) {
249 Debug.debug("extendedDataset. dataFieldCountLength: " + dataFieldCountLength);
250 }
251 if (extendedDataset) {
252
253 return elements;
254 }
255
256 final byte[] recordData = BinaryFunctions.copyOfRange(bytes, index, recordSize);
257 index += recordSize;
258
259
260
261
262 if (recordNumber == IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && recordType == ENV_TAG_CODED_CHARACTER_SET) {
263 charset = findCharset(recordData);
264 continue;
265 }
266
267 if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
268 continue;
269 }
270
271 if (recordType == 0) {
272 if (LOGGER.isLoggable(Level.FINE)) {
273 LOGGER.fine("ignore record version record! " + elements.size());
274 }
275
276 continue;
277 }
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302 final String value = new String(recordData, charset);
303
304 final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
305
306
307
308
309
310
311
312
313
314
315
316
317 final IptcRecord element = new IptcRecord(iptcType, value);
318 elements.add(element);
319 }
320
321 return elements;
322 }
323
324 public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final boolean strict) throws ImagingException, IOException {
325 final List<IptcRecord> records = new ArrayList<>();
326
327 final List<IptcBlock> blocks = parseAllBlocks(bytes, strict);
328
329 for (final IptcBlock block : blocks) {
330
331 if (!block.isIptcBlock()) {
332 continue;
333 }
334
335 records.addAll(parseIptcBlock(block.getBlockData()));
336 }
337
338 return new PhotoshopApp13Data(records, blocks);
339 }
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370 public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final ImagingParameters<JpegImagingParameters> params)
371 throws ImagingException, IOException {
372 final boolean strict = params != null && params.isStrict();
373
374 return parsePhotoshopSegment(bytes, strict);
375 }
376
377 public byte[] writeIptcBlock(final List<IptcRecord> elements) throws ImagingException, IOException {
378 return writeIptcBlock(elements, false);
379 }
380
381 public byte[] writeIptcBlock(List<IptcRecord> elements, final boolean forceUtf8Encoding) throws ImagingException, IOException {
382 Charset charset;
383 if (forceUtf8Encoding) {
384
385 charset = StandardCharsets.UTF_8;
386 } else {
387
388
389 charset = DEFAULT_CHARSET;
390 for (final IptcRecord element : elements) {
391 final byte[] recordData = element.getValue().getBytes(charset);
392 if (!new String(recordData, charset).equals(element.getValue())) {
393 charset = StandardCharsets.UTF_8;
394 break;
395 }
396 }
397 }
398 final ByteArrayOutputStream baos = new ByteArrayOutputStream();
399 try (AbstractBinaryOutputStream bos = AbstractBinaryOutputStream.create(baos, getByteOrder())) {
400 if (!charset.equals(DEFAULT_CHARSET)) {
401 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
402 bos.write(IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER);
403 bos.write(ENV_TAG_CODED_CHARACTER_SET);
404 final byte[] codedCharset = CHARACTER_ESCAPE_SEQUENCE;
405 bos.write2Bytes(codedCharset.length);
406 bos.write(codedCharset);
407 }
408
409
410 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
411 bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
412 bos.write(IptcTypes.RECORD_VERSION.type);
413
414 bos.write2Bytes(2);
415 bos.write2Bytes(2);
416
417
418 elements = new ArrayList<>(elements);
419
420
421 final Comparator<IptcRecord> comparator = (e1, e2) -> e2.iptcType.getType() - e1.iptcType.getType();
422 elements.sort(comparator);
423
424
425
426 for (final IptcRecord element : elements) {
427 if (element.iptcType == IptcTypes.RECORD_VERSION) {
428 continue;
429 }
430
431 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
432 bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
433 if (element.iptcType.getType() < 0 || element.iptcType.getType() > 0xff) {
434 throw new ImagingException("Invalid record type: " + element.iptcType.getType());
435 }
436 bos.write(element.iptcType.getType());
437
438 final byte[] recordData = element.getValue().getBytes(charset);
439
440
441
442
443
444 bos.write2Bytes(recordData.length);
445 bos.write(recordData);
446 }
447 }
448
449 return baos.toByteArray();
450 }
451
452 public byte[] writePhotoshopApp13Segment(final PhotoshopApp13Data data) throws IOException, ImagingException {
453 try (ByteArrayOutputStream os = new ByteArrayOutputStream();
454 AbstractBinaryOutputStream bos = AbstractBinaryOutputStream.bigEndian(os)) {
455
456 JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.writeTo(bos);
457
458 final List<IptcBlock> blocks = data.getRawBlocks();
459 for (final IptcBlock block : blocks) {
460 bos.write4Bytes(JpegConstants.CONST_8BIM);
461
462 if (block.getBlockType() < 0 || block.getBlockType() > 0xffff) {
463 throw new ImagingException("Invalid IPTC block type.");
464 }
465 bos.write2Bytes(block.getBlockType());
466
467 final byte[] blockNameBytes = block.getBlockNameBytes();
468 if (blockNameBytes.length > 255) {
469 throw new ImagingException("IPTC block name is too long: " + blockNameBytes.length);
470 }
471 bos.write(blockNameBytes.length);
472 bos.write(blockNameBytes);
473 if (blockNameBytes.length % 2 == 0) {
474 bos.write(0);
475 }
476
477 final byte[] blockData = block.getBlockData();
478 if (blockData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
479 throw new ImagingException("IPTC block data is too long: " + blockData.length);
480 }
481 bos.write4Bytes(blockData.length);
482 bos.write(blockData);
483 if (blockData.length % 2 == 1) {
484 bos.write(0);
485 }
486 }
487
488 bos.flush();
489 return os.toByteArray();
490 }
491 }
492
493 }