1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3;
18
19 import java.io.IOException;
20 import java.io.Writer;
21
22 import org.apache.commons.lang3.text.translate.AggregateTranslator;
23 import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
24 import org.apache.commons.lang3.text.translate.EntityArrays;
25 import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
26 import org.apache.commons.lang3.text.translate.LookupTranslator;
27 import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
28 import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
29 import org.apache.commons.lang3.text.translate.OctalUnescaper;
30 import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
31 import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
32
33 /**
 * Escapes and unescapes {@link String}s for
 * Java, JavaScript, HTML and XML.
36 *
37 * <p>#ThreadSafe#</p>
38 *
39 * @since 2.0
40 * @deprecated As of 3.6, use Apache Commons Text
41 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
42 * StringEscapeUtils</a> instead.
43 */
44 @Deprecated
45 public class StringEscapeUtils {
46
47 /* ESCAPE TRANSLATORS */
48
49 private static final class CsvEscaper extends CharSequenceTranslator {
50
51 private static final char CSV_DELIMITER = ',';
52 private static final char CSV_QUOTE = '"';
53 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
54 private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
55
56 @Override
57 public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
58 if (index != 0) {
59 throw new IllegalStateException("CsvEscaper should never reach the [1] index");
60 }
61 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
62 out.write(input.toString());
63 } else {
64 out.write(CSV_QUOTE);
65 out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
66 out.write(CSV_QUOTE);
67 }
68 return Character.codePointCount(input, 0, input.length());
69 }
70 }
71
72 private static final class CsvUnescaper extends CharSequenceTranslator {
73
74 private static final char CSV_DELIMITER = ',';
75 private static final char CSV_QUOTE = '"';
76 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
77 private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
78
79 @Override
80 public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
81 if (index != 0) {
82 throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
83 }
84 if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
85 out.write(input.toString());
86 return Character.codePointCount(input, 0, input.length());
87 }
88 // strip quotes
89 final String quoteless = input.subSequence(1, input.length() - 1).toString();
90 if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
91 // deal with escaped quotes; ie) ""
92 out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
93 } else {
94 out.write(input.toString());
95 }
96 return Character.codePointCount(input, 0, input.length());
97 }
98 }
99
100 /**
101 * Translator object for escaping Java.
102 *
103 * While {@link #escapeJava(String)} is the expected method of use, this
104 * object allows the Java escaping functionality to be used
105 * as the foundation for a custom translator.
106 *
107 * @since 3.0
108 */
109 public static final CharSequenceTranslator ESCAPE_JAVA =
110 new LookupTranslator(
111 new String[][] {
112 {"\"", "\\\""},
113 {"\\", "\\\\"},
114 }).with(
115 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
116 ).with(
117 JavaUnicodeEscaper.outsideOf(32, 0x7f)
118 );
119
120 /**
121 * Translator object for escaping EcmaScript/JavaScript.
122 *
123 * While {@link #escapeEcmaScript(String)} is the expected method of use, this
124 * object allows the EcmaScript escaping functionality to be used
125 * as the foundation for a custom translator.
126 *
127 * @since 3.0
128 */
129 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
130 new AggregateTranslator(
131 new LookupTranslator(
132 new String[][] {
133 {"'", "\\'"},
134 {"\"", "\\\""},
135 {"\\", "\\\\"},
136 {"/", "\\/"}
137 }),
138 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
139 JavaUnicodeEscaper.outsideOf(32, 0x7f)
140 );
141
142 /**
143 * Translator object for escaping Json.
144 *
145 * While {@link #escapeJson(String)} is the expected method of use, this
146 * object allows the Json escaping functionality to be used
147 * as the foundation for a custom translator.
148 *
149 * @since 3.2
150 */
151 public static final CharSequenceTranslator ESCAPE_JSON =
152 new AggregateTranslator(
153 new LookupTranslator(
154 new String[][] {
155 {"\"", "\\\""},
156 {"\\", "\\\\"},
157 {"/", "\\/"}
158 }),
159 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
160 JavaUnicodeEscaper.outsideOf(32, 0x7f)
161 );
162
163 /**
164 * Translator object for escaping XML.
165 *
166 * While {@link #escapeXml(String)} is the expected method of use, this
167 * object allows the XML escaping functionality to be used
168 * as the foundation for a custom translator.
169 *
170 * @since 3.0
171 * @deprecated Use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
172 */
173 @Deprecated
174 public static final CharSequenceTranslator ESCAPE_XML =
175 new AggregateTranslator(
176 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
177 new LookupTranslator(EntityArrays.APOS_ESCAPE())
178 );
179
180 /**
181 * Translator object for escaping XML 1.0.
182 *
183 * While {@link #escapeXml10(String)} is the expected method of use, this
184 * object allows the XML escaping functionality to be used
185 * as the foundation for a custom translator.
186 *
187 * @since 3.3
188 */
189 public static final CharSequenceTranslator ESCAPE_XML10 =
190 new AggregateTranslator(
191 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
192 new LookupTranslator(EntityArrays.APOS_ESCAPE()),
193 new LookupTranslator(
194 new String[][] {
195 { "\u0000", StringUtils.EMPTY },
196 { "\u0001", StringUtils.EMPTY },
197 { "\u0002", StringUtils.EMPTY },
198 { "\u0003", StringUtils.EMPTY },
199 { "\u0004", StringUtils.EMPTY },
200 { "\u0005", StringUtils.EMPTY },
201 { "\u0006", StringUtils.EMPTY },
202 { "\u0007", StringUtils.EMPTY },
203 { "\u0008", StringUtils.EMPTY },
204 { "\u000b", StringUtils.EMPTY },
205 { "\u000c", StringUtils.EMPTY },
206 { "\u000e", StringUtils.EMPTY },
207 { "\u000f", StringUtils.EMPTY },
208 { "\u0010", StringUtils.EMPTY },
209 { "\u0011", StringUtils.EMPTY },
210 { "\u0012", StringUtils.EMPTY },
211 { "\u0013", StringUtils.EMPTY },
212 { "\u0014", StringUtils.EMPTY },
213 { "\u0015", StringUtils.EMPTY },
214 { "\u0016", StringUtils.EMPTY },
215 { "\u0017", StringUtils.EMPTY },
216 { "\u0018", StringUtils.EMPTY },
217 { "\u0019", StringUtils.EMPTY },
218 { "\u001a", StringUtils.EMPTY },
219 { "\u001b", StringUtils.EMPTY },
220 { "\u001c", StringUtils.EMPTY },
221 { "\u001d", StringUtils.EMPTY },
222 { "\u001e", StringUtils.EMPTY },
223 { "\u001f", StringUtils.EMPTY },
224 { "\ufffe", StringUtils.EMPTY },
225 { "\uffff", StringUtils.EMPTY }
226 }),
227 NumericEntityEscaper.between(0x7f, 0x84),
228 NumericEntityEscaper.between(0x86, 0x9f),
229 new UnicodeUnpairedSurrogateRemover()
230 );
231
232 /**
233 * Translator object for escaping XML 1.1.
234 *
235 * While {@link #escapeXml11(String)} is the expected method of use, this
236 * object allows the XML escaping functionality to be used
237 * as the foundation for a custom translator.
238 *
239 * @since 3.3
240 */
241 public static final CharSequenceTranslator ESCAPE_XML11 =
242 new AggregateTranslator(
243 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
244 new LookupTranslator(EntityArrays.APOS_ESCAPE()),
245 new LookupTranslator(
246 new String[][] {
247 { "\u0000", StringUtils.EMPTY },
248 { "\u000b", "" },
249 { "\u000c", "" },
250 { "\ufffe", StringUtils.EMPTY },
251 { "\uffff", StringUtils.EMPTY }
252 }),
253 NumericEntityEscaper.between(0x1, 0x8),
254 NumericEntityEscaper.between(0xe, 0x1f),
255 NumericEntityEscaper.between(0x7f, 0x84),
256 NumericEntityEscaper.between(0x86, 0x9f),
257 new UnicodeUnpairedSurrogateRemover()
258 );
259
260 /**
261 * Translator object for escaping HTML version 3.0.
262 *
263 * While {@link #escapeHtml3(String)} is the expected method of use, this
264 * object allows the HTML escaping functionality to be used
265 * as the foundation for a custom translator.
266 *
267 * @since 3.0
268 */
269 public static final CharSequenceTranslator ESCAPE_HTML3 =
270 new AggregateTranslator(
271 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
272 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
273 );
274
275 /**
276 * Translator object for escaping HTML version 4.0.
277 *
278 * While {@link #escapeHtml4(String)} is the expected method of use, this
279 * object allows the HTML escaping functionality to be used
280 * as the foundation for a custom translator.
281 *
282 * @since 3.0
283 */
284 public static final CharSequenceTranslator ESCAPE_HTML4 =
285 new AggregateTranslator(
286 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
287 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
288 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
289 );
290
291 /* UNESCAPE TRANSLATORS */
292
293 /**
294 * Translator object for escaping individual Comma Separated Values.
295 *
296 * While {@link #escapeCsv(String)} is the expected method of use, this
297 * object allows the CSV escaping functionality to be used
298 * as the foundation for a custom translator.
299 *
300 * @since 3.0
301 */
302 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
303
304 /**
305 * Translator object for unescaping escaped Java.
306 *
307 * While {@link #unescapeJava(String)} is the expected method of use, this
308 * object allows the Java unescaping functionality to be used
309 * as the foundation for a custom translator.
310 *
311 * @since 3.0
312 */
313 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
314 public static final CharSequenceTranslator UNESCAPE_JAVA =
315 new AggregateTranslator(
316 new OctalUnescaper(), // .between('\1', '\377'),
317 new UnicodeUnescaper(),
318 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
319 new LookupTranslator(
320 new String[][] {
321 {"\\\\", "\\"},
322 {"\\\"", "\""},
323 {"\\'", "'"},
324 {"\\", ""}
325 })
326 );
327
328 /**
329 * Translator object for unescaping escaped EcmaScript.
330 *
331 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
332 * object allows the EcmaScript unescaping functionality to be used
333 * as the foundation for a custom translator.
334 *
335 * @since 3.0
336 */
337 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
338
339 /**
340 * Translator object for unescaping escaped Json.
341 *
342 * While {@link #unescapeJson(String)} is the expected method of use, this
343 * object allows the Json unescaping functionality to be used
344 * as the foundation for a custom translator.
345 *
346 * @since 3.2
347 */
348 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
349
350 /**
351 * Translator object for unescaping escaped HTML 3.0.
352 *
353 * While {@link #unescapeHtml3(String)} is the expected method of use, this
354 * object allows the HTML unescaping functionality to be used
355 * as the foundation for a custom translator.
356 *
357 * @since 3.0
358 */
359 public static final CharSequenceTranslator UNESCAPE_HTML3 =
360 new AggregateTranslator(
361 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
362 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
363 new NumericEntityUnescaper()
364 );
365
366 /**
367 * Translator object for unescaping escaped HTML 4.0.
368 *
369 * While {@link #unescapeHtml4(String)} is the expected method of use, this
370 * object allows the HTML unescaping functionality to be used
371 * as the foundation for a custom translator.
372 *
373 * @since 3.0
374 */
375 public static final CharSequenceTranslator UNESCAPE_HTML4 =
376 new AggregateTranslator(
377 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
378 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
379 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
380 new NumericEntityUnescaper()
381 );
382
383 /**
384 * Translator object for unescaping escaped XML.
385 *
386 * While {@link #unescapeXml(String)} is the expected method of use, this
387 * object allows the XML unescaping functionality to be used
388 * as the foundation for a custom translator.
389 *
390 * @since 3.0
391 */
392 public static final CharSequenceTranslator UNESCAPE_XML =
393 new AggregateTranslator(
394 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
395 new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
396 new NumericEntityUnescaper()
397 );
398
399 /**
400 * Translator object for unescaping escaped Comma Separated Value entries.
401 *
402 * While {@link #unescapeCsv(String)} is the expected method of use, this
403 * object allows the CSV unescaping functionality to be used
404 * as the foundation for a custom translator.
405 *
406 * @since 3.0
407 */
408 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
409
410 /* Helper functions */
411
412 /**
413 * Returns a {@link String} value for a CSV column enclosed in double quotes,
414 * if required.
415 *
416 * <p>If the value contains a comma, newline or double quote, then the
417 * String value is returned enclosed in double quotes.</p>
418 *
419 * <p>Any double quote characters in the value are escaped with another double quote.</p>
420 *
421 * <p>If the value does not contain a comma, newline or double quote, then the
422 * String value is returned unchanged.</p>
423 *
424 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
425 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
426 *
427 * @param input the input CSV column String, may be null
428 * @return the input String, enclosed in double quotes if the value contains a comma,
429 * newline or double quote, {@code null} if null string input
430 * @since 2.4
431 */
432 public static final String escapeCsv(final String input) {
433 return ESCAPE_CSV.translate(input);
434 }
435
436 /**
437 * Escapes the characters in a {@link String} using EcmaScript String rules.
438 * <p>Escapes any values it finds into their EcmaScript String form.
439 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
440 *
441 * <p>So a tab becomes the characters {@code '\\'} and
442 * {@code 't'}.</p>
443 *
444 * <p>The only difference between Java strings and EcmaScript strings
445 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
446 *
447 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
448 *
449 * <p>Example:</p>
450 * <pre>
451 * input string: He didn't say, "Stop!"
452 * output string: He didn\'t say, \"Stop!\"
453 * </pre>
454 *
455 * @param input String to escape values in, may be null
456 * @return String with escaped values, {@code null} if null string input
457 * @since 3.0
458 */
459 public static final String escapeEcmaScript(final String input) {
460 return ESCAPE_ECMASCRIPT.translate(input);
461 }
462
463 /**
464 * Escapes the characters in a {@link String} using HTML entities.
465 * <p>Supports only the HTML 3.0 entities.</p>
466 *
467 * @param input the {@link String} to escape, may be null
468 * @return a new escaped {@link String}, {@code null} if null string input
469 * @since 3.0
470 */
471 public static final String escapeHtml3(final String input) {
472 return ESCAPE_HTML3.translate(input);
473 }
474
475 /**
476 * Escapes the characters in a {@link String} using HTML entities.
477 *
478 * <p>
479 * For example:
480 * </p>
481 * <p>{@code "bread" & "butter"}</p>
482 * becomes:
483 * <p>
484 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
485 * </p>
486 *
487 * <p>Supports all known HTML 4.0 entities, including funky accents.
488 * Note that the commonly used apostrophe escape character (&apos;)
489 * is not a legal entity and so is not supported).</p>
490 *
491 * @param input the {@link String} to escape, may be null
492 * @return a new escaped {@link String}, {@code null} if null string input
493 * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
494 * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
495 * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
496 * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
497 * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
498 * @since 3.0
499 */
500 public static final String escapeHtml4(final String input) {
501 return ESCAPE_HTML4.translate(input);
502 }
503
504 /**
505 * Escapes the characters in a {@link String} using Java String rules.
506 *
507 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
508 *
509 * <p>So a tab becomes the characters {@code '\\'} and
510 * {@code 't'}.</p>
511 *
512 * <p>The only difference between Java strings and JavaScript strings
513 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
514 *
515 * <p>Example:</p>
516 * <pre>
517 * input string: He didn't say, "Stop!"
518 * output string: He didn't say, \"Stop!\"
519 * </pre>
520 *
521 * @param input String to escape values in, may be null
522 * @return String with escaped values, {@code null} if null string input
523 */
524 public static final String escapeJava(final String input) {
525 return ESCAPE_JAVA.translate(input);
526 }
527
528 /**
529 * Escapes the characters in a {@link String} using Json String rules.
530 * <p>Escapes any values it finds into their Json String form.
531 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
532 *
533 * <p>So a tab becomes the characters {@code '\\'} and
534 * {@code 't'}.</p>
535 *
536 * <p>The only difference between Java strings and Json strings
537 * is that in Json, forward-slash (/) is escaped.</p>
538 *
539 * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
540 *
541 * <p>Example:</p>
542 * <pre>
543 * input string: He didn't say, "Stop!"
544 * output string: He didn't say, \"Stop!\"
545 * </pre>
546 *
547 * @param input String to escape values in, may be null
548 * @return String with escaped values, {@code null} if null string input
549 * @since 3.2
550 */
551 public static final String escapeJson(final String input) {
552 return ESCAPE_JSON.translate(input);
553 }
554
555 /**
556 * Escapes the characters in a {@link String} using XML entities.
557 *
558 * <p>For example: {@code "bread" & "butter"} =>
559 * {@code "bread" & "butter"}.
560 * </p>
561 *
562 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
563 * Does not support DTDs or external entities.</p>
564 *
565 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
566 * escaped. If you still wish this functionality, you can achieve it
567 * via the following:
568 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p>
569 *
570 * @param input the {@link String} to escape, may be null
571 * @return a new escaped {@link String}, {@code null} if null string input
572 * @see #unescapeXml(String)
573 * @deprecated Use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
574 */
575 @Deprecated
576 public static final String escapeXml(final String input) {
577 return ESCAPE_XML.translate(input);
578 }
579
580 /**
581 * Escapes the characters in a {@link String} using XML entities.
582 * <p>
583 * For example:
584 * </p>
585 *
586 * <pre>{@code
587 * "bread" & "butter"
588 * }</pre>
589 * <p>
590 * converts to:
591 * </p>
592 *
593 * <pre>
594 * {@code
595 * "bread" & "butter"
596 * }
597 * </pre>
598 *
599 * <p>
600 * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The
601 * method {@code escapeXml10} will remove characters that do not fit in the following ranges:
602 * </p>
603 *
604 * <p>
605 * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
606 * </p>
607 *
608 * <p>
609 * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
610 * </p>
611 *
612 * <p>
613 * {@code [#x7F-#x84] | [#x86-#x9F]}
614 * </p>
615 *
616 * <p>
617 * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
618 * {@link #escapeXml11(String)}.
619 * </p>
620 *
621 * @param input the {@link String} to escape, may be null
622 * @return a new escaped {@link String}, {@code null} if null string input
623 * @see #unescapeXml(String)
624 * @since 3.3
625 */
626 public static String escapeXml10(final String input) {
627 return ESCAPE_XML10.translate(input);
628 }
629
630 /**
631 * Escapes the characters in a {@link String} using XML entities.
632 *
633 * <p>For example: {@code "bread" & "butter"} =>
634 * {@code "bread" & "butter"}.
635 * </p>
636 *
637 * <p>XML 1.1 can represent certain control characters, but it cannot represent
638 * the null byte or unpaired Unicode surrogate code points, even after escaping.
639 * {@code escapeXml11} will remove characters that do not fit in the following
640 * ranges:</p>
641 *
642 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
643 *
644 * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
645 *
646 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
647 *
648 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
649 * use it for XML 1.0 documents.</p>
650 *
651 * @param input the {@link String} to escape, may be null
652 * @return a new escaped {@link String}, {@code null} if null string input
653 * @see #unescapeXml(String)
654 * @since 3.3
655 */
656 public static String escapeXml11(final String input) {
657 return ESCAPE_XML11.translate(input);
658 }
659
660 /**
661 * Returns a {@link String} value for an unescaped CSV column.
662 *
663 * <p>If the value is enclosed in double quotes, and contains a comma, newline
664 * or double quote, then quotes are removed.
665 * </p>
666 *
667 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
668 * to just one double quote.</p>
669 *
670 * <p>If the value is not enclosed in double quotes, or is and does not contain a
671 * comma, newline or double quote, then the String value is returned unchanged.</p>
672 *
673 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
674 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
675 *
676 * @param input the input CSV column String, may be null
677 * @return the input String, with enclosing double quotes removed and embedded double
678 * quotes unescaped, {@code null} if null string input
679 * @since 2.4
680 */
681 public static final String unescapeCsv(final String input) {
682 return UNESCAPE_CSV.translate(input);
683 }
684
685 /**
686 * Unescapes any EcmaScript literals found in the {@link String}.
687 *
688 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
689 * into a newline character, unless the {@code '\'} is preceded by another
690 * {@code '\'}.</p>
691 *
692 * @see #unescapeJava(String)
693 * @param input the {@link String} to unescape, may be null
694 * @return A new unescaped {@link String}, {@code null} if null string input
695 * @since 3.0
696 */
697 public static final String unescapeEcmaScript(final String input) {
698 return UNESCAPE_ECMASCRIPT.translate(input);
699 }
700
701 /**
702 * Unescapes a string containing entity escapes to a string
703 * containing the actual Unicode characters corresponding to the
704 * escapes. Supports only HTML 3.0 entities.
705 *
706 * @param input the {@link String} to unescape, may be null
707 * @return a new unescaped {@link String}, {@code null} if null string input
708 * @since 3.0
709 */
710 public static final String unescapeHtml3(final String input) {
711 return UNESCAPE_HTML3.translate(input);
712 }
713
714 /**
715 * Unescapes a string containing entity escapes to a string
716 * containing the actual Unicode characters corresponding to the
717 * escapes. Supports HTML 4.0 entities.
718 *
719 * <p>For example, the string {@code "<Français>"}
720 * will become {@code "<Français>"}</p>
721 *
722 * <p>If an entity is unrecognized, it is left alone, and inserted
723 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will
724 * become {@code ">&zzzz;x"}.</p>
725 *
726 * @param input the {@link String} to unescape, may be null
727 * @return a new unescaped {@link String}, {@code null} if null string input
728 * @since 3.0
729 */
730 public static final String unescapeHtml4(final String input) {
731 return UNESCAPE_HTML4.translate(input);
732 }
733
734 /**
735 * Unescapes any Java literals found in the {@link String}.
736 * For example, it will turn a sequence of {@code '\'} and
737 * {@code 'n'} into a newline character, unless the {@code '\'}
738 * is preceded by another {@code '\'}.
739 *
740 * @param input the {@link String} to unescape, may be null
741 * @return a new unescaped {@link String}, {@code null} if null string input
742 */
743 public static final String unescapeJava(final String input) {
744 return UNESCAPE_JAVA.translate(input);
745 }
746
747 /**
748 * Unescapes any Json literals found in the {@link String}.
749 *
750 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
751 * into a newline character, unless the {@code '\'} is preceded by another
752 * {@code '\'}.</p>
753 *
754 * @see #unescapeJava(String)
755 * @param input the {@link String} to unescape, may be null
756 * @return A new unescaped {@link String}, {@code null} if null string input
757 * @since 3.2
758 */
759 public static final String unescapeJson(final String input) {
760 return UNESCAPE_JSON.translate(input);
761 }
762
763 /**
764 * Unescapes a string containing XML entity escapes to a string
765 * containing the actual Unicode characters corresponding to the
766 * escapes.
767 *
768 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
769 * Does not support DTDs or external entities.</p>
770 *
771 * <p>Note that numerical \\u Unicode codes are unescaped to their respective
772 * Unicode characters. This may change in future releases.</p>
773 *
774 * @param input the {@link String} to unescape, may be null
775 * @return a new unescaped {@link String}, {@code null} if null string input
776 * @see #escapeXml(String)
777 * @see #escapeXml10(String)
778 * @see #escapeXml11(String)
779 */
780 public static final String unescapeXml(final String input) {
781 return UNESCAPE_XML.translate(input);
782 }
783
784 /**
785 * {@link StringEscapeUtils} instances should NOT be constructed in
786 * standard programming.
787 *
788 * <p>Instead, the class should be used as:</p>
789 * <pre>StringEscapeUtils.escapeJava("foo");</pre>
790 *
791 * <p>This constructor is public to permit tools that require a JavaBean
792 * instance to operate.</p>
793 *
794 * @deprecated TODO Make private in 4.0.
795 */
796 @Deprecated
797 public StringEscapeUtils() {
798 // empty
799 }
800
801 }