View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  
22  import org.apache.commons.lang3.text.translate.AggregateTranslator;
23  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
24  import org.apache.commons.lang3.text.translate.EntityArrays;
25  import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
26  import org.apache.commons.lang3.text.translate.LookupTranslator;
27  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
28  import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
29  import org.apache.commons.lang3.text.translate.OctalUnescaper;
30  import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
31  import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
32  
33  /**
 * Escapes and unescapes {@link String}s for
 * Java, JavaScript, HTML and XML.
36   *
37   * <p>#ThreadSafe#</p>
38   *
39   * @since 2.0
40   * @deprecated As of 3.6, use Apache Commons Text
41   * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
42   * StringEscapeUtils</a> instead.
43   */
44  @Deprecated
45  public class StringEscapeUtils {
46  
47      /* ESCAPE TRANSLATORS */
48  
49      private static final class CsvEscaper extends CharSequenceTranslator {
50  
51          private static final char CSV_DELIMITER = ',';
52          private static final char CSV_QUOTE = '"';
53          private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
54          private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
55  
56          @Override
57          public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
58              if (index != 0) {
59                  throw new IllegalStateException("CsvEscaper should never reach the [1] index");
60              }
61              if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
62                  out.write(input.toString());
63              } else {
64                  out.write(CSV_QUOTE);
65                  out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
66                  out.write(CSV_QUOTE);
67              }
68              return Character.codePointCount(input, 0, input.length());
69          }
70      }
71  
72      private static final class CsvUnescaper extends CharSequenceTranslator {
73  
74          private static final char CSV_DELIMITER = ',';
75          private static final char CSV_QUOTE = '"';
76          private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
77          private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
78  
79          @Override
80          public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
81              if (index != 0) {
82                  throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
83              }
84              if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
85                  out.write(input.toString());
86                  return Character.codePointCount(input, 0, input.length());
87              }
88              // strip quotes
89              final String quoteless = input.subSequence(1, input.length() - 1).toString();
90              if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
91                  // deal with escaped quotes; ie) ""
92                  out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
93              } else {
94                  out.write(input.toString());
95              }
96              return Character.codePointCount(input, 0, input.length());
97          }
98      }
99  
100     /**
101      * Translator object for escaping Java.
102      *
103      * While {@link #escapeJava(String)} is the expected method of use, this
104      * object allows the Java escaping functionality to be used
105      * as the foundation for a custom translator.
106      *
107      * @since 3.0
108      */
109     public static final CharSequenceTranslator ESCAPE_JAVA =
110           new LookupTranslator(
111             new String[][] {
112               {"\"", "\\\""},
113               {"\\", "\\\\"},
114           }).with(
115             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
116           ).with(
117             JavaUnicodeEscaper.outsideOf(32, 0x7f)
118         );
119 
120     /**
121      * Translator object for escaping EcmaScript/JavaScript.
122      *
123      * While {@link #escapeEcmaScript(String)} is the expected method of use, this
124      * object allows the EcmaScript escaping functionality to be used
125      * as the foundation for a custom translator.
126      *
127      * @since 3.0
128      */
129     public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
130         new AggregateTranslator(
131             new LookupTranslator(
132                       new String[][] {
133                             {"'", "\\'"},
134                             {"\"", "\\\""},
135                             {"\\", "\\\\"},
136                             {"/", "\\/"}
137                       }),
138             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
139             JavaUnicodeEscaper.outsideOf(32, 0x7f)
140         );
141 
142     /**
143      * Translator object for escaping Json.
144      *
145      * While {@link #escapeJson(String)} is the expected method of use, this
146      * object allows the Json escaping functionality to be used
147      * as the foundation for a custom translator.
148      *
149      * @since 3.2
150      */
151     public static final CharSequenceTranslator ESCAPE_JSON =
152         new AggregateTranslator(
153             new LookupTranslator(
154                       new String[][] {
155                             {"\"", "\\\""},
156                             {"\\", "\\\\"},
157                             {"/", "\\/"}
158                       }),
159             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
160             JavaUnicodeEscaper.outsideOf(32, 0x7f)
161         );
162 
163     /**
164      * Translator object for escaping XML.
165      *
166      * While {@link #escapeXml(String)} is the expected method of use, this
167      * object allows the XML escaping functionality to be used
168      * as the foundation for a custom translator.
169      *
170      * @since 3.0
171      * @deprecated Use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
172      */
173     @Deprecated
174     public static final CharSequenceTranslator ESCAPE_XML =
175         new AggregateTranslator(
176             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
177             new LookupTranslator(EntityArrays.APOS_ESCAPE())
178         );
179 
180     /**
181      * Translator object for escaping XML 1.0.
182      *
183      * While {@link #escapeXml10(String)} is the expected method of use, this
184      * object allows the XML escaping functionality to be used
185      * as the foundation for a custom translator.
186      *
187      * @since 3.3
188      */
189     public static final CharSequenceTranslator ESCAPE_XML10 =
190         new AggregateTranslator(
191             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
192             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
193             new LookupTranslator(
194                     new String[][] {
195                             { "\u0000", StringUtils.EMPTY },
196                             { "\u0001", StringUtils.EMPTY },
197                             { "\u0002", StringUtils.EMPTY },
198                             { "\u0003", StringUtils.EMPTY },
199                             { "\u0004", StringUtils.EMPTY },
200                             { "\u0005", StringUtils.EMPTY },
201                             { "\u0006", StringUtils.EMPTY },
202                             { "\u0007", StringUtils.EMPTY },
203                             { "\u0008", StringUtils.EMPTY },
204                             { "\u000b", StringUtils.EMPTY },
205                             { "\u000c", StringUtils.EMPTY },
206                             { "\u000e", StringUtils.EMPTY },
207                             { "\u000f", StringUtils.EMPTY },
208                             { "\u0010", StringUtils.EMPTY },
209                             { "\u0011", StringUtils.EMPTY },
210                             { "\u0012", StringUtils.EMPTY },
211                             { "\u0013", StringUtils.EMPTY },
212                             { "\u0014", StringUtils.EMPTY },
213                             { "\u0015", StringUtils.EMPTY },
214                             { "\u0016", StringUtils.EMPTY },
215                             { "\u0017", StringUtils.EMPTY },
216                             { "\u0018", StringUtils.EMPTY },
217                             { "\u0019", StringUtils.EMPTY },
218                             { "\u001a", StringUtils.EMPTY },
219                             { "\u001b", StringUtils.EMPTY },
220                             { "\u001c", StringUtils.EMPTY },
221                             { "\u001d", StringUtils.EMPTY },
222                             { "\u001e", StringUtils.EMPTY },
223                             { "\u001f", StringUtils.EMPTY },
224                             { "\ufffe", StringUtils.EMPTY },
225                             { "\uffff", StringUtils.EMPTY }
226                     }),
227             NumericEntityEscaper.between(0x7f, 0x84),
228             NumericEntityEscaper.between(0x86, 0x9f),
229             new UnicodeUnpairedSurrogateRemover()
230         );
231 
232     /**
233      * Translator object for escaping XML 1.1.
234      *
235      * While {@link #escapeXml11(String)} is the expected method of use, this
236      * object allows the XML escaping functionality to be used
237      * as the foundation for a custom translator.
238      *
239      * @since 3.3
240      */
241     public static final CharSequenceTranslator ESCAPE_XML11 =
242         new AggregateTranslator(
243             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
244             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
245             new LookupTranslator(
246                     new String[][] {
247                             { "\u0000", StringUtils.EMPTY },
248                             { "\u000b", "&#11;" },
249                             { "\u000c", "&#12;" },
250                             { "\ufffe", StringUtils.EMPTY },
251                             { "\uffff", StringUtils.EMPTY }
252                     }),
253             NumericEntityEscaper.between(0x1, 0x8),
254             NumericEntityEscaper.between(0xe, 0x1f),
255             NumericEntityEscaper.between(0x7f, 0x84),
256             NumericEntityEscaper.between(0x86, 0x9f),
257             new UnicodeUnpairedSurrogateRemover()
258         );
259 
260     /**
261      * Translator object for escaping HTML version 3.0.
262      *
263      * While {@link #escapeHtml3(String)} is the expected method of use, this
264      * object allows the HTML escaping functionality to be used
265      * as the foundation for a custom translator.
266      *
267      * @since 3.0
268      */
269     public static final CharSequenceTranslator ESCAPE_HTML3 =
270         new AggregateTranslator(
271             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
272             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
273         );
274 
275     /**
276      * Translator object for escaping HTML version 4.0.
277      *
278      * While {@link #escapeHtml4(String)} is the expected method of use, this
279      * object allows the HTML escaping functionality to be used
280      * as the foundation for a custom translator.
281      *
282      * @since 3.0
283      */
284     public static final CharSequenceTranslator ESCAPE_HTML4 =
285         new AggregateTranslator(
286             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
287             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
288             new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
289         );
290 
291     /* UNESCAPE TRANSLATORS */
292 
293     /**
294      * Translator object for escaping individual Comma Separated Values.
295      *
296      * While {@link #escapeCsv(String)} is the expected method of use, this
297      * object allows the CSV escaping functionality to be used
298      * as the foundation for a custom translator.
299      *
300      * @since 3.0
301      */
302     public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
303 
304     /**
305      * Translator object for unescaping escaped Java.
306      *
307      * While {@link #unescapeJava(String)} is the expected method of use, this
308      * object allows the Java unescaping functionality to be used
309      * as the foundation for a custom translator.
310      *
311      * @since 3.0
312      */
313     // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
314     public static final CharSequenceTranslator UNESCAPE_JAVA =
315         new AggregateTranslator(
316             new OctalUnescaper(),     // .between('\1', '\377'),
317             new UnicodeUnescaper(),
318             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
319             new LookupTranslator(
320                       new String[][] {
321                             {"\\\\", "\\"},
322                             {"\\\"", "\""},
323                             {"\\'", "'"},
324                             {"\\", ""}
325                       })
326         );
327 
328     /**
329      * Translator object for unescaping escaped EcmaScript.
330      *
331      * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
332      * object allows the EcmaScript unescaping functionality to be used
333      * as the foundation for a custom translator.
334      *
335      * @since 3.0
336      */
337     public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
338 
339     /**
340      * Translator object for unescaping escaped Json.
341      *
342      * While {@link #unescapeJson(String)} is the expected method of use, this
343      * object allows the Json unescaping functionality to be used
344      * as the foundation for a custom translator.
345      *
346      * @since 3.2
347      */
348     public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
349 
350     /**
351      * Translator object for unescaping escaped HTML 3.0.
352      *
353      * While {@link #unescapeHtml3(String)} is the expected method of use, this
354      * object allows the HTML unescaping functionality to be used
355      * as the foundation for a custom translator.
356      *
357      * @since 3.0
358      */
359     public static final CharSequenceTranslator UNESCAPE_HTML3 =
360         new AggregateTranslator(
361             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
362             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
363             new NumericEntityUnescaper()
364         );
365 
366     /**
367      * Translator object for unescaping escaped HTML 4.0.
368      *
369      * While {@link #unescapeHtml4(String)} is the expected method of use, this
370      * object allows the HTML unescaping functionality to be used
371      * as the foundation for a custom translator.
372      *
373      * @since 3.0
374      */
375     public static final CharSequenceTranslator UNESCAPE_HTML4 =
376         new AggregateTranslator(
377             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
378             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
379             new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
380             new NumericEntityUnescaper()
381         );
382 
383     /**
384      * Translator object for unescaping escaped XML.
385      *
386      * While {@link #unescapeXml(String)} is the expected method of use, this
387      * object allows the XML unescaping functionality to be used
388      * as the foundation for a custom translator.
389      *
390      * @since 3.0
391      */
392     public static final CharSequenceTranslator UNESCAPE_XML =
393         new AggregateTranslator(
394             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
395             new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
396             new NumericEntityUnescaper()
397         );
398 
399     /**
400      * Translator object for unescaping escaped Comma Separated Value entries.
401      *
402      * While {@link #unescapeCsv(String)} is the expected method of use, this
403      * object allows the CSV unescaping functionality to be used
404      * as the foundation for a custom translator.
405      *
406      * @since 3.0
407      */
408     public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
409 
410     /* Helper functions */
411 
412     /**
413      * Returns a {@link String} value for a CSV column enclosed in double quotes,
414      * if required.
415      *
416      * <p>If the value contains a comma, newline or double quote, then the
417      *    String value is returned enclosed in double quotes.</p>
418      *
419      * <p>Any double quote characters in the value are escaped with another double quote.</p>
420      *
421      * <p>If the value does not contain a comma, newline or double quote, then the
422      *    String value is returned unchanged.</p>
423      *
424      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
425      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
426      *
427      * @param input the input CSV column String, may be null
428      * @return the input String, enclosed in double quotes if the value contains a comma,
429      * newline or double quote, {@code null} if null string input
430      * @since 2.4
431      */
432     public static final String escapeCsv(final String input) {
433         return ESCAPE_CSV.translate(input);
434     }
435 
436     /**
437      * Escapes the characters in a {@link String} using EcmaScript String rules.
438      * <p>Escapes any values it finds into their EcmaScript String form.
439      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
440      *
441      * <p>So a tab becomes the characters {@code '\\'} and
442      * {@code 't'}.</p>
443      *
444      * <p>The only difference between Java strings and EcmaScript strings
445      * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
446      *
447      * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
448      *
449      * <p>Example:</p>
450      * <pre>
451      * input string: He didn't say, "Stop!"
452      * output string: He didn\'t say, \"Stop!\"
453      * </pre>
454      *
455      * @param input  String to escape values in, may be null
456      * @return String with escaped values, {@code null} if null string input
457      * @since 3.0
458      */
459     public static final String escapeEcmaScript(final String input) {
460         return ESCAPE_ECMASCRIPT.translate(input);
461     }
462 
463     /**
464      * Escapes the characters in a {@link String} using HTML entities.
465      * <p>Supports only the HTML 3.0 entities.</p>
466      *
467      * @param input  the {@link String} to escape, may be null
468      * @return a new escaped {@link String}, {@code null} if null string input
469      * @since 3.0
470      */
471     public static final String escapeHtml3(final String input) {
472         return ESCAPE_HTML3.translate(input);
473     }
474 
475     /**
476      * Escapes the characters in a {@link String} using HTML entities.
477      *
478      * <p>
479      * For example:
480      * </p>
481      * <p>{@code "bread" &amp; "butter"}</p>
482      * becomes:
483      * <p>
484      * {@code &amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;}.
485      * </p>
486      *
487      * <p>Supports all known HTML 4.0 entities, including funky accents.
488      * Note that the commonly used apostrophe escape character (&amp;apos;)
489      * is not a legal entity and so is not supported).</p>
490      *
491      * @param input  the {@link String} to escape, may be null
492      * @return a new escaped {@link String}, {@code null} if null string input
493      * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
494      * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
495      * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
496      * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
497      * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
498      * @since 3.0
499      */
500     public static final String escapeHtml4(final String input) {
501         return ESCAPE_HTML4.translate(input);
502     }
503 
504     /**
505      * Escapes the characters in a {@link String} using Java String rules.
506      *
507      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
508      *
509      * <p>So a tab becomes the characters {@code '\\'} and
510      * {@code 't'}.</p>
511      *
512      * <p>The only difference between Java strings and JavaScript strings
513      * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
514      *
515      * <p>Example:</p>
516      * <pre>
517      * input string: He didn't say, "Stop!"
518      * output string: He didn't say, \"Stop!\"
519      * </pre>
520      *
521      * @param input  String to escape values in, may be null
522      * @return String with escaped values, {@code null} if null string input
523      */
524     public static final String escapeJava(final String input) {
525         return ESCAPE_JAVA.translate(input);
526     }
527 
528     /**
529      * Escapes the characters in a {@link String} using Json String rules.
530      * <p>Escapes any values it finds into their Json String form.
531      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
532      *
533      * <p>So a tab becomes the characters {@code '\\'} and
534      * {@code 't'}.</p>
535      *
536      * <p>The only difference between Java strings and Json strings
537      * is that in Json, forward-slash (/) is escaped.</p>
538      *
539      * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
540      *
541      * <p>Example:</p>
542      * <pre>
543      * input string: He didn't say, "Stop!"
544      * output string: He didn't say, \"Stop!\"
545      * </pre>
546      *
547      * @param input  String to escape values in, may be null
548      * @return String with escaped values, {@code null} if null string input
549      * @since 3.2
550      */
551     public static final String escapeJson(final String input) {
552         return ESCAPE_JSON.translate(input);
553     }
554 
555     /**
556      * Escapes the characters in a {@link String} using XML entities.
557      *
558      * <p>For example: {@code "bread" & "butter"} =&gt;
559      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
560      * </p>
561      *
562      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
563      * Does not support DTDs or external entities.</p>
564      *
565      * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
566      *    escaped. If you still wish this functionality, you can achieve it
567      *    via the following:
568      * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p>
569      *
570      * @param input  the {@link String} to escape, may be null
571      * @return a new escaped {@link String}, {@code null} if null string input
572      * @see #unescapeXml(String)
573      * @deprecated Use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
574      */
575     @Deprecated
576     public static final String escapeXml(final String input) {
577         return ESCAPE_XML.translate(input);
578     }
579 
580     /**
581      * Escapes the characters in a {@link String} using XML entities.
582      * <p>
583      * For example:
584      * </p>
585      *
586      * <pre>{@code
587      * "bread" & "butter"
588      * }</pre>
589      * <p>
590      * converts to:
591      * </p>
592      *
593      * <pre>
594      * {@code
595      * &quot;bread&quot; &amp; &quot;butter&quot;
596      * }
597      * </pre>
598      *
599      * <p>
600      * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The
601      * method {@code escapeXml10} will remove characters that do not fit in the following ranges:
602      * </p>
603      *
604      * <p>
605      * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
606      * </p>
607      *
608      * <p>
609      * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
610      * </p>
611      *
612      * <p>
613      * {@code [#x7F-#x84] | [#x86-#x9F]}
614      * </p>
615      *
616      * <p>
617      * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
618      * {@link #escapeXml11(String)}.
619      * </p>
620      *
621      * @param input the {@link String} to escape, may be null
622      * @return a new escaped {@link String}, {@code null} if null string input
623      * @see #unescapeXml(String)
624      * @since 3.3
625      */
626     public static String escapeXml10(final String input) {
627         return ESCAPE_XML10.translate(input);
628     }
629 
630     /**
631      * Escapes the characters in a {@link String} using XML entities.
632      *
633      * <p>For example: {@code "bread" & "butter"} =&gt;
634      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
635      * </p>
636      *
637      * <p>XML 1.1 can represent certain control characters, but it cannot represent
638      * the null byte or unpaired Unicode surrogate code points, even after escaping.
639      * {@code escapeXml11} will remove characters that do not fit in the following
640      * ranges:</p>
641      *
642      * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
643      *
644      * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
645      *
646      * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
647      *
648      * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
649      * use it for XML 1.0 documents.</p>
650      *
651      * @param input  the {@link String} to escape, may be null
652      * @return a new escaped {@link String}, {@code null} if null string input
653      * @see #unescapeXml(String)
654      * @since 3.3
655      */
656     public static String escapeXml11(final String input) {
657         return ESCAPE_XML11.translate(input);
658     }
659 
660     /**
661      * Returns a {@link String} value for an unescaped CSV column.
662      *
663      * <p>If the value is enclosed in double quotes, and contains a comma, newline
664      *    or double quote, then quotes are removed.
665      * </p>
666      *
667      * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
668      *    to just one double quote.</p>
669      *
670      * <p>If the value is not enclosed in double quotes, or is and does not contain a
671      *    comma, newline or double quote, then the String value is returned unchanged.</p>
672      *
673      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
674      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
675      *
676      * @param input the input CSV column String, may be null
677      * @return the input String, with enclosing double quotes removed and embedded double
678      * quotes unescaped, {@code null} if null string input
679      * @since 2.4
680      */
681     public static final String unescapeCsv(final String input) {
682         return UNESCAPE_CSV.translate(input);
683     }
684 
685     /**
686      * Unescapes any EcmaScript literals found in the {@link String}.
687      *
688      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
689      * into a newline character, unless the {@code '\'} is preceded by another
690      * {@code '\'}.</p>
691      *
692      * @see #unescapeJava(String)
693      * @param input  the {@link String} to unescape, may be null
694      * @return A new unescaped {@link String}, {@code null} if null string input
695      * @since 3.0
696      */
697     public static final String unescapeEcmaScript(final String input) {
698         return UNESCAPE_ECMASCRIPT.translate(input);
699     }
700 
701     /**
702      * Unescapes a string containing entity escapes to a string
703      * containing the actual Unicode characters corresponding to the
704      * escapes. Supports only HTML 3.0 entities.
705      *
706      * @param input  the {@link String} to unescape, may be null
707      * @return a new unescaped {@link String}, {@code null} if null string input
708      * @since 3.0
709      */
710     public static final String unescapeHtml3(final String input) {
711         return UNESCAPE_HTML3.translate(input);
712     }
713 
714     /**
715      * Unescapes a string containing entity escapes to a string
716      * containing the actual Unicode characters corresponding to the
717      * escapes. Supports HTML 4.0 entities.
718      *
719      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
720      * will become {@code "<Français>"}</p>
721      *
722      * <p>If an entity is unrecognized, it is left alone, and inserted
723      * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
724      * become {@code ">&zzzz;x"}.</p>
725      *
726      * @param input  the {@link String} to unescape, may be null
727      * @return a new unescaped {@link String}, {@code null} if null string input
728      * @since 3.0
729      */
730     public static final String unescapeHtml4(final String input) {
731         return UNESCAPE_HTML4.translate(input);
732     }
733 
734     /**
735      * Unescapes any Java literals found in the {@link String}.
736      * For example, it will turn a sequence of {@code '\'} and
737      * {@code 'n'} into a newline character, unless the {@code '\'}
738      * is preceded by another {@code '\'}.
739      *
740      * @param input  the {@link String} to unescape, may be null
741      * @return a new unescaped {@link String}, {@code null} if null string input
742      */
743     public static final String unescapeJava(final String input) {
744         return UNESCAPE_JAVA.translate(input);
745     }
746 
747     /**
748      * Unescapes any Json literals found in the {@link String}.
749      *
750      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
751      * into a newline character, unless the {@code '\'} is preceded by another
752      * {@code '\'}.</p>
753      *
754      * @see #unescapeJava(String)
755      * @param input  the {@link String} to unescape, may be null
756      * @return A new unescaped {@link String}, {@code null} if null string input
757      * @since 3.2
758      */
759     public static final String unescapeJson(final String input) {
760         return UNESCAPE_JSON.translate(input);
761     }
762 
763     /**
764      * Unescapes a string containing XML entity escapes to a string
765      * containing the actual Unicode characters corresponding to the
766      * escapes.
767      *
768      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
769      * Does not support DTDs or external entities.</p>
770      *
771      * <p>Note that numerical \\u Unicode codes are unescaped to their respective
772      *    Unicode characters. This may change in future releases.</p>
773      *
774      * @param input  the {@link String} to unescape, may be null
775      * @return a new unescaped {@link String}, {@code null} if null string input
776      * @see #escapeXml(String)
777      * @see #escapeXml10(String)
778      * @see #escapeXml11(String)
779      */
780     public static final String unescapeXml(final String input) {
781         return UNESCAPE_XML.translate(input);
782     }
783 
784     /**
785      * {@link StringEscapeUtils} instances should NOT be constructed in
786      * standard programming.
787      *
788      * <p>Instead, the class should be used as:</p>
789      * <pre>StringEscapeUtils.escapeJava("foo");</pre>
790      *
791      * <p>This constructor is public to permit tools that require a JavaBean
792      * instance to operate.</p>
793      *
794      * @deprecated TODO Make private in 4.0.
795      */
796     @Deprecated
797     public StringEscapeUtils() {
798         // empty
799     }
800 
801 }