1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.lang3;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.Comparator;
24 import java.util.LinkedHashSet;
25 import java.util.List;
26 import java.util.Locale;
27 import java.util.Set;
28 import java.util.concurrent.ConcurrentHashMap;
29 import java.util.concurrent.ConcurrentMap;
30 import java.util.function.Predicate;
31 import java.util.stream.Collectors;
32
33 /**
34 * Operations to assist when working with a {@link Locale}.
35 *
36 * <p>
37 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a {@code null} input. Each method documents its behavior in
38 * more detail.
39 * </p>
40 *
41 * @see Locale
42 * @since 2.2
43 */
44 public class LocaleUtils {
45
46 /**
47 * Avoids synchronization, initializes on demand.
48 */
49 private static final class SyncAvoid {
50
51 /** Private unmodifiable and sorted list of available locales. */
52 private static final List<Locale> AVAILABLE_LOCALE_ULIST;
53
54 /** Private unmodifiable set of available locales. */
55 private static final Set<Locale> AVAILABLE_LOCALE_USET;
56 static {
57 AVAILABLE_LOCALE_ULIST = Collections
58 .unmodifiableList(Arrays.asList(ArraySorter.sort(Locale.getAvailableLocales(), Comparator.comparing(Locale::toString))));
59 AVAILABLE_LOCALE_USET = Collections.unmodifiableSet(new LinkedHashSet<>(AVAILABLE_LOCALE_ULIST));
60 }
61 }
62
63 /**
64 * The underscore character {@code '}{@value}{@code '}.
65 */
66 private static final char UNDERSCORE = '_';
67
68 /**
69 * The undetermined language {@value}.
70 * <p>
71 * If a language is empty, or not <em>well-formed</am> (for example "a" or "e2"), {@link Locale#toLanguageTag()} will return {@code "und"} (Undetermined).
72 * </p>
73 *
74 * @see Locale#toLanguageTag()
75 */
76 private static final String UNDETERMINED = "und";
77
78 /**
79 * The dash character {@code '}{@value}{@code '}.
80 */
81 private static final char DASH = '-';
82
83 /**
84 * Concurrent map of language locales by country.
85 */
86 private static final ConcurrentMap<String, List<Locale>> cLanguagesByCountry = new ConcurrentHashMap<>();
87
88 /**
89 * Concurrent map of country locales by language.
90 */
91 private static final ConcurrentMap<String, List<Locale>> cCountriesByLanguage = new ConcurrentHashMap<>();
92
93 /**
94 * Obtains an unmodifiable and sorted list of installed locales.
95 *
96 * <p>
97 * This method is a wrapper around {@link Locale#getAvailableLocales()}. It is more efficient, as the JDK method must create a new array each time it is
98 * called.
99 * </p>
100 *
101 * @return the unmodifiable and sorted list of available locales.
102 */
103 public static List<Locale> availableLocaleList() {
104 return SyncAvoid.AVAILABLE_LOCALE_ULIST;
105 }
106
107 private static List<Locale> availableLocaleList(final Predicate<Locale> predicate) {
108 return availableLocaleList().stream().filter(predicate).collect(Collectors.toList());
109 }
110
111 /**
112 * Obtains an unmodifiable set of installed locales.
113 *
114 * <p>
115 * This method is a wrapper around {@link Locale#getAvailableLocales()}. It is more efficient, as the JDK method must create a new array each time it is
116 * called.
117 * </p>
118 *
119 * @return the unmodifiable set of available locales.
120 */
121 public static Set<Locale> availableLocaleSet() {
122 return SyncAvoid.AVAILABLE_LOCALE_USET;
123 }
124
125 /**
126 * Obtains the list of countries supported for a given language.
127 *
128 * <p>
129 * This method takes a language code and searches to find the countries available for that language. Variant locales are removed.
130 * </p>
131 *
132 * @param languageCode the 2 letter language code, null returns empty.
133 * @return an unmodifiable List of Locale objects, not null.
134 */
135 public static List<Locale> countriesByLanguage(final String languageCode) {
136 if (languageCode == null) {
137 return Collections.emptyList();
138 }
139 return cCountriesByLanguage.computeIfAbsent(languageCode, lc -> Collections
140 .unmodifiableList(availableLocaleList(locale -> languageCode.equals(locale.getLanguage()) && !hasCountry(locale) && hasVariant(locale))));
141 }
142
143 /**
144 * Tests whether the given Locale defines a variant.
145 *
146 * @param locale The Locale to test.
147 * @return whether the given Locale defines a variant.
148 */
149 private static boolean hasCountry(final Locale locale) {
150 return locale.getCountry().isEmpty();
151 }
152
153 /**
154 * Tests whether the given Locale defines a country.
155 *
156 * @param locale The Locale to test.
157 * @return whether the given Locale defines a country.
158 */
159 private static boolean hasVariant(final Locale locale) {
160 return locale.getVariant().isEmpty();
161 }
162
163 /**
164 * Tests whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-2 country code.
165 *
166 * @param str The string to test.
167 * @return whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-2 country code.
168 */
169 private static boolean isAlpha2Len(final String str) {
170 return str.length() == 2;
171 }
172
173 /**
174 * Tests whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-3 country code.
175 *
176 * @param str The string to test.
177 * @return whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-3 country code.
178 */
179 private static boolean isAlpha3Len(final String str) {
180 return str.length() == 3;
181 }
182
183 /**
184 * Checks if the locale specified is in the set of available locales.
185 *
186 * @param locale the Locale object to check if it is available.
187 * @return true if the locale is a known locale.
188 */
189 public static boolean isAvailableLocale(final Locale locale) {
190 return availableLocaleSet().contains(locale);
191 }
192
193 /**
194 * Tests whether the given String is a <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-2 country code.
195 *
196 * @param str the String to check.
197 * @return true, is the given String is a <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> compliant country code.
198 */
199 private static boolean isISO3166CountryCode(final String str) {
200 return StringUtils.isAllUpperCase(str) && isAlpha2Len(str);
201 }
202
203 /**
204 * Tests whether the given String is a <a href="https://www.iso.org/iso-639-language-code">ISO 639</a> compliant language code.
205 *
206 * @param str the String to check.
207 * @return true, if the given String is a <a href="https://www.iso.org/iso-639-language-code">ISO 639</a> compliant language code.
208 */
209 private static boolean isISO639LanguageCode(final String str) {
210 return StringUtils.isAllLowerCase(str) && (isAlpha2Len(str) || isAlpha3Len(str));
211 }
212
213 /**
214 * Tests whether a Locale's language is undetermined.
215 * <p>
216 * A Locale's language tag is undetermined if it's value is {@code "und"}. If a language is empty, or not well-formed (for example, "a" or "e2"), it will be
217 * equal to {@code "und"}.
218 * </p>
219 *
220 * @param locale the locale to test.
221 * @return whether a Locale's language is undetermined.
222 * @see Locale#toLanguageTag()
223 * @since 3.14.0
224 */
225 public static boolean isLanguageUndetermined(final Locale locale) {
226 return locale == null || UNDETERMINED.equals(locale.toLanguageTag());
227 }
228
229 /**
230 * TestsNo whether the given String is a UN M.49 numeric area code.
231 *
232 * @param str the String to check.
233 * @return true, is the given String is a UN M.49 numeric area code.
234 */
235 private static boolean isNumericAreaCode(final String str) {
236 return StringUtils.isNumeric(str) && isAlpha3Len(str);
237 }
238
239 /**
240 * Obtains the list of languages supported for a given country.
241 *
242 * <p>
243 * This method takes a country code and searches to find the languages available for that country. Variant locales are removed.
244 * </p>
245 *
246 * @param countryCode the 2-letter country code, null returns empty.
247 * @return an unmodifiable List of Locale objects, not null.
248 */
249 public static List<Locale> languagesByCountry(final String countryCode) {
250 if (countryCode == null) {
251 return Collections.emptyList();
252 }
253 return cLanguagesByCountry.computeIfAbsent(countryCode,
254 k -> Collections.unmodifiableList(availableLocaleList(locale -> countryCode.equals(locale.getCountry()) && hasVariant(locale))));
255 }
256
257 /**
258 * Obtains the list of locales to search through when performing a locale search.
259 *
260 * <pre>
261 * localeLookupList(Locale("fr", "CA", "xxx"))
262 * = [Locale("fr", "CA", "xxx"), Locale("fr", "CA"), Locale("fr")]
263 * </pre>
264 *
265 * @param locale the locale to start from.
266 * @return the unmodifiable list of Locale objects, 0 being locale, not null.
267 */
268 public static List<Locale> localeLookupList(final Locale locale) {
269 return localeLookupList(locale, locale);
270 }
271
272 /**
273 * Obtains the list of locales to search through when performing a locale search.
274 *
275 * <pre>
276 * localeLookupList(Locale("fr", "CA", "xxx"), Locale("en"))
277 * = [Locale("fr", "CA", "xxx"), Locale("fr", "CA"), Locale("fr"), Locale("en"]
278 * </pre>
279 *
280 * <p>
281 * The result list begins with the most specific locale, then the next more general and so on, finishing with the default locale. The list will never
282 * contain the same locale twice.
283 * </p>
284 *
285 * @param locale the locale to start from, null returns empty list.
286 * @param defaultLocale the default locale to use if no other is found.
287 * @return the unmodifiable list of Locale objects, 0 being locale, not null.
288 */
289 public static List<Locale> localeLookupList(final Locale locale, final Locale defaultLocale) {
290 final List<Locale> list = new ArrayList<>(4);
291 if (locale != null) {
292 list.add(locale);
293 if (!hasVariant(locale)) {
294 list.add(new Locale(locale.getLanguage(), locale.getCountry()));
295 }
296 if (!hasCountry(locale)) {
297 list.add(new Locale(locale.getLanguage(), StringUtils.EMPTY));
298 }
299 if (!list.contains(defaultLocale)) {
300 list.add(defaultLocale);
301 }
302 }
303 return Collections.unmodifiableList(list);
304 }
305
306 /**
307 * Creates new {@linkplain Locale} for the given country.
308 *
309 * @param country An ISO 3166 alpha-2 country code or a UN M.49 numeric-3 area code. See the {@linkplain Locale} class description about valid country
310 * values.
311 * @throws NullPointerException thrown if either argument is null.
312 * @return a new new Locale for the given country.
313 * @see Locale#Locale(String, String)
314 */
315 static Locale ofCountry(final String country) {
316 return new Locale(StringUtils.EMPTY, country);
317 }
318
319 /**
320 * Tries to parse a Locale from the given String.
321 * <p>
322 * See {@link Locale} for the format.
323 * </p>
324 *
325 * @param str the String to parse as a Locale.
326 * @return a Locale parsed from the given String.
327 * @throws IllegalArgumentException if the given String cannot be parsed.
328 * @see Locale
329 */
330 private static Locale parseLocale(final String str) {
331 if (isISO639LanguageCode(str)) {
332 return new Locale(str);
333 }
334 final int limit = 3;
335 final char separator = str.indexOf(UNDERSCORE) != -1 ? UNDERSCORE : DASH;
336 final String[] segments = str.split(String.valueOf(separator), 3);
337 final String language = segments[0];
338 if (segments.length == 2) {
339 final String country = segments[1];
340 if (isISO639LanguageCode(language) && isISO3166CountryCode(country) || isNumericAreaCode(country)) {
341 return new Locale(language, country);
342 }
343 } else if (segments.length == limit) {
344 final String country = segments[1];
345 final String variant = segments[2];
346 if (isISO639LanguageCode(language) && (country.isEmpty() || isISO3166CountryCode(country) || isNumericAreaCode(country)) && !variant.isEmpty()) {
347 return new Locale(language, country, variant);
348 }
349 }
350 if (ArrayUtils.contains(Locale.getISOCountries(), str)) {
351 return new Locale(StringUtils.EMPTY, str);
352 }
353 throw new IllegalArgumentException("Invalid locale format: " + str);
354 }
355
356 /**
357 * Returns the given locale if non-{@code null}, otherwise {@link Locale#getDefault()}.
358 *
359 * @param locale a locale or {@code null}.
360 * @return the given locale if non-{@code null}, otherwise {@link Locale#getDefault()}.
361 * @since 3.12.0
362 */
363 public static Locale toLocale(final Locale locale) {
364 return locale != null ? locale : Locale.getDefault();
365 }
366
367 /**
368 * Converts a String to a Locale.
369 *
370 * <p>
371 * This method takes the string format of a locale and creates the locale object from it.
372 * </p>
373 *
374 * <pre>
375 * LocaleUtils.toLocale("") = new Locale("", "")
376 * LocaleUtils.toLocale("en") = new Locale("en", "")
377 * LocaleUtils.toLocale("en_GB") = new Locale("en", "GB")
378 * LocaleUtils.toLocale("en-GB") = new Locale("en", "GB")
379 * LocaleUtils.toLocale("en_001") = new Locale("en", "001")
380 * LocaleUtils.toLocale("en_GB_xxx") = new Locale("en", "GB", "xxx") (#)
381 * LocaleUtils.toLocale("US") = new Locale("", "US") // Because "US" is Locale.getISOCountries()
382 * </pre>
383 *
384 * <p>
385 * (#) The behavior of the JDK variant constructor changed between JDK1.3 and JDK1.4. In JDK1.3, the constructor upper cases the variant, in JDK1.4, it
386 * doesn't. Thus, the result from getVariant() may vary depending on your JDK.
387 * </p>
388 *
389 * <p>
390 * This method validates the input strictly. The language code must be lowercase. The country code must be uppercase. The separator must be an underscore or
391 * a dash. The length must be correct.
392 * </p>
393 *
394 * @param str the locale String to convert, null returns null.
395 * @return a Locale, null if null input.
396 * @throws IllegalArgumentException if the string is an invalid format.
397 * @see Locale#forLanguageTag(String)
398 * @see Locale#getISOCountries()
399 */
400 public static Locale toLocale(final String str) {
401 if (str == null) {
402 // TODO Should this return the default locale?
403 return null;
404 }
405 if (str.isEmpty()) { // LANG-941 - JDK 8 introduced an empty locale where all fields are blank
406 return new Locale(StringUtils.EMPTY, StringUtils.EMPTY);
407 }
408 if (str.contains("#")) { // LANG-879 - Cannot handle Java 7 script & extensions
409 throw new IllegalArgumentException("Invalid locale format: " + str);
410 }
411 final int len = str.length();
412 if (len < 2) {
413 throw new IllegalArgumentException("Invalid locale format: " + str);
414 }
415 final char ch0 = str.charAt(0);
416 if (ch0 == UNDERSCORE || ch0 == DASH) {
417 if (len < 3) {
418 throw new IllegalArgumentException("Invalid locale format: " + str);
419 }
420 final char ch1 = str.charAt(1);
421 final char ch2 = str.charAt(2);
422 if (!Character.isUpperCase(ch1) || !Character.isUpperCase(ch2)) {
423 throw new IllegalArgumentException("Invalid locale format: " + str);
424 }
425 if (len == 3) {
426 return new Locale(StringUtils.EMPTY, str.substring(1, 3));
427 }
428 if (len < 5) {
429 throw new IllegalArgumentException("Invalid locale format: " + str);
430 }
431 if (str.charAt(3) != ch0) {
432 throw new IllegalArgumentException("Invalid locale format: " + str);
433 }
434 return new Locale(StringUtils.EMPTY, str.substring(1, 3), str.substring(4));
435 }
436 return parseLocale(str);
437 }
438
439 /**
440 * {@link LocaleUtils} instances should NOT be constructed in standard programming. Instead, the class should be used as
441 * {@code LocaleUtils.toLocale("en_GB");}.
442 *
443 * <p>
444 * This constructor is public to permit tools that require a JavaBean instance to operate.
445 * </p>
446 *
447 * @deprecated TODO Make private in 4.0.
448 */
449 @Deprecated
450 public LocaleUtils() {
451 // empty
452 }
453 }