001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.lang3;
019
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Collections;
023import java.util.Comparator;
024import java.util.LinkedHashSet;
025import java.util.List;
026import java.util.Locale;
027import java.util.Set;
028import java.util.concurrent.ConcurrentHashMap;
029import java.util.concurrent.ConcurrentMap;
030import java.util.function.Predicate;
031import java.util.stream.Collectors;
032
033/**
034 * Operations to assist when working with a {@link Locale}.
035 *
036 * <p>
037 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a {@code null} input. Each method documents its behavior in
038 * more detail.
039 * </p>
040 *
041 * @see Locale
042 * @since 2.2
043 */
044public class LocaleUtils {
045
046    /**
047     * Avoids synchronization, initializes on demand.
048     */
049    private static final class SyncAvoid {
050
051        /** Private unmodifiable and sorted list of available locales. */
052        private static final List<Locale> AVAILABLE_LOCALE_ULIST;
053
054        /** Private unmodifiable set of available locales. */
055        private static final Set<Locale> AVAILABLE_LOCALE_USET;
056        static {
057            AVAILABLE_LOCALE_ULIST = Collections
058                    .unmodifiableList(Arrays.asList(ArraySorter.sort(Locale.getAvailableLocales(), Comparator.comparing(Locale::toString))));
059            AVAILABLE_LOCALE_USET = Collections.unmodifiableSet(new LinkedHashSet<>(AVAILABLE_LOCALE_ULIST));
060        }
061    }
062
063    /**
064     * The underscore character {@code '}{@value}{@code '}.
065     */
066    private static final char UNDERSCORE = '_';
067
068    /**
069     * The undetermined language {@value}.
070     * <p>
071     * If a language is empty, or not <em>well-formed</am> (for example "a" or "e2"), {@link Locale#toLanguageTag()} will return {@code "und"} (Undetermined).
072     * </p>
073     *
074     * @see Locale#toLanguageTag()
075     */
076    private static final String UNDETERMINED = "und";
077
078    /**
079     * The dash character {@code '}{@value}{@code '}.
080     */
081    private static final char DASH = '-';
082
083    /**
084     * Concurrent map of language locales by country.
085     */
086    private static final ConcurrentMap<String, List<Locale>> cLanguagesByCountry = new ConcurrentHashMap<>();
087
088    /**
089     * Concurrent map of country locales by language.
090     */
091    private static final ConcurrentMap<String, List<Locale>> cCountriesByLanguage = new ConcurrentHashMap<>();
092
093    /**
094     * Obtains an unmodifiable and sorted list of installed locales.
095     *
096     * <p>
097     * This method is a wrapper around {@link Locale#getAvailableLocales()}. It is more efficient, as the JDK method must create a new array each time it is
098     * called.
099     * </p>
100     *
101     * @return the unmodifiable and sorted list of available locales.
102     */
103    public static List<Locale> availableLocaleList() {
104        return SyncAvoid.AVAILABLE_LOCALE_ULIST;
105    }
106
107    private static List<Locale> availableLocaleList(final Predicate<Locale> predicate) {
108        return availableLocaleList().stream().filter(predicate).collect(Collectors.toList());
109    }
110
111    /**
112     * Obtains an unmodifiable set of installed locales.
113     *
114     * <p>
115     * This method is a wrapper around {@link Locale#getAvailableLocales()}. It is more efficient, as the JDK method must create a new array each time it is
116     * called.
117     * </p>
118     *
119     * @return the unmodifiable set of available locales.
120     */
121    public static Set<Locale> availableLocaleSet() {
122        return SyncAvoid.AVAILABLE_LOCALE_USET;
123    }
124
125    /**
126     * Obtains the list of countries supported for a given language.
127     *
128     * <p>
129     * This method takes a language code and searches to find the countries available for that language. Variant locales are removed.
130     * </p>
131     *
132     * @param languageCode the 2 letter language code, null returns empty.
133     * @return an unmodifiable List of Locale objects, not null.
134     */
135    public static List<Locale> countriesByLanguage(final String languageCode) {
136        if (languageCode == null) {
137            return Collections.emptyList();
138        }
139        return cCountriesByLanguage.computeIfAbsent(languageCode, lc -> Collections
140                .unmodifiableList(availableLocaleList(locale -> languageCode.equals(locale.getLanguage()) && !hasCountry(locale) && hasVariant(locale))));
141    }
142
143    /**
144     * Tests whether the given Locale defines a variant.
145     *
146     * @param locale The Locale to test.
147     * @return whether the given Locale defines a variant.
148     */
149    private static boolean hasCountry(final Locale locale) {
150        return locale.getCountry().isEmpty();
151    }
152
153    /**
154     * Tests whether the given Locale defines a country.
155     *
156     * @param locale The Locale to test.
157     * @return whether the given Locale defines a country.
158     */
159    private static boolean hasVariant(final Locale locale) {
160        return locale.getVariant().isEmpty();
161    }
162
163    /**
164     * Tests whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-2 country code.
165     *
166     * @param str The string to test.
167     * @return whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-2 country code.
168     */
169    private static boolean isAlpha2Len(final String str) {
170        return str.length() == 2;
171    }
172
173    /**
174     * Tests whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-3 country code.
175     *
176     * @param str The string to test.
177     * @return whether the given string is the length of an <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-3 country code.
178     */
179    private static boolean isAlpha3Len(final String str) {
180        return str.length() == 3;
181    }
182
183    /**
184     * Checks if the locale specified is in the set of available locales.
185     *
186     * @param locale the Locale object to check if it is available.
187     * @return true if the locale is a known locale.
188     */
189    public static boolean isAvailableLocale(final Locale locale) {
190        return availableLocaleSet().contains(locale);
191    }
192
193    /**
194     * Tests whether the given String is a <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> alpha-2 country code.
195     *
196     * @param str the String to check.
197     * @return true, is the given String is a <a href="https://www.iso.org/iso-3166-country-codes.html">ISO 3166</a> compliant country code.
198     */
199    private static boolean isISO3166CountryCode(final String str) {
200        return StringUtils.isAllUpperCase(str) && isAlpha2Len(str);
201    }
202
203    /**
204     * Tests whether the given String is a <a href="https://www.iso.org/iso-639-language-code">ISO 639</a> compliant language code.
205     *
206     * @param str the String to check.
207     * @return true, if the given String is a <a href="https://www.iso.org/iso-639-language-code">ISO 639</a> compliant language code.
208     */
209    private static boolean isISO639LanguageCode(final String str) {
210        return StringUtils.isAllLowerCase(str) && (isAlpha2Len(str) || isAlpha3Len(str));
211    }
212
213    /**
214     * Tests whether a Locale's language is undetermined.
215     * <p>
216     * A Locale's language tag is undetermined if it's value is {@code "und"}. If a language is empty, or not well-formed (for example, "a" or "e2"), it will be
217     * equal to {@code "und"}.
218     * </p>
219     *
220     * @param locale the locale to test.
221     * @return whether a Locale's language is undetermined.
222     * @see Locale#toLanguageTag()
223     * @since 3.14.0
224     */
225    public static boolean isLanguageUndetermined(final Locale locale) {
226        return locale == null || UNDETERMINED.equals(locale.toLanguageTag());
227    }
228
229    /**
230     * TestsNo whether the given String is a UN M.49 numeric area code.
231     *
232     * @param str the String to check.
233     * @return true, is the given String is a UN M.49 numeric area code.
234     */
235    private static boolean isNumericAreaCode(final String str) {
236        return StringUtils.isNumeric(str) && isAlpha3Len(str);
237    }
238
239    /**
240     * Obtains the list of languages supported for a given country.
241     *
242     * <p>
243     * This method takes a country code and searches to find the languages available for that country. Variant locales are removed.
244     * </p>
245     *
246     * @param countryCode the 2-letter country code, null returns empty.
247     * @return an unmodifiable List of Locale objects, not null.
248     */
249    public static List<Locale> languagesByCountry(final String countryCode) {
250        if (countryCode == null) {
251            return Collections.emptyList();
252        }
253        return cLanguagesByCountry.computeIfAbsent(countryCode,
254                k -> Collections.unmodifiableList(availableLocaleList(locale -> countryCode.equals(locale.getCountry()) && hasVariant(locale))));
255    }
256
257    /**
258     * Obtains the list of locales to search through when performing a locale search.
259     *
260     * <pre>
261     * localeLookupList(Locale("fr", "CA", "xxx"))
262     *   = [Locale("fr", "CA", "xxx"), Locale("fr", "CA"), Locale("fr")]
263     * </pre>
264     *
265     * @param locale the locale to start from.
266     * @return the unmodifiable list of Locale objects, 0 being locale, not null.
267     */
268    public static List<Locale> localeLookupList(final Locale locale) {
269        return localeLookupList(locale, locale);
270    }
271
272    /**
273     * Obtains the list of locales to search through when performing a locale search.
274     *
275     * <pre>
276     * localeLookupList(Locale("fr", "CA", "xxx"), Locale("en"))
277     *   = [Locale("fr", "CA", "xxx"), Locale("fr", "CA"), Locale("fr"), Locale("en"]
278     * </pre>
279     *
280     * <p>
281     * The result list begins with the most specific locale, then the next more general and so on, finishing with the default locale. The list will never
282     * contain the same locale twice.
283     * </p>
284     *
285     * @param locale        the locale to start from, null returns empty list.
286     * @param defaultLocale the default locale to use if no other is found.
287     * @return the unmodifiable list of Locale objects, 0 being locale, not null.
288     */
289    public static List<Locale> localeLookupList(final Locale locale, final Locale defaultLocale) {
290        final List<Locale> list = new ArrayList<>(4);
291        if (locale != null) {
292            list.add(locale);
293            if (!hasVariant(locale)) {
294                list.add(new Locale(locale.getLanguage(), locale.getCountry()));
295            }
296            if (!hasCountry(locale)) {
297                list.add(new Locale(locale.getLanguage(), StringUtils.EMPTY));
298            }
299            if (!list.contains(defaultLocale)) {
300                list.add(defaultLocale);
301            }
302        }
303        return Collections.unmodifiableList(list);
304    }
305
306    /**
307     * Creates new {@linkplain Locale} for the given country.
308     *
309     * @param country An ISO 3166 alpha-2 country code or a UN M.49 numeric-3 area code. See the {@linkplain Locale} class description about valid country
310     *                values.
311     * @throws NullPointerException thrown if either argument is null.
312     * @return a new new Locale for the given country.
313     * @see Locale#Locale(String, String)
314     */
315    static Locale ofCountry(final String country) {
316        return new Locale(StringUtils.EMPTY, country);
317    }
318
319    /**
320     * Tries to parse a Locale from the given String.
321     * <p>
322     * See {@link Locale} for the format.
323     * </p>
324     *
325     * @param str the String to parse as a Locale.
326     * @return a Locale parsed from the given String.
327     * @throws IllegalArgumentException if the given String cannot be parsed.
328     * @see Locale
329     */
330    private static Locale parseLocale(final String str) {
331        if (isISO639LanguageCode(str)) {
332            return new Locale(str);
333        }
334        final int limit = 3;
335        final char separator = str.indexOf(UNDERSCORE) != -1 ? UNDERSCORE : DASH;
336        final String[] segments = str.split(String.valueOf(separator), 3);
337        final String language = segments[0];
338        if (segments.length == 2) {
339            final String country = segments[1];
340            if (isISO639LanguageCode(language) && isISO3166CountryCode(country) || isNumericAreaCode(country)) {
341                return new Locale(language, country);
342            }
343        } else if (segments.length == limit) {
344            final String country = segments[1];
345            final String variant = segments[2];
346            if (isISO639LanguageCode(language) && (country.isEmpty() || isISO3166CountryCode(country) || isNumericAreaCode(country)) && !variant.isEmpty()) {
347                return new Locale(language, country, variant);
348            }
349        }
350        if (ArrayUtils.contains(Locale.getISOCountries(), str)) {
351            return new Locale(StringUtils.EMPTY, str);
352        }
353        throw new IllegalArgumentException("Invalid locale format: " + str);
354    }
355
356    /**
357     * Returns the given locale if non-{@code null}, otherwise {@link Locale#getDefault()}.
358     *
359     * @param locale a locale or {@code null}.
360     * @return the given locale if non-{@code null}, otherwise {@link Locale#getDefault()}.
361     * @since 3.12.0
362     */
363    public static Locale toLocale(final Locale locale) {
364        return locale != null ? locale : Locale.getDefault();
365    }
366
367    /**
368     * Converts a String to a Locale.
369     *
370     * <p>
371     * This method takes the string format of a locale and creates the locale object from it.
372     * </p>
373     *
374     * <pre>
375     *   LocaleUtils.toLocale("")           = new Locale("", "")
376     *   LocaleUtils.toLocale("en")         = new Locale("en", "")
377     *   LocaleUtils.toLocale("en_GB")      = new Locale("en", "GB")
378     *   LocaleUtils.toLocale("en-GB")      = new Locale("en", "GB")
379     *   LocaleUtils.toLocale("en_001")     = new Locale("en", "001")
380     *   LocaleUtils.toLocale("en_GB_xxx")  = new Locale("en", "GB", "xxx")   (#)
381     *   LocaleUtils.toLocale("US")         = new Locale("", "US") // Because "US" is Locale.getISOCountries()
382     * </pre>
383     *
384     * <p>
385     * (#) The behavior of the JDK variant constructor changed between JDK1.3 and JDK1.4. In JDK1.3, the constructor upper cases the variant, in JDK1.4, it
386     * doesn't. Thus, the result from getVariant() may vary depending on your JDK.
387     * </p>
388     *
389     * <p>
390     * This method validates the input strictly. The language code must be lowercase. The country code must be uppercase. The separator must be an underscore or
391     * a dash. The length must be correct.
392     * </p>
393     *
394     * @param str the locale String to convert, null returns null.
395     * @return a Locale, null if null input.
396     * @throws IllegalArgumentException if the string is an invalid format.
397     * @see Locale#forLanguageTag(String)
398     * @see Locale#getISOCountries()
399     */
400    public static Locale toLocale(final String str) {
401        if (str == null) {
402            // TODO Should this return the default locale?
403            return null;
404        }
405        if (str.isEmpty()) { // LANG-941 - JDK 8 introduced an empty locale where all fields are blank
406            return new Locale(StringUtils.EMPTY, StringUtils.EMPTY);
407        }
408        if (str.contains("#")) { // LANG-879 - Cannot handle Java 7 script & extensions
409            throw new IllegalArgumentException("Invalid locale format: " + str);
410        }
411        final int len = str.length();
412        if (len < 2) {
413            throw new IllegalArgumentException("Invalid locale format: " + str);
414        }
415        final char ch0 = str.charAt(0);
416        if (ch0 == UNDERSCORE || ch0 == DASH) {
417            if (len < 3) {
418                throw new IllegalArgumentException("Invalid locale format: " + str);
419            }
420            final char ch1 = str.charAt(1);
421            final char ch2 = str.charAt(2);
422            if (!Character.isUpperCase(ch1) || !Character.isUpperCase(ch2)) {
423                throw new IllegalArgumentException("Invalid locale format: " + str);
424            }
425            if (len == 3) {
426                return new Locale(StringUtils.EMPTY, str.substring(1, 3));
427            }
428            if (len < 5) {
429                throw new IllegalArgumentException("Invalid locale format: " + str);
430            }
431            if (str.charAt(3) != ch0) {
432                throw new IllegalArgumentException("Invalid locale format: " + str);
433            }
434            return new Locale(StringUtils.EMPTY, str.substring(1, 3), str.substring(4));
435        }
436        return parseLocale(str);
437    }
438
439    /**
440     * {@link LocaleUtils} instances should NOT be constructed in standard programming. Instead, the class should be used as
441     * {@code LocaleUtils.toLocale("en_GB");}.
442     *
443     * <p>
444     * This constructor is public to permit tools that require a JavaBean instance to operate.
445     * </p>
446     *
447     * @deprecated TODO Make private in 4.0.
448     */
449    @Deprecated
450    public LocaleUtils() {
451        // empty
452    }
453}