001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language.bm;
019
020import java.util.Collections;
021import java.util.EnumMap;
022import java.util.HashSet;
023import java.util.Map;
024import java.util.NoSuchElementException;
025import java.util.Scanner;
026import java.util.Set;
027import java.util.stream.Collectors;
028
029import org.apache.commons.codec.Resources;
030
031/**
032 * Language codes.
033 * <p>
034 * Language codes are typically loaded from resource files. These are UTF-8
035 * encoded text files. They are systematically named following the pattern:
036 * </p>
037 * <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()}
038 * languages.txt</blockquote>
039 * <p>
040 * The format of these resources is the following:
041 * </p>
042 * <ul>
043 * <li><b>Language:</b> a single string containing no whitespace</li>
044 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text
045 * following on that line to be discarded as a comment.</li>
046 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start
047 * multi-line commenting mode. This will skip all content until a line ending in
048 * '*' and '/' is found.</li>
049 * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
050 * </ul>
051 * <p>
052 * Ported from language.php
053 * </p>
054 * <p>
055 * This class is immutable and thread-safe.
056 * </p>
057 *
058 * @since 1.6
059 */
060public class Languages {
061    // Implementation note: This class is divided into two sections. The first part
062    // is a static factory interface that
063    // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in
064    // NameType.* as a list of supported
065    // languages, and a second part that provides instance methods for accessing
066    // this set for supported languages.
067
068    public static final String ANY = "any";
069
070    private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class);
071
072    /**
073     * No languages at all.
074     */
075    public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
076        @Override
077        public boolean contains(final String language) {
078            return false;
079        }
080
081        @Override
082        public String getAny() {
083            throw new NoSuchElementException("Can't fetch any language from the empty language set.");
084        }
085
086        @Override
087        public boolean isEmpty() {
088            return true;
089        }
090
091        @Override
092        public boolean isSingleton() {
093            return false;
094        }
095
096        @Override
097        public LanguageSet restrictTo(final LanguageSet other) {
098            return this;
099        }
100
101        @Override
102        public LanguageSet merge(final LanguageSet other) {
103            return other;
104        }
105
106        @Override
107        public String toString() {
108            return "NO_LANGUAGES";
109        }
110    };
111
112    /**
113     * Any/all languages.
114     */
115    public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
116        @Override
117        public boolean contains(final String language) {
118            return true;
119        }
120
121        @Override
122        public String getAny() {
123            throw new NoSuchElementException("Can't fetch any language from the any language set.");
124        }
125
126        @Override
127        public boolean isEmpty() {
128            return false;
129        }
130
131        @Override
132        public boolean isSingleton() {
133            return false;
134        }
135
136        @Override
137        public LanguageSet restrictTo(final LanguageSet other) {
138            return other;
139        }
140
141        @Override
142        public LanguageSet merge(final LanguageSet other) {
143            return other;
144        }
145
146        @Override
147        public String toString() {
148            return "ANY_LANGUAGE";
149        }
150    };
151
152    private final Set<String> languages;
153
154    /**
155     * A set of languages.
156     */
157    public static abstract class LanguageSet {
158
159        public static LanguageSet from(final Set<String> langs) {
160            return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);
161        }
162
163        public abstract boolean contains(String language);
164
165        public abstract String getAny();
166
167        public abstract boolean isEmpty();
168
169        public abstract boolean isSingleton();
170
171        public abstract LanguageSet restrictTo(LanguageSet other);
172
173        abstract LanguageSet merge(LanguageSet other);
174    }
175
176    /**
177     * Some languages, explicitly enumerated.
178     */
179    public static final class SomeLanguages extends LanguageSet {
180        private final Set<String> languages;
181
182        private SomeLanguages(final Set<String> languages) {
183            this.languages = Collections.unmodifiableSet(languages);
184        }
185
186        @Override
187        public boolean contains(final String language) {
188            return this.languages.contains(language);
189        }
190
191        @Override
192        public String getAny() {
193            return this.languages.iterator().next();
194        }
195
196        public Set<String> getLanguages() {
197            return this.languages;
198        }
199
200        @Override
201        public boolean isEmpty() {
202            return this.languages.isEmpty();
203        }
204
205        @Override
206        public boolean isSingleton() {
207            return this.languages.size() == 1;
208        }
209
210        @Override
211        public LanguageSet restrictTo(final LanguageSet other) {
212            if (other == NO_LANGUAGES) {
213                return other;
214            }
215            if (other == ANY_LANGUAGE) {
216                return this;
217            }
218            final SomeLanguages someLanguages = (SomeLanguages) other;
219            return from(languages.stream().filter(lang -> someLanguages.languages.contains(lang)).collect(Collectors.toSet()));
220        }
221
222        @Override
223        public LanguageSet merge(final LanguageSet other) {
224            if (other == NO_LANGUAGES) {
225                return this;
226            }
227            if (other == ANY_LANGUAGE) {
228                return other;
229            }
230            final SomeLanguages someLanguages = (SomeLanguages) other;
231            final Set<String> set = new HashSet<>(languages);
232            set.addAll(someLanguages.languages);
233            return from(set);
234        }
235
236        @Override
237        public String toString() {
238            return "Languages(" + languages.toString() + ")";
239        }
240
241    }
242
243    static {
244        for (final NameType s : NameType.values()) {
245            LANGUAGES.put(s, getInstance(langResourceName(s)));
246        }
247    }
248
249    public static Languages getInstance(final NameType nameType) {
250        return LANGUAGES.get(nameType);
251    }
252
253    public static Languages getInstance(final String languagesResourceName) {
254        // read languages list
255        final Set<String> ls = new HashSet<>();
256        try (final Scanner lsScanner = new Scanner(Resources.getInputStream(languagesResourceName),
257                ResourceConstants.ENCODING)) {
258            boolean inExtendedComment = false;
259            while (lsScanner.hasNextLine()) {
260                final String line = lsScanner.nextLine().trim();
261                if (inExtendedComment) {
262                    if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
263                        inExtendedComment = false;
264                    }
265                } else if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
266                    inExtendedComment = true;
267                } else if (!line.isEmpty()) {
268                    ls.add(line);
269                }
270            }
271            return new Languages(Collections.unmodifiableSet(ls));
272        }
273    }
274
275    private static String langResourceName(final NameType nameType) {
276        return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
277    }
278
279    private Languages(final Set<String> languages) {
280        this.languages = languages;
281    }
282
283    public Set<String> getLanguages() {
284        return this.languages;
285    }
286}