001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.language.bm; 019 020import java.util.Collections; 021import java.util.EnumMap; 022import java.util.HashSet; 023import java.util.Map; 024import java.util.NoSuchElementException; 025import java.util.Scanner; 026import java.util.Set; 027import java.util.stream.Collectors; 028 029import org.apache.commons.codec.Resources; 030 031/** 032 * Language codes. 033 * <p> 034 * Language codes are typically loaded from resource files. These are UTF-8 035 * encoded text files. They are systematically named following the pattern: 036 * </p> 037 * <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()} 038 * languages.txt</blockquote> 039 * <p> 040 * The format of these resources is the following: 041 * </p> 042 * <ul> 043 * <li><b>Language:</b> a single string containing no whitespace</li> 044 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text 045 * following on that line to be discarded as a comment.</li> 046 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start 047 * multi-line commenting mode. This will skip all content until a line ending in 048 * '*' and '/' is found.</li> 049 * <li><b>Blank lines:</b> All blank lines will be skipped.</li> 050 * </ul> 051 * <p> 052 * Ported from language.php 053 * </p> 054 * <p> 055 * This class is immutable and thread-safe. 056 * </p> 057 * 058 * @since 1.6 059 */ 060public class Languages { 061 // Implementation note: This class is divided into two sections. The first part 062 // is a static factory interface that 063 // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in 064 // NameType.* as a list of supported 065 // languages, and a second part that provides instance methods for accessing 066 // this set for supported languages. 067 068 public static final String ANY = "any"; 069 070 private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class); 071 072 /** 073 * No languages at all. 074 */ 075 public static final LanguageSet NO_LANGUAGES = new LanguageSet() { 076 @Override 077 public boolean contains(final String language) { 078 return false; 079 } 080 081 @Override 082 public String getAny() { 083 throw new NoSuchElementException("Can't fetch any language from the empty language set."); 084 } 085 086 @Override 087 public boolean isEmpty() { 088 return true; 089 } 090 091 @Override 092 public boolean isSingleton() { 093 return false; 094 } 095 096 @Override 097 public LanguageSet restrictTo(final LanguageSet other) { 098 return this; 099 } 100 101 @Override 102 public LanguageSet merge(final LanguageSet other) { 103 return other; 104 } 105 106 @Override 107 public String toString() { 108 return "NO_LANGUAGES"; 109 } 110 }; 111 112 /** 113 * Any/all languages. 114 */ 115 public static final LanguageSet ANY_LANGUAGE = new LanguageSet() { 116 @Override 117 public boolean contains(final String language) { 118 return true; 119 } 120 121 @Override 122 public String getAny() { 123 throw new NoSuchElementException("Can't fetch any language from the any language set."); 124 } 125 126 @Override 127 public boolean isEmpty() { 128 return false; 129 } 130 131 @Override 132 public boolean isSingleton() { 133 return false; 134 } 135 136 @Override 137 public LanguageSet restrictTo(final LanguageSet other) { 138 return other; 139 } 140 141 @Override 142 public LanguageSet merge(final LanguageSet other) { 143 return other; 144 } 145 146 @Override 147 public String toString() { 148 return "ANY_LANGUAGE"; 149 } 150 }; 151 152 private final Set<String> languages; 153 154 /** 155 * A set of languages. 156 */ 157 public static abstract class LanguageSet { 158 159 public static LanguageSet from(final Set<String> langs) { 160 return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs); 161 } 162 163 public abstract boolean contains(String language); 164 165 public abstract String getAny(); 166 167 public abstract boolean isEmpty(); 168 169 public abstract boolean isSingleton(); 170 171 public abstract LanguageSet restrictTo(LanguageSet other); 172 173 abstract LanguageSet merge(LanguageSet other); 174 } 175 176 /** 177 * Some languages, explicitly enumerated. 178 */ 179 public static final class SomeLanguages extends LanguageSet { 180 private final Set<String> languages; 181 182 private SomeLanguages(final Set<String> languages) { 183 this.languages = Collections.unmodifiableSet(languages); 184 } 185 186 @Override 187 public boolean contains(final String language) { 188 return this.languages.contains(language); 189 } 190 191 @Override 192 public String getAny() { 193 return this.languages.iterator().next(); 194 } 195 196 public Set<String> getLanguages() { 197 return this.languages; 198 } 199 200 @Override 201 public boolean isEmpty() { 202 return this.languages.isEmpty(); 203 } 204 205 @Override 206 public boolean isSingleton() { 207 return this.languages.size() == 1; 208 } 209 210 @Override 211 public LanguageSet restrictTo(final LanguageSet other) { 212 if (other == NO_LANGUAGES) { 213 return other; 214 } 215 if (other == ANY_LANGUAGE) { 216 return this; 217 } 218 final SomeLanguages someLanguages = (SomeLanguages) other; 219 return from(languages.stream().filter(lang -> someLanguages.languages.contains(lang)).collect(Collectors.toSet())); 220 } 221 222 @Override 223 public LanguageSet merge(final LanguageSet other) { 224 if (other == NO_LANGUAGES) { 225 return this; 226 } 227 if (other == ANY_LANGUAGE) { 228 return other; 229 } 230 final SomeLanguages someLanguages = (SomeLanguages) other; 231 final Set<String> set = new HashSet<>(languages); 232 set.addAll(someLanguages.languages); 233 return from(set); 234 } 235 236 @Override 237 public String toString() { 238 return "Languages(" + languages.toString() + ")"; 239 } 240 241 } 242 243 static { 244 for (final NameType s : NameType.values()) { 245 LANGUAGES.put(s, getInstance(langResourceName(s))); 246 } 247 } 248 249 public static Languages getInstance(final NameType nameType) { 250 return LANGUAGES.get(nameType); 251 } 252 253 public static Languages getInstance(final String languagesResourceName) { 254 // read languages list 255 final Set<String> ls = new HashSet<>(); 256 try (final Scanner lsScanner = new Scanner(Resources.getInputStream(languagesResourceName), 257 ResourceConstants.ENCODING)) { 258 boolean inExtendedComment = false; 259 while (lsScanner.hasNextLine()) { 260 final String line = lsScanner.nextLine().trim(); 261 if (inExtendedComment) { 262 if (line.endsWith(ResourceConstants.EXT_CMT_END)) { 263 inExtendedComment = false; 264 } 265 } else if (line.startsWith(ResourceConstants.EXT_CMT_START)) { 266 inExtendedComment = true; 267 } else if (!line.isEmpty()) { 268 ls.add(line); 269 } 270 } 271 return new Languages(Collections.unmodifiableSet(ls)); 272 } 273 } 274 275 private static String langResourceName(final NameType nameType) { 276 return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName()); 277 } 278 279 private Languages(final Set<String> languages) { 280 this.languages = languages; 281 } 282 283 public Set<String> getLanguages() { 284 return this.languages; 285 } 286}