/*
 * ====== UTF-8 property files in Java 6 ======
 *
 * ===== Using UTF-8 ''ResourceBundle'' =====
 *
 * It is well known that Java 5 and below offered no way to control how the data is interpreted when
 * it is loaded from ''.properties'' files. That is because ''ResourceBundle'' uses an ''InputStream''
 * for loading the data, which is not well suited to UTF-8 data. But how can a UTF-8-aware
 * implementation be injected into ''ResourceBundle''? In Java 6 this is possible thanks to the
 * introduction of the ''ResourceBundle.Control'' abstraction, which is responsible for the actual
 * loading of a bundle. The class below shows how to do it:
 */

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Locale;
import java.util.Properties;
import java.util.ResourceBundle;
import java.util.Set;

/**
 * A {@link ResourceBundle} whose content is read from a {@code .properties} resource decoded as
 * UTF-8.
 *
 * <p>Bundles are obtained through {@link #getUTFBundle(String, Locale)}, which plugs a custom
 * {@link ResourceBundle.Control} into the regular {@code ResourceBundle.getBundle} lookup.
 */
public class UTFPropertyResourceBundle extends ResourceBundle implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Charset used to decode every property resource. */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Bundle loader that creates {@link UTFPropertyResourceBundle} instances for the
     * {@code "java.properties"} format.
     */
    static class UTFPropertyResourceBundleControl extends ResourceBundle.Control {

        /**
         * This method is called from {@link ResourceBundle} in case of a cache miss to actually load
         * a bundle. It is called for different locale combinations (from most specific to least
         * specific) until a non-null result is returned.
         *
         * @param baseName base name of the bundle
         * @param locale locale the bundle is looked up for
         * @param format bundle format; only {@code "java.properties"} is handled here
         * @param loader class loader used to locate the resource
         * @param reload {@code true} if an expired bundle is being reloaded; URL caches are bypassed
         *        in that case
         * @return the loaded bundle, or {@code null} if the format is not handled, the resource does
         *         not exist, or it cannot be read
         * @see java.util.ResourceBundle.Control#newBundle(String, Locale, String, ClassLoader, boolean)
         */
        @Override
        public ResourceBundle newBundle(String baseName, Locale locale, String format, ClassLoader loader,
                boolean reload) {
            if (!"java.properties".equals(format)) {
                return null;
            }
            final String resourceName = toResourceName(toBundleName(baseName, locale), "properties");
            try {
                final InputStream stream = openResource(loader, resourceName, reload);
                // Try to load the data from the stream, if there is one.
                return stream == null ? null : new UTFPropertyResourceBundle(stream);
            } catch (IOException ignored) {
                // Deliberate best effort: an unreadable resource is reported as "no bundle for this
                // candidate", so the lookup continues with the next candidate.
                return null;
            }
        }

        /**
         * Opens the named resource, bypassing URL connection caches when a reload is requested.
         *
         * @return the resource stream, or {@code null} if the resource does not exist
         */
        private static InputStream openResource(ClassLoader loader, String resourceName, boolean reload)
                throws IOException {
            if (!reload) {
                return loader.getResourceAsStream(resourceName);
            }
            final URL url = loader.getResource(resourceName);
            if (url == null) {
                return null;
            }
            final URLConnection connection = url.openConnection();
            // A reload must see the current content, not a cached copy.
            connection.setUseCaches(false);
            return connection.getInputStream();
        }
    }

    /** Shared control instance passed to every bundle lookup. */
    private static final ResourceBundle.Control UTF8_CONTROL = new UTFPropertyResourceBundleControl();

    /**
     * The same as {@link ResourceBundle#getBundle(String, Locale)}, which is declared as final, so we
     * cannot override it.
     *
     * @param name base name of the bundle
     * @param locale locale the bundle is requested for
     * @return the bundle loaded through the UTF-8 aware control
     */
    public static ResourceBundle getUTFBundle(String name, Locale locale) {
        return ResourceBundle.getBundle(name, locale, UTF8_CONTROL);
    }

    private final Properties properties = new Properties();

    /**
     * Creates a new UTFPropertyResourceBundle instance from UTF-8 encoded property data.
     *
     * <p>The stream is always closed before this constructor returns, whether loading succeeds or
     * fails.
     *
     * @param stream source of the property data
     * @throws IOException if the data cannot be read
     */
    public UTFPropertyResourceBundle(InputStream stream) throws IOException {
        final Reader reader = new InputStreamReader(stream, UTF8);
        try {
            properties.load(reader);
        } finally {
            closeQuietly(reader);
        }
    }

    /** Closes the reader, ignoring any failure of the close itself. */
    private static void closeQuietly(Reader reader) {
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails; the data has already been read (or the
            // original read failure is the one worth reporting).
        }
    }

    /**
     * Returns the keys of this bundle and of its parent bundles.
     *
     * @see java.util.ResourceBundle#getKeys()
     */
    @Override
    public Enumeration<String> getKeys() {
        return Collections.enumeration(keySet());
    }

    /**
     * Returns the keys held by this bundle only; {@code keySet()} combines them with the parents'.
     *
     * @see java.util.ResourceBundle#handleKeySet()
     */
    @Override
    protected Set<String> handleKeySet() {
        return properties.stringPropertyNames();
    }

    /**
     * @see java.util.ResourceBundle#handleGetObject(String)
     */
    @Override
    protected Object handleGetObject(String key) {
        return properties.get(key);
    }

    /**
     * Remove from final version. Only for testing!
     */
    public static void main(String[] args) {
        final ResourceBundle bundle = UTFPropertyResourceBundle.getUTFBundle("test", Locale.getDefault());
        System.out.println("value=" + bundle.getString("key1"));
    }
}

// With ''test.properties'' like this (non-ASCII characters are written here as Unicode escapes):
//
//   key1=value \
//     line break \u0434\u0430 + \u00e9 + \u00f4
//
// the output will be
//
//   value line break \u0434\u0430 + \u00e9 + \u00f4
//
// ===== Using ''.properties'' file conversion from UTF-8 to ASCII =====
//
// The Ant task for Maven 2 shown below converts all ''.utf8'' files from UTF-8 to ASCII and writes
// them as ''.properties'' files. If the ''.utf8'' files are kept in a separate folder, there is no
// need to exclude them in line 21.
//
// NOTE: the XML markup of the POM listing has been lost; only its text content remains:
//
//   4.0.0 ... src/java test/java org.apache.maven.plugins maven-war-plugin src/webapp **/*.utf8
//   maven-antrun-plugin 1.1 generate-sources run
//
// {{tag>UTF maven}}