UTF-8 property files in Java 6

Using UTF-8 ResourceBundle

It is well known that Java 5 and earlier offered no way to control how the data loaded from .properties files is decoded. This is because ResourceBundle loads the data through Properties.load(InputStream), which always assumes the ISO 8859-1 encoding and is therefore unsuitable for UTF-8 data. So how can a UTF-8-aware implementation be injected into ResourceBundle? In Java 6 this is possible thanks to the introduction of the ResourceBundle.Control abstraction, which is responsible for the actual loading of a bundle. The following code shows how to do it:

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Locale;
import java.util.Properties;
import java.util.ResourceBundle;
 
import org.apache.commons.io.IOUtils;
 
/**
 * A {@link ResourceBundle} backed by a {@code .properties} file that is decoded as UTF-8.
 *
 * <p>Bundles are normally obtained through {@link #getUTFBundle(String, Locale)}, which plugs a
 * UTF-8 aware {@link ResourceBundle.Control} into the standard bundle lookup.
 */
public class UTFPropertyResourceBundle extends ResourceBundle implements Serializable
{
    /** Charset used to decode every property file read by this class. */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Bundle loader that reads {@code .properties} resources as UTF-8.
     */
    static class UTFPropertyResourceBundleControl extends ResourceBundle.Control
    {
        /**
         * Called by {@link ResourceBundle} on a cache miss to actually load a bundle. It is invoked for
         * different locale combinations (from most specific to least specific) until a non-null result
         * is returned.
         *
         * @param baseName the base bundle name
         * @param locale   the locale the bundle is requested for
         * @param format   the bundle format; only {@code "java.properties"} is handled
         * @param loader   the class loader used to locate the resource
         * @param reload   {@code true} if an expired bundle is being reloaded, in which case URL
         *                 connection caches are bypassed
         * @return the loaded bundle, or {@code null} if the format is not handled, the resource does
         *         not exist, or it cannot be read
         * @see java.util.ResourceBundle.Control#newBundle(String, Locale, String, ClassLoader, boolean)
         */
        @Override
        public ResourceBundle newBundle(String baseName, Locale locale, String format, ClassLoader loader, boolean reload)
        {
            if (!"java.properties".equals(format))
            {
                return null;
            }

            final String resourceName = toResourceName(toBundleName(baseName, locale), "properties");

            try
            {
                final InputStream stream = openStream(loader, resourceName, reload);

                return (stream != null) ? new UTFPropertyResourceBundle(stream) : null;
            }
            catch (IOException e)
            {
                // Deliberate: an unreadable resource is reported exactly like a missing one, so the
                // bundle lookup simply moves on to the next candidate.
                return null;
            }
        }

        /**
         * Opens the named resource, or returns {@code null} if the class loader cannot find it.
         * When {@code reload} is set the resource is opened through a URL connection with caching
         * switched off, so that a changed file is actually re-read.
         */
        private static InputStream openStream(ClassLoader loader, String resourceName, boolean reload) throws IOException
        {
            if (!reload)
            {
                return loader.getResourceAsStream(resourceName);
            }

            final URL url = loader.getResource(resourceName);

            if (url == null)
            {
                return null;
            }

            final URLConnection connection = url.openConnection();

            if (connection == null)
            {
                return null;
            }

            connection.setUseCaches(false);

            return connection.getInputStream();
        }
    }

    /** Shared control instance handed to the standard bundle lookup. */
    private static final ResourceBundle.Control utfPropertyResourceBundleControl = new UTFPropertyResourceBundleControl();

    /**
     * The same as {@link ResourceBundle#getBundle(String, Locale)} (which is declared as final, so we
     * cannot override it), except that the property files are read as UTF-8.
     *
     * @param name   the base name of the bundle
     * @param locale the locale for which a bundle is desired
     * @return the bundle found by the standard lookup using the UTF-8 aware control
     */
    public static ResourceBundle getUTFBundle(String name, Locale locale)
    {
        return UTFPropertyResourceBundle.getBundle(name, locale, utfPropertyResourceBundleControl);
    }

    /** Key/value pairs loaded from the property file. */
    private final Properties properties = new Properties();

    /**
     * Creates a new UTFPropertyResourceBundle instance from UTF-8 encoded property data.
     *
     * <p>The stream is always closed before this constructor returns, whether loading succeeded or
     * failed.
     *
     * @param stream the UTF-8 encoded content of a {@code .properties} file
     * @throws IOException if the data cannot be read
     */
    public UTFPropertyResourceBundle(InputStream stream) throws IOException
    {
        final InputStreamReader reader = new InputStreamReader(stream, UTF8);

        try
        {
            properties.load(reader);
        }
        finally
        {
            try
            {
                // Closing the reader also closes the wrapped stream.
                reader.close();
            }
            catch (IOException ignored)
            {
                // A failure while closing must not mask the outcome of load().
            }
        }
    }

    /**
     * Returns the keys defined in the property file this bundle was loaded from.
     *
     * @see java.util.ResourceBundle#getKeys()
     */
    @Override
    public Enumeration<String> getKeys()
    {
        return Collections.enumeration(properties.stringPropertyNames());
    }

    /**
     * Returns the value stored for the given key, or {@code null} if this bundle does not define it.
     *
     * @see java.util.ResourceBundle#handleGetObject(String)
     */
    @Override
    protected Object handleGetObject(String key)
    {
        return properties.getProperty(key);
    }

    /**
     * Remove from final version. Only for testing!
     *
     * <p>Loads the bundle named {@code test} for the default locale and prints the value of
     * {@code key1}.
     */
    public static void main(String[] args)
    {
        final ResourceBundle bundle = UTFPropertyResourceBundle.getUTFBundle("test", Locale.getDefault());

        System.out.println("value=" + bundle.getString("key1"));
    }
}

With test.properties like this:

key1=value \
line break да + é + ô

the output will be

value line break да + é + ô

Converting .properties files from UTF-8 to ASCII

The Ant task for Maven 2 shown below converts all .utf8 files from UTF-8 to ASCII and writes the results as .properties files.

Alternatively, one can keep the .utf8 files in a separate folder; then there is no need to exclude them in line 21.

pom.xml

 1: <project
 2:     xmlns="http://maven.apache.org/POM/4.0.0"
 3:     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4:     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 5:
 6:     <modelVersion>4.0.0</modelVersion>
 7:
 8:     ...
 9:
10:     <build>
11:         <sourceDirectory>src/java</sourceDirectory>
12:
13:         <testSourceDirectory>test/java</testSourceDirectory>
14:
15:         <plugins>
16:             <plugin>
17:                 <groupId>org.apache.maven.plugins</groupId>
18:                 <artifactId>maven-war-plugin</artifactId>
19:                 <configuration>
20:                     <warSourceDirectory>src/webapp</warSourceDirectory>
21:                     <warSourceExcludes>**/*.utf8</warSourceExcludes>
22:                 </configuration>
23:             </plugin>
24:             <plugin>
25:                 <artifactId>maven-antrun-plugin</artifactId>
26:                 <version>1.1</version>
27:                 <executions>
28:                     <execution>
29:                         <phase>generate-sources</phase> <!-- see complete list here: http://maven.apache.org/guides/introduction/introduction-to-the-lifecycle.html#Lifecycle_Reference -->
30:                         <configuration>
31:                             <tasks>
32:                                 <path id="ant.classpath">
33:                                     <pathelement location="lib/ant/ant-nodeps.jar"/> <!-- contains org.apache.tools.ant.taskdefs.optional.Native2Ascii -->
34:                                     <pathelement location="${JAVA_HOME}/lib/tools.jar"/> <!-- contains sun.tools.native2ascii.Main -->
35:                                 </path>
36:                                 <taskdef name="native2ascii" classname="org.apache.tools.ant.taskdefs.optional.Native2Ascii">
37:                                     <classpath refid="ant.classpath"/>
38:                                 </taskdef>
39:                                 <native2ascii encoding="UTF-8" src="src/webapp/i18n/" dest="src/webapp/i18n/" includes="*.utf8" ext=".properties"/> <!-- or define a glob mapper, see http://ant.apache.org/manual/CoreTypes/mapper.html#identity-mapper -->
40:                             </tasks>
41:                         </configuration>
42:                         <goals>
43:                             <goal>run</goal>
44:                         </goals>
45:                     </execution>
46:                 </executions>
47:             </plugin>
48:         </plugins>
49:     </build>
50: </project>
programming/java/utf8_bundle.txt · Last modified: 2009/06/10 16:42 by dmitry
 
 
Recent changes RSS feed Driven by DokuWiki