Generating Nepali Unicode Sequence For Characters

While working on a personal translation project, I reached the localization phase. Nepali characters are represented in Unicode (escapes of the form \u09xx), which makes it tedious to write Nepali strings by hand. So I wrote this class, a Unicode generator, whose input is the ordinary English keys you would type on a romanized Nepali keyboard layout. For example, to write ‘Kushal’ in Nepali you would key in ‘kuSl’. When you pass ‘kuSl’ to the format method of the following class, you get back a string like ‘\u09xx\u09xx\u09xx\u09xx’, where each xx is replaced by the hex digits of the correct character. For instance, the Unicode escape for the letter ‘ka’ in Nepali is \u0915. You can use the resulting string for display in GUIs.

/**
 *UnicodeConverter.java
 *Converts from standard keyboard input to nepali unicode characters.
 *Written On: 12th November 2005
 *Author: Kushal Paudyal (kushalzone@gmail.com)
 * http://java.sanjaal.com
 */
package com.kushal.utilities;

import java.util.Vector;
import javax.swing.JOptionPane;

public class UnicodeConverter {
	/** Language code for which {@link #format(String)} performs a conversion. */
	private static final String NEPALI = "np";

	/**
	 * Conversion table: the element at index i is the Devanagari character
	 * produced by the key at index i of {@link #standard}. It is filled by
	 * {@link #prepareVector()}.
	 */
	Vector<String> unicodeVector;

	/**
	 * Keys of the romanized keyboard layout, in the same order as the
	 * characters stored in {@link #unicodeVector}. Code points that have no key
	 * on the keyboard are represented by the code point itself, so such a
	 * character simply maps to itself.
	 *
	 * Code points known to have no key: U+0944 U+0945 U+0946 U+0949 U+094A
	 * U+094E U+094F U+0951 U+0953 U+0954.
	 *
	 * The digit keys run from 0 to 9 because the table starts at U+0966
	 * (DEVANAGARI DIGIT ZERO); listing them as 1..9,0 would shift every digit
	 * by one.
	 *
	 * NOTE(review): the published listing of this string was truncated after
	 * the key for U+0918 and had no slot for U+0950. The keys for
	 * U+0919..U+0933 were reconstructed: m, r, n, l, q and y follow from the
	 * usage examples in main; the remaining ones, and the backslash key for
	 * U+0950, are assumed from the common Nepali romanized layout. Please
	 * confirm them against the layout you target.
	 */
	static final String standard =
			" " // space
			+ "0123456789" // U+0966..U+096F, digits zero to nine
			+ "VM:\u0904" // U+0901..U+0904, signs
			+ "HA[{fFZ\u090C\u090D\u090E]}\u0911\u0912OW" // U+0905..U+0914, independent vowels
			+ "kKgG<cCjJYqQxXNtTdDn\u0929pPbBmyr\u0931lL\u0934vSzsh" // U+0915..U+0939, consonants
			+ "`~aiIuUR\u0944\u0945\u0946eE\u0949\u094Aow/\u094E\u094F" // U+093C..U+094F
			+ "\\" // U+0950 (backslash key, see NOTE above)
			+ "\u0951|\u0953\u0954" // U+0951..U+0954
			+ "\u0958\u0959"; // U+0958, U+0959

	/** Language code given at construction time; only "np" triggers conversion. */
	String language;

	/**
	 * Creates a converter for the given language.
	 *
	 * @param language
	 *            language code; the conversion table is prepared only when it
	 *            equals "np"
	 */
	public UnicodeConverter(String language) {
		this.language = language;
		// equals() instead of ==: a language code built at runtime (read from
		// a file, a property, user input) is not the same object as the
		// literal "np" even though it has the same content.
		if (NEPALI.equals(language)) {
			prepareVector();
		}
	}

	/**
	 * Converts text keyed in on the romanized layout into Devanagari
	 * characters. Each input character is looked up in {@link #standard}; the
	 * position found selects the replacement from {@link #unicodeVector}.
	 * Characters that are not part of the layout are copied unchanged.
	 *
	 * @param str
	 *            the keyed-in text; may be null
	 * @return the converted text when the language is "np"; otherwise, or when
	 *         str is null, the argument itself
	 */
	public String format(String str) {
		if (str == null || !NEPALI.equals(language)) {
			return str;
		}
		// The language field can be reassigned after construction, in which
		// case the constructor never built the table.
		if (unicodeVector == null) {
			prepareVector();
		}

		StringBuilder converted = new StringBuilder(str.length());
		for (int i = 0; i < str.length(); i++) {
			char key = str.charAt(i);
			int index = standard.indexOf(key);
			if (index < 0) {
				// not a key of the layout: keep the character as it is
				converted.append(key);
			} else {
				converted.append(unicodeVector.elementAt(index));
			}
		}
		return converted.toString();
	}

	/**
	 * Fills {@link #unicodeVector} with the Devanagari characters, in the order
	 * expected by {@link #standard}: the space, the digits U+0966..U+096F, then
	 * U+0901..U+0939, U+093C..U+0954 and finally U+0958 and U+0959.
	 *
	 * @throws IllegalStateException
	 *             if the table and the key string differ in length, which
	 *             would make every lookup after the mismatch wrong
	 */
	public void prepareVector() {
		Vector<String> table = new Vector<String>(standard.length());
		table.addElement(" "); // space
		appendRange(table, 0x0966, 0x096F); // digits zero to nine
		appendRange(table, 0x0901, 0x0939); // signs, independent vowels, consonants
		appendRange(table, 0x093C, 0x0954); // nukta, avagraha, vowel signs, virama, om, accents
		appendRange(table, 0x0958, 0x0959); // two additional consonants
		if (table.size() != standard.length()) {
			throw new IllegalStateException("conversion table has " + table.size()
					+ " entries but the key string has " + standard.length());
		}
		unicodeVector = table;
	}

	/** Appends one single-character string per code point in first..last (inclusive). */
	private static void appendRange(Vector<String> table, int first, int last) {
		for (int codePoint = first; codePoint <= last; codePoint++) {
			table.addElement(String.valueOf((char) codePoint));
		}
	}

	/**
	 * Usage Example
	 */
	public static void main(String args[]) {
		UnicodeConverter generator = new UnicodeConverter("np");
		/*
		 * Usage: If you want to print mero naam Kushal ho, you would be keying
		 * in the following for romanized unicode keyboards: mero nam kuSl ho
		 */
		// String test = generator.format("raz/q/riy"); // Rastriya
		String test = generator.format("mero nam kuSl ho");
		JOptionPane.showMessageDialog(null, test);
	}

}