/****************************************************************************** * Compilation: javac UnicodeTest.java * Execution: java UnicodeTest * Dependencies: none * * This programs prints out all of the Unicode characters in the basic * multilingual plane (U+0000 to U+FFFF) in a table. It skips the * following types of characters: * - undefined * - control characters * - modifier symbols * - non-spacing marks * - Unicode formatting commands * - reserved for surrogate pairs * - reserved for private use * * * % java UnicodeTest * U+0020 ! " # $ % & ' ( ) * + , - . / * U+0030 0 1 2 3 4 5 6 7 8 9 : ; < = > ? * U+0040 @ A B C D E F G H I J K L M N O * U+0050 P Q R S T U V W X Y Z [ \ ] _ * U+0060 a b c d e f g h i j k l m n o * U+0070 p q r s t u v w x y z { | } ~ * U+00A0   ¡ ¢ £ ¤ ¥ ¦ § © ª « ¬ ® * U+00B0 ° ± ² ³ µ ¶ · ¹ º » ¼ ½ ¾ ¿ * U+00C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï * U+00D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß * U+00E0 à á â ã ä å æ ç è é ê ë ì í î ï * U+00F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ * U+0100 Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď ď * ... * * Depending on your system setup and font, not all of the Unicode * characters may display properly. * * Quirks: when printing certain Hebrew or Arabic characters, the * table may print right-to-left instead of left-to-right. * * For a description of Unicode terminology, see: * http://docs.oracle.com/javase/tutorial/i18n/text/terminology.html * * For the Character API, see: * http://docs.oracle.com/javase/7/docs/api/java/lang/Character.html * * To see what each Unicode character should look like, see: * http://www.fileformat.info/info/unicode/index.htm * http://www.fileformat.info/info/unicode/char/05D0/index.htm * * ******************************************************************************/ import edu.princeton.cs.algs4.StdOut; public class UnicodeTest { // number of Unicode characters to display per line private static final int CHARS_PER_LINE = 16; // number of Unicode characters to display (basic multilingual plane) private static final int MAX_CHAR = 65536; // do not instantiate private UnicodeTest() { } // Returns a string representation of the given codePoint, or a single // space if the codePoint should not be suppressed when printing. private static String toString(int codePoint) { if (!Character.isDefined(codePoint)) return " "; if (Character.isISOControl(codePoint)) return " "; if (Character.isWhitespace(codePoint)) return " "; // if (Character.isSurrogate(codePoint) return " "; // Java 1.7+ only if (Character.isLowSurrogate((char) codePoint)) return " "; // Java 1.5+ if (Character.isHighSurrogate((char) codePoint)) return " "; // Java 1.5+ switch(Character.getType(codePoint)) { case Character.MODIFIER_SYMBOL: return " "; case Character.CONTROL: return " "; case Character.MODIFIER_LETTER: return " "; case Character.NON_SPACING_MARK: return " "; case Character.FORMAT: return " "; case Character.PRIVATE_USE: return " "; default: return new String(Character.toChars(codePoint)); } } /** * Prints Unicode characters to standard output. */ public static void main(String[] args) { for (int line = 0; line < 2*Character.MAX_VALUE / CHARS_PER_LINE; line++) { StringBuilder buffer = new StringBuilder(); for (int i = 0; i < CHARS_PER_LINE; i++) { int codePoint = CHARS_PER_LINE*line + i; buffer.append(toString(codePoint) + " "); } String output = buffer.toString(); if (!output.trim().equals("")) { // U+202D is the Unicode override to force left-to-right direction // but doesn't seem to work with Unix more StdOut.printf("U+%04X %s\n", 16*line, output); } } } }