/* Wotonomy: OpenStep design patterns for pure Java applications. Copyright (C) 2000 Blacksmith, Inc. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, see http://www.gnu.org */ package net.wotonomy.foundation.internal; import java.util.*; //collections import java.io.*; /** * PropertyListParser can parse a property list (plist) file or string, and * return the top-level object represented by the plist. *

* * A property list is a hierarchical data structure containing only Maps, Lists, * and Strings -- nothing else. In other words, a property list is either a Map, * List, or String instance, with the restrictions that the collections may only * contain Map, List, or String instances. *

* * This class can read a particularly-formatted string or file, and create the * property list structure described. It provides a convenient means for having * a structured data file, letting programs simply deal with the structure * rather than having to do a lot of string parsing work as well. The concept is * similar to Properties files, except that the values can be nested Maps or * Lists instead of only Strings. *

* * A Map is specified in a file by key/value pairs surrounded by brace * characters. An equal sign (=) must be between the key and value, and there * must be a semicolon (;) following the value. * *

 *     {
 *         key1 = value1;
 *         key2 = value2;
 *         etc...
 *     }
 *

* * A List is specified by a comma-separated list of values surrounded by * parentheses, like: * *

 *     ( value1, value2, value3, etc... )
 *

* * A String can either be quoted in the manner of a constant string in Java, or * unquoted. If unquoted, the string can only contain alphanumerics, underscores * (_), periods (.), dollar signs ($), colons (:), or forward slashes (/). If * any other character appears in the string, it must be quoted (i.e., * surrounded by " characters). Quoted strings may also contain \n, \r, \t, \f, * \v, \b, and \a escapes, octal escapes of the form \000, and unicode escapes * of the form \U followed by four hexadecimal characters. Any other * character escaped by a backslash will be treated as that character, and the * escaping backslash character will be omitted. Thus, to represent an actual * backslash, it must appear as \\ in the quoted string. *

* * All whitespace between elements is ignored, and both //-style and /*-style * comments are allowed to appear anywhere between elements. *

* * If there are any syntax errors encountered while parsing, RuntimeExceptions * are thrown with the line number and column of the problem. *

* * Currently, HashMaps and ArrayLists are the actual Map and List classes used * when creating the property list. *

* * Examples: *

* *

   // This plist file represents a Map, since it starts with a '{'.
   {
       Map1 = { subkey1 = "foo"; };
       Map2 =
       {
           "key1"  = "This is a quoted string.";
           "key 2" = "bar\nbaz";    // the value has a newline in it
           key3    = ("a", b, c, "quux quux");   // a List of four Strings
       };  // We need a semicolon here, since it's following the value of the "Map2" key

       List1 = (foobar,foobaz,"foo,baz", (aa, ab, ac)); // a List of 3 Strings and a List

       // And now a List of two Maps
       List2 = (
           {
               key1 = value1;
               key2 = "value 2";
               key3 = (a,b,c,d);
               key4 = ();
           },  // We need the comma here
           {
               key1 = {};  // an empty Map
               key2 = "another String value";
           }
       );
   }
 *

* *

For those wondering, this is essentially a re-implementation of * NeXT/Apple's property lists, except that data values are not supported. * * @author clindberg@blacksmith.com * @version $Revision: 899 $ */ public class PropertyListParser { private char buffer[]; private int currIndex; private int lineNumber; private int currLineStartIndex; /** * Reads an object (String, List, or Map) from plistString and returns it. * RuntimeExceptions are raised if there are parse problems. */ public static Object propertyListFromString(String plistString) { PropertyListParser parser = new PropertyListParser(plistString); return parser.readTopLevelObject(); } /** * Reads all remaining characters from the Reader, and returns the result of * propertyListFromString(). RuntimeExceptions are raised if there are parse * problems */ public static Object propertyListFromReader(Reader reader) throws IOException { char charBuffer[] = new char[2048]; StringBuffer stringBuffer = new StringBuffer(); int numRead = 0; while (numRead >= 0) { numRead = reader.read(charBuffer); if (numRead > 0) stringBuffer.append(charBuffer, 0, numRead); } return propertyListFromString(stringBuffer.toString()); } /** * Reads the contents of the specified file, and parses the contents. If any * error occurs, prints out a message using System.out.println() and returns * null. */ public static Object propertyListFromFile(String filename) { try { FileInputStream stream = new FileInputStream(filename); return propertyListFromReader(new InputStreamReader(stream)); } catch (Exception exception) { String errorMessage = exception.getMessage(); System.out.println("Error parsing property list from " + filename + ": " + errorMessage); } return null; } /** * Creates a new PropertyListParser to parse the contents of the specified * String. */ public PropertyListParser(String plistString) { this(plistString.toCharArray()); } /** * Creates a new PropertyListParser to parse the specified char array. 
*/ public PropertyListParser(char[] charArray) { buffer = charArray; lineNumber = 1; currLineStartIndex = 1; currIndex = 0; } public Object readTopLevelObject() { Object plist = readObject(); skipCommentWhitespace(); if (!isAtEnd()) { throwParseException( "Extra characters in plist string after parsing object. A plist should only contain one top-level object."); } return plist; } private void throwParseException(String errorMessage) { int column = currIndex - currLineStartIndex + 1; throw new RuntimeException(errorMessage + " (Line " + lineNumber + ", column " + column + ")"); } private void updateLineNumberWithIndex(int lineStartIndex) { lineNumber++; currLineStartIndex = lineStartIndex; } private boolean isAtEnd() { return currIndex >= buffer.length; } private void skipDoubleslashComment() { while (!isAtEnd() && buffer[currIndex] != '\n') { currIndex++; } } private void skipStandardCComment() { currIndex++; // skip over the starting '/' while (!isAtEnd()) { if (buffer[currIndex] == '\n') updateLineNumberWithIndex(currIndex + 1); currIndex++; if (buffer[currIndex - 2] == '*' && buffer[currIndex - 1] == '/') { return; } } throwParseException("Input exhausted while parsing comment"); } private void skipWhitespace() { while (!isAtEnd() && isWhitespace(buffer[currIndex])) { if (buffer[currIndex] == '\n') updateLineNumberWithIndex(currIndex + 1); currIndex++; } } private void skipCommentWhitespace() { boolean done = false; while (!done) { done = true; skipWhitespace(); if ((buffer.length - currIndex) > 1 && buffer[currIndex] == '/') { if (buffer[currIndex + 1] == '/') { done = false; // iterate again skipDoubleslashComment(); } else if (buffer[currIndex + 1] == '*') { done = false; // iterate again skipStandardCComment(); } } } } private Object readObject() { skipCommentWhitespace(); if (isAtEnd()) return null; // Data (i.e. 
byte[]) not supported if (buffer[currIndex] == '"') return readQuotedString(); if (buffer[currIndex] == '(') return readList(); if (buffer[currIndex] == '{') return readMap(); return readUnquotedString(); } private static final byte valueForHexDigit(char c) { if (c >= '0' && c <= '9') return (byte) (c - '0'); if (c >= 'a' && c <= 'f') return (byte) ((c - 'a') + 10); if (c >= 'A' && c <= 'F') return (byte) ((c - 'A') + 10); return 0; } private static final boolean isOctalDigit(char c) { return c >= '0' && c <= '7'; } private static final boolean isHexDigit(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } private static String unquotedStringChars = "._$:/"; // chars allowed in unquoted strings private static String whitespaceChars = " \t\n\r\f"; private static final boolean isWhitespace(char c) { return whitespaceChars.indexOf(c) >= 0; } private static final boolean isValidUnquotedStringChar(char c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || unquotedStringChars.indexOf(c) >= 0); } private String readUnquotedString() { int startIndex = currIndex; while (!isAtEnd() && isValidUnquotedStringChar(buffer[currIndex])) currIndex++; if (startIndex == currIndex) throwParseException("No allowable characters found to parse unquoted string"); return new String(buffer, startIndex, currIndex - startIndex); } private String readQuotedString() { currIndex++; // skip over '"' StringBuffer stringBuffer = new StringBuffer(); int startIndex = currIndex; while (!isAtEnd() && buffer[currIndex] != '"') { if (buffer[currIndex] != '\\') { if (buffer[currIndex] == '\n') updateLineNumberWithIndex(currIndex + 1); /* * Just increment the index -- all these characters will be appended in chunks, * either before an escape sequence or at the end. 
*/ currIndex++; } else // it's an escape { /* Append anything scanned past before the '\\' */ if (startIndex < currIndex) stringBuffer.append(buffer, startIndex, currIndex - startIndex); currIndex++; // skip over '\\' if (isAtEnd()) throwParseException("Input exhausted while parsing escape sequence"); switch (buffer[currIndex]) { case 't': stringBuffer.append('\t'); currIndex++; break; // tab case 'n': stringBuffer.append('\n'); currIndex++; break; // newline case 'r': stringBuffer.append('\r'); currIndex++; break; // carriage return case 'f': stringBuffer.append('\f'); currIndex++; break; // form feed case 'b': stringBuffer.append('\b'); currIndex++; break; // backspace case 'a': stringBuffer.append('\007'); currIndex++; break; // bell case 'v': stringBuffer.append('\013'); currIndex++; break; // vertical tab case 'U': case 'u': { /* A Unicode escape. Always followed by 4 hex digits. */ currIndex++; // skip past the 'U' if ((currIndex + 4) > buffer.length) throwParseException("Not enough chars to parse \\U sequence"); if (!isHexDigit(buffer[currIndex]) || !isHexDigit(buffer[currIndex + 1]) || !isHexDigit(buffer[currIndex + 2]) || !isHexDigit(buffer[currIndex + 3])) { throwParseException("Four hex digits not found for \\U sequence"); } byte byte3 = valueForHexDigit(buffer[currIndex]); byte byte2 = valueForHexDigit(buffer[currIndex + 1]); byte byte1 = valueForHexDigit(buffer[currIndex + 2]); byte byte0 = valueForHexDigit(buffer[currIndex + 3]); char theChar = (char) ((byte3 << 12) + (byte2 << 8) + (byte1 << 4) + byte0); stringBuffer.append(theChar); currIndex += 4; break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { /* An octal escape. Expect 1, 2, or 3 octal digits. 
*/ int digits = 0; int value = 0; do { value *= 8; value += (int) (buffer[currIndex] - '0'); currIndex++; digits++; } while (digits <= 3 && !isAtEnd() && isOctalDigit(buffer[currIndex])); if (value > 255) throwParseException("Value too large in octal escape sequence (> 0377)"); // This assumes value is in ISO Latin 1 encoding stringBuffer.append((char) value); break; } /* I guess plists can't have the \x{HEX}{HEX} escapes */ default: { // Unknown escape sequence, just add the character. // GCC warns if this isn't a '"', '\'', or '\\'... stringBuffer.append(buffer[currIndex]); if (buffer[currIndex] == '\n') updateLineNumberWithIndex(currIndex + 1); currIndex++; break; } } // end case /* Reset startIndex, so a verbatim copy will now start from this index */ startIndex = currIndex; } // end '\\' escape } if (isAtEnd()) throwParseException("Input exhausted while parsing quoted string"); if (startIndex < currIndex) stringBuffer.append(buffer, startIndex, currIndex - startIndex); currIndex++; // skip past '"' return stringBuffer.toString(); } private List