import java.io.*; import java.util.*; /** * This is a SamTokenizer implementation that * has full backwards/forwards mobility and loads everything into * memory as soon as it is created */ public class SamTokenizer implements Tokenizer{ /** * The source */ protected PushbackReader in; /** * The unread tokens */ protected Stack tokens = new Stack(); /** * The read tokens */ protected Stack readTokens = new Stack(); /** * Creates a new SamTokenizer with a file for a source * @param FileName The file name of the file to read * @throws IOException If there is a file error * @throws FileNotFoundException If the file could not be found */ public SamTokenizer (String FileName) throws IOException, FileNotFoundException{ FileReader r = new FileReader(FileName); in = new PushbackReader(r); read(); } /** * Creates a new SamTokenizer with a Reader as a source * @param r The source * @throws IOException if there is a stream error */ public SamTokenizer (Reader r) throws IOException{ in = new PushbackReader(r); read(); } /** * Creates a new SamTokenizer with System.in as a source * @param r The source * @throws IOException if there is a stream error */ public SamTokenizer () throws IOException{ in = new PushbackReader(new InputStreamReader(System.in)); read(); } // Reads and parses the stream into tokens private void read() throws IOException{ int lineNo = 1; int cin; char c; while((cin = in.read()) != -1){ c = (char)cin; if(c == '\n') lineNo++; // Now we need to detect if its a word, operator, or integer if(Character.isWhitespace(c)) continue; // Skip out the comments else if(c == '/'){ int din = in.read(); if(din == -1) tokens.push(new Operator('/', lineNo)); // This is a valid character but not a comment else if((char)din != '/'){ in.unread(din); tokens.push(new Operator('/', lineNo)); } else{ // OK, so this is a comment while((din = in.read()) != '\n') continue; lineNo++; } } // Words can be defined as either just a group of letters // or a quote enclosed group of characters else if(c == '"'){ String word = ""; while((cin = in.read()) != -1 && cin != '"'){ if(cin == '\\'){ cin = in.read(); switch(cin){ case 't': word += '\t'; break; case '\\': word += '\\'; break; case 'n': word += '\n'; break; case 'r': word += '\r'; break; case '"': word += '"'; break; default: throw new IOException("Invalid escape character at " + lineNo); } } else word += (char)cin; } tokens.push(new Word(word, lineNo)); } // now it's either an integer or a word or an operator else if(Character.isLetter(c)){ // It's a word String word = ""; word += c; int din; while((din = in.read()) != -1){ if(Character.isLetter((char)din) || Character.isDigit((char)din) || din == '_') word += (char)din; // : are allowed in words but only to end them else if(din == ':'){ word += ':'; break; } else{ in.unread(din); break; } } tokens.push(new Word(word, lineNo)); } else if(Character.isDigit(c)){ in.unread(c); readNumber(lineNo, null); } else{ // Now this is an operator. The only problem is - and . can also be a number // So check for that if(c == '-' || c == '.'){ int din = in.read(); if(din == -1) tokens.push(new Operator(c, lineNo)); else if(Character.isDigit((char)din)){ in.unread(din); if(c == '.'){ readNumber(lineNo, "."); } else readNumber(lineNo, "-"); } else{ in.unread(din); tokens.push(new Operator(c, lineNo)); } } else tokens.push(new Operator(c, lineNo)); } } Stack a = new Stack(); while(!tokens.empty()) a.push(tokens.pop()); tokens = a; } // Reads in a number // tok is the start of the number private void readNumber(int lineNo, String tok) throws IOException{ int type = Tokenizer.INTEGER; if(tok == null) tok = ""; else if(tok.equals(".")) type = Tokenizer.FLOAT; else if(!tok.equals("-")) tok = ""; int din; while((din = in.read()) != -1){ if(din == '.'){ switch(type){ case Tokenizer.INTEGER: type = Tokenizer.FLOAT; break; case Tokenizer.FLOAT: type = Tokenizer.WORD; break; } tok += (char)din; } else if(Character.isDigit((char)din) || (type == Tokenizer.WORD && (Character.isLetter((char)din) || din == '_'))) tok += (char)din; else{ in.unread(din); break; } } try{ switch(type){ case Tokenizer.FLOAT: tokens.push(new FloatT(Float.parseFloat(tok), lineNo)); break; case Tokenizer.INTEGER: tokens.push(new Int(Integer.parseInt(tok), lineNo)); break; default: tokens.push(new Word(tok, lineNo)); } } catch(NumberFormatException e){ tokens.push(new Int(0, lineNo));} } /** * Return the type of the next token * @return the type of the next token */ public int peekAtKind(){ if(tokens.empty()) return EOF; return ((Token)tokens.peek()).getType(); } /** * Returns the next token if its an integer * @return the next token, or 0 if its not an integer */ public int getInt() throws TokenizerException{ if(peekAtKind() == Tokenizer.INTEGER){ Int i = (Int)tokens.pop(); readTokens.push(i); return i.getInt(); } else throw new TokenizerException("Attempt to read non-integer value as an integer", lineNo()); } /** * Returns the next token if its a float * @return the next token, or 0 if its not an integer */ public float getFloat() throws TokenizerException{ if(peekAtKind() == Tokenizer.FLOAT){ FloatT f = (FloatT)tokens.pop(); readTokens.push(f); return f.getFloat(); } else throw new TokenizerException("Attempt to read non-float value as a float", lineNo()); } /** * Returns the next token if its a word * @return the next token or "" if it is not a word */ public String getWord() throws TokenizerException{ if(peekAtKind() == Tokenizer.WORD){ Word word = (Word)tokens.pop(); readTokens.push(word); return word.getWord(); } else throw new TokenizerException("Attempt to read non-word value as a word.", lineNo()); } /** * Return the next token if its an operator * @return the next token or ' ' if it is not an operator */ public char getOp() throws TokenizerException{ if(peekAtKind() == Tokenizer.OPERATOR){ Operator op = (Operator)tokens.pop(); readTokens.push(op); return op.getOp(); } else throw new TokenizerException("Attempt to read non-operator value as an op", lineNo()); } /** * Matches the next operator or throws TokenizerException * @param c the character to match * @throws TokenizerException if the next token is not c */ public void match(char c) throws TokenizerException{ char n; if(peekAtKind() == Tokenizer.OPERATOR){ n = getOp(); if(n != c) throw new TokenizerException("Expecting " + c + " but found " + n, lineNo()); } else throw new TokenizerException("Did not find " + c, lineNo()); } /** * Matches the next word or throws TokenizerException * @param s the word to match * @throws TokenizerException if the next token is not s */ public void match(String s) throws TokenizerException{ String n; if(peekAtKind() == Tokenizer.WORD){ n = getWord(); if(!n.equals(s)) throw new TokenizerException("Expecting " + s + " but found " + n, lineNo()); } else throw new TokenizerException("Did not find " + s, lineNo()); } /** * Checks to see if the next token is an operator and is the provided test case * If it is, it is eaten up, otherwise, it is pushed back * @param c the operator to check against * @return true if the next token is an operator and is c, false otherwise */ public boolean check(char c) { try { if (c == getOp()) return true; pushBack(); return false; } catch (TokenizerException e) { return false; } } /** * Checks to see if the next token is a word and is the provided test case * If it is, it is eaten up, otherwise, it is pushed back * @param s the word to check against * @return true if the next token is a word and is s, false otherwise */ public boolean check(String s) { try { if (s.equals(getWord())) return true; pushBack(); return false; } catch (TokenizerException e) { return false; } } /** * Moves the position of the parser back by one */ public void pushBack(){ if(!readTokens.empty()) tokens.push(readTokens.pop()); } /** * Returns the line number of the last token read * @return the line number of the last token read */ public int lineNo(){ if(readTokens.empty()) return 1; else return ((Token)readTokens.peek()).lineNo(); } /** * Closes the stream */ public void close(){ try{ in.close(); } catch(Exception e){} } /** * Checks if the parser can move back * @return true if pushBack() will do anything, false otherwise */ public boolean canPushBack(){ return !readTokens.empty(); } /** * Skips the next token */ public void skipToken(){ if(!tokens.empty()) readTokens.push(tokens.pop()); } static interface Token{ public int lineNo(); public int getType(); } static class Int implements Token{ int integer; int lineNo; public Int(int integer, int lineNo){ this.lineNo = lineNo; this.integer = integer; } public int getInt(){ return integer; } public int lineNo(){ return lineNo; } public String toString(){ return "" + integer; } public int getType(){ return Tokenizer.INTEGER; } } static class FloatT implements Token{ float f; int lineNo; public FloatT(float fl, int lineNo){ this.lineNo = lineNo; this.f = fl; } public float getFloat(){ return f; } public int lineNo(){ return lineNo; } public String toString(){ return "" + f; } public int getType(){ return Tokenizer.FLOAT; } } static class Word implements Token{ String word; int lineNo; public Word(String word, int lineNo){ this.lineNo = lineNo; this.word = word; } public String getWord(){ return word; } public int lineNo(){ return lineNo; } public String toString(){ return word; } public int getType(){ return Tokenizer.WORD; } } static class Operator implements Token{ char op; int lineNo; public Operator(char op, int lineNo){ this.op = op; this.lineNo = lineNo; } public char getOp(){ return op; } public int lineNo(){ return lineNo; } public String toString(){ return "" + op; } public int getType(){ return Tokenizer.OPERATOR; } } }