summaryrefslogtreecommitdiffstats
path: root/labb8/lib/StanfordCPPLib/tokenscanner.h
diff options
context:
space:
mode:
Diffstat (limited to 'labb8/lib/StanfordCPPLib/tokenscanner.h')
-rwxr-xr-xlabb8/lib/StanfordCPPLib/tokenscanner.h351
1 files changed, 351 insertions, 0 deletions
diff --git a/labb8/lib/StanfordCPPLib/tokenscanner.h b/labb8/lib/StanfordCPPLib/tokenscanner.h
new file mode 100755
index 0000000..0b2eb32
--- /dev/null
+++ b/labb8/lib/StanfordCPPLib/tokenscanner.h
@@ -0,0 +1,351 @@
+/*
+ * File: tokenscanner.h
+ * --------------------
+ * This file exports a <code>TokenScanner</code> class that divides
+ * a string into individual logical units called <b><i>tokens</i></b>.
+ */
+
+#ifndef _tokenscanner_h
+#define _tokenscanner_h
+
+#include <iostream>
+#include <string>
+#include "private/tokenpatch.h"
+
+/*
+ * Type: TokenType
+ * ---------------
+ * This enumerated type defines the values of the
+ * <code>getTokenType</code> method.
+ */
+
+#ifdef _WIN32
+# define TokenType TokenTypeT
+# define WORD WORD_TC
+#endif
+
+enum TokenType { SEPARATOR, WORD, NUMBER, STRING, OPERATOR };
+
+/*
+ * Class: TokenScanner
+ * -------------------
+ * This class divides a string into individual tokens. The typical
+ * use of the <code>TokenScanner</code> class is illustrated by the
+ * following pattern, which reads the tokens in the string variable
+ * <code>input</code>:
+ *
+ *<pre>
+ * TokenScanner scanner(input);
+ * while (scanner.hasMoreTokens()) {
+ * string token = scanner.nextToken();
+ * ... process the token ...
+ * }
+ *</pre>
+ *
+ * The <code>TokenScanner</code> class exports several additional methods
+ * that give clients more control over its behavior. Those methods are
+ * described individually in the documentation.
+ */
+
+class TokenScanner {
+
+public:
+
+/*
+ * Constructor: TokenScanner
+ * Usage: TokenScanner scanner;
+ * TokenScanner scanner(str);
+ * TokenScanner scanner(infile);
+ * ------------------------------------
+ * Initializes a scanner object. The initial token stream comes from
+ * the specified string or input stream, if supplied. The default
+ * constructor creates a scanner with an empty token stream.
+ */
+
+ TokenScanner();
+ TokenScanner(std::string str);
+ TokenScanner(std::istream & infile);
+
+/*
+ * Destructor: ~TokenScanner
+ * -------------------------
+ * Deallocates the storage associated with this scanner.
+ */
+
+ virtual ~TokenScanner();
+
+/*
+ * Method: setInput
+ * Usage: scanner.setInput(str);
+ * scanner.setInput(infile);
+ * --------------------------------
+ * Sets the token stream for this scanner to the specified string or
+ * input stream. Any previous token stream is discarded.
+ */
+
+ void setInput(std::string str);
+ void setInput(std::istream & infile);
+
+/*
+ * Method: hasMoreTokens
+ * Usage: if (scanner.hasMoreTokens()) ...
+ * ---------------------------------------
+ * Returns <code>true</code> if there are additional tokens for this
+ * scanner to read.
+ */
+
+ bool hasMoreTokens();
+
+/*
+ * Method: nextToken
+ * Usage: token = scanner.nextToken();
+ * -----------------------------------
+ * Returns the next token from this scanner. If <code>nextToken</code>
+ * is called when no tokens are available, it returns the empty string.
+ */
+
+ std::string nextToken();
+
+/*
+ * Method: saveToken
+ * Usage: scanner.saveToken(token);
+ * --------------------------------
+ * Pushes the specified token back into this scanner's input stream.
+ * On the next call to <code>nextToken</code>, the scanner will return
+ * the saved token without reading any additional characters from the
+ * token stream.
+ */
+
+ void saveToken(std::string token);
+
+/*
+ * Method: getPosition
+ * Usage: int pos = scanner.getPosition();
+ * ---------------------------------------
+ * Returns the current position of the scanner in the input stream.
+ * If <code>saveToken</code> has been called, this position corresponds
+ * to the beginning of the saved token. If <code>saveToken</code> is
+ * called more than once, <code>getPosition</code> returns -1.
+ */
+
+ int getPosition() const;
+
+/*
+ * Method: ignoreWhitespace
+ * Usage: scanner.ignoreWhitespace();
+ * ----------------------------------
+ * Tells the scanner to ignore whitespace characters. By default,
+ * the <code>nextToken</code> method treats whitespace characters
+ * (typically spaces and tabs) just like any other punctuation mark
+ * and returns them as single-character tokens.
+ * Calling
+ *
+ *<pre>
+ * scanner.ignoreWhitespace();
+ *</pre>
+ *
+ * changes this behavior so that the scanner ignore whitespace characters.
+ */
+
+ void ignoreWhitespace();
+
+/*
+ * Method: ignoreComments
+ * Usage: scanner.ignoreComments();
+ * --------------------------------
+ * Tells the scanner to ignore comments. The scanner package recognizes
+ * both the slash-star and slash-slash comment format from the C-based
+ * family of languages. Calling
+ *
+ *<pre>
+ * scanner.ignoreComments();
+ *</pre>
+ *
+ * sets the parser to ignore comments.
+ */
+
+ void ignoreComments();
+
+/*
+ * Method: scanNumbers
+ * Usage: scanner.scanNumbers();
+ * -----------------------------
+ * Controls how the scanner treats tokens that begin with a digit. By
+ * default, the <code>nextToken</code> method treats numbers and letters
+ * identically and therefore does not provide any special processing for
+ * numbers. Calling
+ *
+ *<pre>
+ * scanner.scanNumbers();
+ *</pre>
+ *
+ * changes this behavior so that <code>nextToken</code> returns the
+ * longest substring that can be interpreted as a real number.
+ */
+
+ void scanNumbers();
+
+/*
+ * Method: scanStrings
+ * Usage: scanner.scanStrings();
+ * -----------------------------
+ * Controls how the scanner treats tokens enclosed in quotation marks. By
+ * default, quotation marks (either single or double) are treated just like
+ * any other punctuation character. Calling
+ *
+ *<pre>
+ * scanner.scanStrings();
+ *</pre>
+ *
+ * changes this assumption so that <code>nextToken</code> returns a single
+ * token consisting of all characters through the matching quotation mark.
+ * The quotation marks are returned as part of the scanned token so that
+ * clients can differentiate strings from other token types.
+ */
+
+ void scanStrings();
+
+/*
+ * Method: addWordCharacters
+ * Usage: scanner.addWordCharacters(str);
+ * --------------------------------------
+ * Adds the characters in <code>str</code> to the set of characters
+ * legal in a <code>WORD</code> token. For example, calling
+ * <code>addWordCharacters("_")</code> adds the underscore to the
+ * set of characters that are accepted as part of a word.
+ */
+
+ void addWordCharacters(std::string str);
+
+/*
+ * Method: isWordCharacter
+ * Usage: if (scanner.isWordCharacter(ch)) ...
+ * -------------------------------------------
+ * Returns <code>true</code> if the character is valid in a word.
+ */
+
+ bool isWordCharacter(char ch) const;
+
+/*
+ * Method: addOperator
+ * Usage: scanner.addOperator(op);
+ * -------------------------------
+ * Defines a new multicharacter operator. Whenever you call
+ * <code>nextToken</code> when the input stream contains operator
+ * characters, the scanner returns the longest possible operator
+ * string that can be read at that point.
+ */
+
+ void addOperator(std::string op);
+
+/*
+ * Method: verifyToken
+ * Usage: scanner.verifyToken(expected);
+ * -------------------------------------
+ * Reads the next token and makes sure it matches the string
+ * <code>expected</code>. If it does not, <code>verifyToken</code>
+ * throws an error.
+ */
+
+ void verifyToken(std::string expected);
+
+/*
+ * Method: getTokenType
+ * Usage: TokenType type = scanner.getTokenType(token);
+ * ----------------------------------------------------
+ * Returns the type of this token. This type will match one of the
+ * following enumerated type constants: <code>EOF</code>,
+ * <code>SEPARATOR</code>, <code>WORD</code>, <code>NUMBER</code>,
+ * <code>STRING</code>, or <code>OPERATOR</code>.
+ */
+
+TokenType getTokenType(std::string token) const;
+
+/*
+ * Method: getChar
+ * Usage: int ch = scanner.getChar();
+ * ----------------------------------
+ * Reads the next character from the scanner input stream.
+ */
+
+int getChar();
+
+/*
+ * Method: ungetChar
+ * Usage: scanner.ungetChar(ch);
+ * -----------------------------
+ * Pushes the character <code>ch</code> back into the scanner stream.
+ * The character must match the one that was read.
+ */
+
+void ungetChar(int ch);
+
+/*
+ * Method: getStringValue
+ * Usage: string str = scanner.getStringValue(token);
+ * --------------------------------------------------
+ * Returns the string value of a token. This value is formed by removing
+ * any surrounding quotation marks and replacing escape sequences by the
+ * appropriate characters.
+ */
+
+ std::string getStringValue(std::string token) const;
+
+/* Private section */
+
+/**********************************************************************/
+/* Note: Everything below this point in the file is logically part */
+/* of the implementation and should not be of interest to clients. */
+/**********************************************************************/
+
+private:
+
+/*
+ * Private type: StringCell
+ * ------------------------
+ * This type is used to construct linked lists of cells, which are used
+ * to represent both the stack of saved tokens and the set of defined
+ * operators. These types cannot use the Stack and Lexicon classes
+ * directly because tokenscanner.h is an extremely low-level interface,
+ * and doing so would create circular dependencies in the .h files.
+ */
+
+ struct StringCell {
+ std::string str;
+ StringCell *link;
+ };
+
+ enum NumberScannerState {
+ INITIAL_STATE,
+ BEFORE_DECIMAL_POINT,
+ AFTER_DECIMAL_POINT,
+ STARTING_EXPONENT,
+ FOUND_EXPONENT_SIGN,
+ SCANNING_EXPONENT,
+ FINAL_STATE
+ };
+
+ std::string buffer; /* The original argument string */
+ std::istream *isp; /* The input stream for tokens */
+ bool stringInputFlag; /* Flag indicating string input */
+ bool ignoreWhitespaceFlag; /* Scanner ignores whitespace */
+ bool ignoreCommentsFlag; /* Scanner ignores comments */
+ bool scanNumbersFlag; /* Scanner parses numbers */
+ bool scanStringsFlag; /* Scanner parses strings */
+ std::string wordChars; /* Additional word characters */
+ StringCell *savedTokens; /* Stack of saved tokens */
+ StringCell *operators; /* List of multichar operators */
+
+/* Private method prototypes */
+
+ void initScanner();
+ void skipSpaces();
+ std::string scanWord();
+ std::string scanNumber();
+ std::string scanString();
+ bool isOperator(std::string op);
+ bool isOperatorPrefix(std::string op);
+
+};
+
+#endif