add given code

author: Gustav Sörnäs <gustav@sornas.net> 2020-11-17 15:42:27 +0100
committer: Gustav Sörnäs <gustav@sornas.net> 2020-11-17 15:42:27 +0100
commit: fb80ac50825c7ca1fa063d3493175b7b27adbdb1 (patch)
tree: 4d84895d45ca2c177ddefe11164575bf6762b3ca /labb5/lib/StanfordCPPLib/lexicon.h
parent: 4065799f7080260f507a5e3ea8c2d8375e735166 (diff)
download: tddd86-fb80ac50825c7ca1fa063d3493175b7b27adbdb1.tar.gz
1 files changed, 366 insertions, 0 deletions
diff --git a/labb5/lib/StanfordCPPLib/lexicon.h b/labb5/lib/StanfordCPPLib/lexicon.h
new file mode 100755
index 0000000..fea4726
--- /dev/null
+++ b/labb5/lib/StanfordCPPLib/lexicon.h
@@ -0,0 +1,366 @@
+/*
+ * File: lexicon.h
+ * ---------------
+ * This file exports the <code>Lexicon</code> class, which is a
+ * compact structure for storing a list of words.
+ */
+
+#ifndef _lexicon_h
+#define _lexicon_h
+
+#include <string>
+#include "foreach.h"
+#include "set.h"
+#include "stack.h"
+
+/*
+ * Class: Lexicon
+ * --------------
+ * This class is used to represent a <b><i>lexicon,</i></b> or word list.
+ * The main difference between a lexicon and a dictionary is that
+ * a lexicon does not provide any mechanism for storing definitions;
+ * the lexicon contains only words, with no associated information.
+ * It is therefore similar to a set of strings, but with a more
+ * space-efficient internal representation.  The <code>Lexicon</code>
+ * class supports efficient lookup operations for words and prefixes.
+ *
+ * <p>As an example of the use of the <code>Lexicon</code> class, the
+ * following program lists all the two-letter words in the lexicon
+ * stored in <code>EnglishWords.dat</code>:
+ *
+ *<pre>
+ *    int main() {
+ *       Lexicon english("EnglishWords.dat");
+ *       foreach (string word in english) {
+ *          if (word.length() == 2) {
+ *             cout << word << endl;
+ *          }
+ *       }
+ *       return 0;
+ *    }
+ *</pre>
+ */
+
+#include <cctype>
+
+class Lexicon {
+
+public:
+
+/*
+ * Constructor: Lexicon
+ * Usage: Lexicon lex;
+ *        Lexicon lex(filename);
+ * -----------------------------
+ * Initializes a new lexicon.  The default constructor creates an empty
+ * lexicon.  The second form reads in the contents of the lexicon from
+ * the specified data file.  The data file must be in one of two formats:
+ * (1) a space-efficient precompiled binary format or (2) a text file
+ * containing one word per line.  The Stanford library distribution
+ * includes a binary lexicon file named <code>English.dat</code>
+ * containing a list of words in English.  The standard code pattern
+ * to initialize that lexicon looks like this:
+ *
+ *<pre>
+ *    Lexicon english("English.dat");
+ *</pre>
+ */
+
+   Lexicon();
+   Lexicon(std::string filename);
+
+/*
+ * Destructor: ~Lexicon
+ * --------------------
+ * The destructor deallocates any storage associated with the lexicon.
+ */
+
+   virtual ~Lexicon();
+
+/*
+ * Method: size
+ * Usage: int n = lex.size();
+ * --------------------------
+ * Returns the number of words contained in the lexicon.
+ */
+
+   int size() const;
+
+/*
+ * Method: isEmpty
+ * Usage: if (lex.isEmpty()) ...
+ * -----------------------------
+ * Returns <code>true</code> if the lexicon contains no words.
+ */
+
+   bool isEmpty() const;
+
+/*
+ * Method: clear
+ * Usage: lex.clear();
+ * -------------------
+ * Removes all words from the lexicon.
+ */
+
+   void clear();
+
+/*
+ * Method: add
+ * Usage: lex.add(word);
+ * ---------------------
+ * Adds the specified word to the lexicon.
+ */
+
+   void add(std::string word);
+
+/*
+ * Method: addWordsFromFile
+ * Usage: lex.addWordsFromFile(filename);
+ * --------------------------------------
+ * Reads the file and adds all of its words to the lexicon.
+ */
+
+   void addWordsFromFile(std::string filename);
+
+/*
+ * Method: contains
+ * Usage: if (lex.contains(word)) ...
+ * ----------------------------------
+ * Returns <code>true</code> if <code>word</code> is contained in the
+ * lexicon.  In the <code>Lexicon</code> class, the case of letters is
+ * ignored, so "Zoo" is the same as "ZOO" or "zoo".
+ */
+
+   bool contains(std::string word) const;
+
+/*
+ * Method: containsPrefix
+ * Usage: if (lex.containsPrefix(prefix)) ...
+ * ------------------------------------------
+ * Returns true if any words in the lexicon begin with <code>prefix</code>.
+ * Like <code>containsWord</code>, this method ignores the case of letters
+ * so that "MO" is a prefix of "monkey" or "Monday".
+ */
+
+   bool containsPrefix(std::string prefix) const;
+
+/*
+ * Method: mapAll
+ * Usage: lexicon.mapAll(fn);
+ * --------------------------
+ * Calls the specified function on each word in the lexicon.
+ */
+
+   void mapAll(void (*fn)(std::string)) const;
+   void mapAll(void (*fn)(const std::string &)) const;
+
+   template <typename FunctorType>
+   void mapAll(FunctorType fn) const;
+
+/*
+ * Additional Lexicon operations
+ * -----------------------------
+ * In addition to the methods listed in this interface, the Lexicon
+ * class supports the following operations:
+ *
+ *   - Deep copying for the copy constructor and assignment operator
+ *   - Iteration using the range-based for statement and STL iterators
+ *
+ * All iteration is guaranteed to proceed in alphabetical order.  All
+ * words in the lexicon are stored in lowercase.
+ */
+
+/* Private section */
+
+/**********************************************************************/
+/* Note: Everything below this point in the file is logically part    */
+/* of the implementation and should not be of interest to clients.    */
+/**********************************************************************/
+
+private:
+
+#ifdef _WIN32
+#define LITTLE_ENDIAN 1
+#define BYTE_ORDER LITTLE_ENDIAN
+#endif
+
+#pragma pack(1)
+   struct Edge {
+#if defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN
+      unsigned long letter:5;
+      unsigned long lastEdge:1;
+      unsigned long accept:1;
+      unsigned long unused:1;
+      unsigned long children:24;
+#else
+      unsigned long children:24;
+      unsigned long unused:1;
+      unsigned long accept:1;
+      unsigned long lastEdge:1;
+      unsigned long letter:5;
+#endif
+   };
+#pragma pack()
+
+   Edge *edges, *start;
+   int numEdges, numDawgWords;
+   Set<std::string> otherWords;
+
+public:
+
+/*
+ * Deep copying support
+ * --------------------
+ * This copy constructor and operator= are defined to make a
+ * deep copy, making it possible to pass/return lexicons by value
+ * and assign from one lexicon to another.  The entire contents of
+ * the lexicon, including all words, are copied.  Making copies is
+ * generally avoided because of the expense and thus, lexicons are
+ * typically passed by reference.  When a copy is needed, these
+ * operations are supported.
+ */
+
+   Lexicon(const Lexicon & src);
+   Lexicon & operator=(const Lexicon & src);
+
+/*
+ * Iterator support
+ * ----------------
+ * The classes in the StanfordCPPLib collection implement input
+ * iterators so that they work symmetrically with respect to the
+ * corresponding STL classes.
+ */
+
+   class iterator : public std::iterator<std::input_iterator_tag,std::string> {
+   private:
+      const Lexicon *lp;
+      int index;
+      std::string currentDawgPrefix;
+      std::string currentSetWord;
+      std::string tmpWord;
+      Edge *edgePtr;
+      Stack<Edge *> stack;
+      Set<std::string>::iterator setIterator;
+      Set<std::string>::iterator setEnd;
+
+      void advanceToNextWordInDawg();
+      void advanceToNextWordInSet();
+      void advanceToNextEdge();
+
+   public:
+      iterator() {
+         this->lp = NULL;
+      }
+
+      iterator(const Lexicon *lp, bool endFlag) {
+         this->lp = lp;
+         if (endFlag) {
+            index = lp->size();
+         } else {
+            index = 0;
+            edgePtr = NULL;
+            setIterator = lp->otherWords.begin();
+            setEnd = lp->otherWords.end();
+            currentDawgPrefix = "";
+            currentSetWord = "";
+            advanceToNextWordInDawg();
+            advanceToNextWordInSet();
+         }
+      }
+
+      iterator(const iterator & it) {
+         lp = it.lp;
+         index = it.index;
+         currentDawgPrefix = it.currentDawgPrefix;
+         currentSetWord = it.currentSetWord;
+         edgePtr = it.edgePtr;
+         stack = it.stack;
+         setIterator = it.setIterator;
+      }
+
+      iterator & operator++() {
+         if (edgePtr == NULL) {
+            advanceToNextWordInSet();
+         } else {
+            if (currentSetWord == "" || currentDawgPrefix < currentSetWord) {
+               advanceToNextWordInDawg();
+            } else {
+               advanceToNextWordInSet();
+            }
+         }
+         index++;
+         return *this;
+      }
+
+      iterator operator++(int) {
+         iterator copy(*this);
+         operator++();
+         return copy;
+      }
+
+      bool operator==(const iterator & rhs) {
+         return lp == rhs.lp && index == rhs.index;
+      }
+
+      bool operator!=(const iterator & rhs) {
+         return !(*this == rhs);
+      }
+
+      std::string operator*() {
+         if (edgePtr == NULL) return currentSetWord;
+         if (currentSetWord == "" || currentDawgPrefix < currentSetWord) {
+            return currentDawgPrefix + lp->ordToChar(edgePtr->letter);
+         } else {
+            return currentSetWord;
+         }
+      }
+
+      std::string *operator->() {
+         if (edgePtr == NULL) return &currentSetWord;
+         if (currentSetWord == "" || currentDawgPrefix < currentSetWord) {
+            tmpWord = currentDawgPrefix + lp->ordToChar(edgePtr->letter);
+            return &tmpWord;
+         } else {
+            return &currentSetWord;
+         }
+      }
+
+   };
+
+   iterator begin() const {
+      return iterator(this, false);
+   }
+
+   iterator end() const {
+      return iterator(this, true);
+   }
+
+private:
+
+   Edge *findEdgeForChar(Edge *children, char ch) const;
+   Edge *traceToLastEdge(const std::string & s) const;
+   void readBinaryFile(std::string filename);
+   void deepCopy(const Lexicon & src);
+   int countDawgWords(Edge *start) const;
+
+   unsigned int charToOrd(char ch) const {
+      return ((unsigned int)(tolower(ch) - 'a' + 1));
+   }
+
+   char ordToChar(unsigned int ord) const {
+      return ((char)(ord - 1 + 'a'));
+   }
+
+};
+
+template <typename FunctorType>
+void Lexicon::mapAll(FunctorType fn) const {
+   foreach (std::string word in *this) {
+      fn(word);
+   }
+}
+
+// hashing functions for lexicons;  defined in hashmap.cpp
+int hashCode(const Lexicon& l);
+
+#endif
author	Gustav Sörnäs <gustav@sornas.net>	2020-11-17 15:42:27 +0100
committer	Gustav Sörnäs <gustav@sornas.net>	2020-11-17 15:42:27 +0100
commit	fb80ac50825c7ca1fa063d3493175b7b27adbdb1 (patch)
tree	4d84895d45ca2c177ddefe11164575bf6762b3ca /labb5/lib/StanfordCPPLib/lexicon.h
parent	4065799f7080260f507a5e3ea8c2d8375e735166 (diff)
download	tddd86-fb80ac50825c7ca1fa063d3493175b7b27adbdb1.tar.gz