Merge branch 'master' of gitlab.cs.washington.edu:cse332-19wi/p2

00f004f1 · Apollo · 23e6556d · 025fa2a7 · 00f004f1 · 00f004f1
Commit 00f004f1 authored 6 years ago by Apollo
--- a/src/datastructures/dictionaries/AVLTree.java
+++ b/src/datastructures/dictionaries/AVLTree.java
@@ -6,7 +6,7 @@ import cse332.datastructures.trees.BinarySearchTree;
 * TODO: Replace this comment with your own as appropriate.
 *
 * AVLTree must be a subclass of BinarySearchTree<E> and must use
- * inheritance and callst o superclass methods to avoid unnecessary
+ * inheritance and calls to superclass methods to avoid unnecessary
 * duplication or copying of functionality.
 *
 * 1. Create a subclass of BSTNode, perhaps named AVLNode.

--- a/src/datastructures/dictionaries/ChainingHashTable.java
+++ b/src/datastructures/dictionaries/ChainingHashTable.java
@@ -14,8 +14,10 @@ import cse332.interfaces.misc.Dictionary;
 *    restrict the size of the input domain (i.e., it must accept 
 *    any key) or the number of inputs (i.e., it must grow as necessary).
 * 3. Your HashTable should rehash as appropriate (use load factor as
- *    shown in class).
- * 5. HashTable should be able to grow at least up to 200,000 elements. 
+ *    shown in class!). 
+ * 5. HashTable should be able to resize its capacity to prime numbers for more 
+ *    than 200,000 elements. After more than 200,000 elements, it should 
+ *    continue to resize using some other mechanism.
 * 6. We suggest you hard code some prime numbers. You can use this
 *    list: http://primes.utm.edu/lists/small/100000.txt 
 *    NOTE: Do NOT copy the whole list!

--- a/src/p2/wordsuggestor/ParseFBMessages.java
+++ b/src/p2/wordsuggestor/ParseFBMessages.java
 package p2.wordsuggestor;

 import java.io.File;
+import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintWriter;
+import java.util.Stack;

-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import cse332.interfaces.worklists.LIFOWorkList;
-import datastructures.worklists.ArrayStack;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+import org.json.simple.JSONArray;

 public final class ParseFBMessages {
    private ParseFBMessages() {
@@ -29,38 +28,38 @@ public final class ParseFBMessages {
    // (e.g. "/Users/Me/Downloads/MyArchiveName" or "C:\Users\Me\Downloads\MyArchiveName")
    // You may be able to use a relative path like "./MyArchiveName", but results can
    // vary from machine to machine.
+    //
+    // DO NOT PUSH YOUR ME.TXT FILE TO GITLAB. WE DO NOT WANT YOUR PRIVATE CONVERSATIONS!!!!
    public static void main(String[] args) throws IOException {
-        if (args.length != 2) {
-            System.out.println("USAGE: ParseFBMessages <Your FB Name> <Your FB Archive>");
-            System.exit(1);
-        }
-
-        // Note: you can replace these with your FB Name and Archive instead of
-        // using the command line if you'd like.
-        String name = args[0];
-        String archive = args[1];
+        String name = "<Your FB Name>"; // e.g. "Ruth Anderson"
+        String archive = "<Your FB Archive>"; // e.g. "/Users/rea/workspace/332/facebook-rea/messages"

-        LIFOWorkList<String> messages = new ArrayStack<String>();
-        File[] listOfFiles = (new File(archive + File.separator + "messages")).listFiles();
+        Stack<String> corpus = new Stack<>();
+        File[] listOfFiles = (new File(archive + File.separator + "inbox")).listFiles();

        for (int i = 0; i < listOfFiles.length; i++) {
-          if (listOfFiles[i].isFile()) {
-              Document doc = Jsoup
-                      .parse(listOfFiles[i], "UTF-8");
-              Elements messagesElements = doc.getElementsByTag("p");
-              for (Element content : messagesElements) {
-                  if (content.previousElementSibling().getElementsByClass("user").text()
-                          .equals(name)) {
-                      messages.add(content.text());
-                  }
-              }
-          }
+            File conversation = new File(listOfFiles[i], "message.json");
+            if (conversation.isFile()) {
+                try {
+                    JSONObject obj = (JSONObject) new JSONParser().parse(new FileReader(conversation));
+                    JSONArray messages = (JSONArray) obj.get("messages");
+                    for (Object m: messages) {
+                        JSONObject msg = (JSONObject) m;
+                        String sender = (String) msg.get("sender_name");
+                        if(sender != null && sender.equals(name)) {
+                            corpus.push((String) msg.get("content"));
+                        }
+                    }
+                } catch (ParseException e) {
+                    System.err.println("Could not parse: " + conversation.toString());
+                }
+            }
        }

        PrintWriter out = new PrintWriter("me.txt", "UTF-8");

-        while (messages.hasWork()) {
-            out.println(messages.next());
+        while (!corpus.isEmpty()) {
+            out.println(corpus.pop());
        }

        out.close();

--- a/src/tests/gitlab/ckpt1/NGramToNextChoicesMapTests.java
+++ b/src/tests/gitlab/ckpt1/NGramToNextChoicesMapTests.java
@@ -95,7 +95,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
            if (items.length != answer.length) return 0;
            String[] itemsWithoutCounts = new String[items.length];
            for (int j = 0; j < answer.length; j++) {
-                if (items[j].value != 1) return 0;
+                if (!items[j].value.equals(1)) return 0;
                itemsWithoutCounts[j] = items[j].key;
            }
            Arrays.sort(itemsWithoutCounts);
@@ -127,10 +127,10 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
        return 1;
    }
    
-    // TODO: Not finished yet
    @SuppressWarnings("unchecked")
    public static int testRepeatedWordsPerNGram() {
        NGramToNextChoicesMap map = init();
+        // Creates Ngrams to test for with N = 3
        NGram[] ngrams = new NGram[]{
                new NGram(new String[]{"foo", "bar", "baz"}),
                new NGram(new String[]{"fee", "fi", "fo"}),
@@ -138,7 +138,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
                new NGram(new String[]{"3", "2", "2"}),
                new NGram(new String[]{"a", "s", "d"})
        };
-        
+        // Array of words seen after each Ngram, at the corresponding index from above
        String[][] words = new String[][] {
            new String[]{"bop", "bip", "boop", "bop", "bop"},
            new String[]{"fum", "giants", "giants"},
@@ -148,6 +148,9 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
        };
        
        // yes this is awful, but i can't think of a better way to do it atm
+        // Creates answers for getCountsAfter - Word seen after and count 
+        // correlates with words and ngrams above
+        // Note that words after are in sorted order, not in order of array in words
        Map<NGram, Item<String, Integer>[]> answers = new TreeMap<>();
        answers.put(ngrams[0], (Item<String, Integer>[]) new Item[3]);
        answers.get(ngrams[0])[0] = new Item<String, Integer>("bip", 1);
@@ -167,12 +170,14 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
        answers.get(ngrams[4])[1] = new Item<String, Integer>("for", 2);
        answers.get(ngrams[4])[2] = new Item<String, Integer>("while", 2);
        
+        // Adds nGrams and words after to student's NGramToNextChoicesMap
        for (int i = 0; i < ngrams.length; i++) {
            for (int j = 0; j < words[i].length; j++) {
                map.seenWordAfterNGram(ngrams[i], words[i][j]);
            }
            
        }
+        // checks to see if getCountsAfter returns correctly
        for (int i = 0; i < ngrams.length; i++) {
            NGram ngram = ngrams[i];
            Item<String, Integer>[] results = map.getCountsAfter(ngram);
@@ -187,12 +192,15 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
                
            });
            Item<String, Integer>[] expected = answers.get(ngram);
+            // checks for correct number of unique words after
            if (results.length != expected.length) return 0;
            for (int j = 0; j < expected.length; j++) {
+                // checks if correct word after via sorted words
                if (!expected[j].key.equals(results[j].key)) {
                    return 0;
                }
-                if (expected[j].value != results[j].value) {
+                // checks if correct count for given word after
+                if (!expected[j].value.equals(results[j].value)) {
                    return 0;
                }
            }