diff --git a/src/datastructures/dictionaries/AVLTree.java b/src/datastructures/dictionaries/AVLTree.java index 42d408e66f51360fb31da482617267840abe64db..02f53108d1020c76b43eaef6586a6ea916e8d87c 100644 --- a/src/datastructures/dictionaries/AVLTree.java +++ b/src/datastructures/dictionaries/AVLTree.java @@ -6,7 +6,7 @@ import cse332.datastructures.trees.BinarySearchTree; * TODO: Replace this comment with your own as appropriate. * * AVLTree must be a subclass of BinarySearchTree<E> and must use - * inheritance and callst o superclass methods to avoid unnecessary + * inheritance and calls to superclass methods to avoid unnecessary * duplication or copying of functionality. * * 1. Create a subclass of BSTNode, perhaps named AVLNode. diff --git a/src/datastructures/dictionaries/ChainingHashTable.java b/src/datastructures/dictionaries/ChainingHashTable.java index 62f89404bab54bb81fff3327487f97c0a4af605d..1123bd54e3cb91397a0b9f7b4f5f39370f548e02 100644 --- a/src/datastructures/dictionaries/ChainingHashTable.java +++ b/src/datastructures/dictionaries/ChainingHashTable.java @@ -14,8 +14,10 @@ import cse332.interfaces.misc.Dictionary; * restrict the size of the input domain (i.e., it must accept * any key) or the number of inputs (i.e., it must grow as necessary). * 3. Your HashTable should rehash as appropriate (use load factor as - * shown in class). - * 5. HashTable should be able to grow at least up to 200,000 elements. + * shown in class!). + * 5. HashTable should be able to resize its capacity to prime numbers for more + * than 200,000 elements. After more than 200,000 elements, it should + * continue to resize using some other mechanism. * 6. We suggest you hard code some prime numbers. You can use this * list: http://primes.utm.edu/lists/small/100000.txt * NOTE: Do NOT copy the whole list! 
diff --git a/src/p2/wordsuggestor/ParseFBMessages.java b/src/p2/wordsuggestor/ParseFBMessages.java index 29a266da628e607c4fd1a67f94c3650e01de7105..46fa971a817412ad296c8f41f916c4c88f43dd00 100644 --- a/src/p2/wordsuggestor/ParseFBMessages.java +++ b/src/p2/wordsuggestor/ParseFBMessages.java @@ -1,16 +1,15 @@ package p2.wordsuggestor; import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.io.PrintWriter; +import java.util.Stack; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -import cse332.interfaces.worklists.LIFOWorkList; -import datastructures.worklists.ArrayStack; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; +import org.json.simple.JSONArray; public final class ParseFBMessages { private ParseFBMessages() { @@ -29,38 +28,38 @@ public final class ParseFBMessages { // (e.g. "/Users/Me/Downloads/MyArchiveName" or "C:\Users\Me\Downloads\MyArchiveName") // You may be able to use a relative path like "./MyArchiveName", but results can // vary from machine to machine. + // + // DO NOT PUSH YOUR ME.TXT FILE TO GITLAB. WE DO NOT WANT YOUR PRIVATE CONVERSATIONS!!!! public static void main(String[] args) throws IOException { - if (args.length != 2) { - System.out.println("USAGE: ParseFBMessages <Your FB Name> <Your FB Archive>"); - System.exit(1); - } - - // Note: you can replace these with your FB Name and Archive instead of - // using the command line if you'd like. - String name = args[0]; - String archive = args[1]; + String name = "<Your FB Name>"; // e.g. "Ruth Anderson" + String archive = "<Your FB Archive>"; // e.g. 
"/Users/rea/workspace/332/facebook-rea/messages" - LIFOWorkList<String> messages = new ArrayStack<String>(); - File[] listOfFiles = (new File(archive + File.separator + "messages")).listFiles(); + Stack<String> corpus = new Stack<>(); + File[] listOfFiles = (new File(archive + File.separator + "inbox")).listFiles(); for (int i = 0; i < listOfFiles.length; i++) { - if (listOfFiles[i].isFile()) { - Document doc = Jsoup - .parse(listOfFiles[i], "UTF-8"); - Elements messagesElements = doc.getElementsByTag("p"); - for (Element content : messagesElements) { - if (content.previousElementSibling().getElementsByClass("user").text() - .equals(name)) { - messages.add(content.text()); - } - } - } + File conversation = new File(listOfFiles[i], "message.json"); + if (conversation.isFile()) { + try { + JSONObject obj = (JSONObject) new JSONParser().parse(new FileReader(conversation)); + JSONArray messages = (JSONArray) obj.get("messages"); + for (Object m: messages) { + JSONObject msg = (JSONObject) m; + String sender = (String) msg.get("sender_name"); + if(sender != null && sender.equals(name)) { + corpus.push((String) msg.get("content")); + } + } + } catch (ParseException e) { + System.err.println("Could not parse: " + conversation.toString()); + } + } } PrintWriter out = new PrintWriter("me.txt", "UTF-8"); - while (messages.hasWork()) { - out.println(messages.next()); + while (!corpus.isEmpty()) { + out.println(corpus.pop()); } out.close(); diff --git a/src/tests/gitlab/ckpt1/NGramToNextChoicesMapTests.java b/src/tests/gitlab/ckpt1/NGramToNextChoicesMapTests.java index bf12ff39b6a4a584aad6812c25c50eb10e052e78..cc9657960184475778206e24fb088ab752a104ce 100644 --- a/src/tests/gitlab/ckpt1/NGramToNextChoicesMapTests.java +++ b/src/tests/gitlab/ckpt1/NGramToNextChoicesMapTests.java @@ -95,7 +95,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility { if (items.length != answer.length) return 0; String[] itemsWithoutCounts = new String[items.length]; for (int j = 0; j < 
answer.length; j++) { - if (items[j].value != 1) return 0; + if (!items[j].value.equals(1)) return 0; itemsWithoutCounts[j] = items[j].key; } Arrays.sort(itemsWithoutCounts); @@ -127,10 +127,10 @@ public class NGramToNextChoicesMapTests extends TestsUtility { return 1; } - // TODO: Not finished yet @SuppressWarnings("unchecked") public static int testRepeatedWordsPerNGram() { NGramToNextChoicesMap map = init(); + // Creates Ngrams to test for with N = 3 NGram[] ngrams = new NGram[]{ new NGram(new String[]{"foo", "bar", "baz"}), new NGram(new String[]{"fee", "fi", "fo"}), @@ -138,7 +138,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility { new NGram(new String[]{"3", "2", "2"}), new NGram(new String[]{"a", "s", "d"}) }; - + // Array of words seen after each Ngram with correlating index from above String[][] words = new String[][] { new String[]{"bop", "bip", "boop", "bop", "bop"}, new String[]{"fum", "giants", "giants"}, @@ -148,6 +148,9 @@ public class NGramToNextChoicesMapTests extends TestsUtility { }; // yes this is awful, but i can't think of a better way to do it atm + // Creates answers for getCountsAfter - Word seen after and count + // correlates with words and ngrams above + // Note that words after are in sorted order, not in order of array in words Map<NGram, Item<String, Integer>[]> answers = new TreeMap<>(); answers.put(ngrams[0], (Item<String, Integer>[]) new Item[3]); answers.get(ngrams[0])[0] = new Item<String, Integer>("bip", 1); @@ -167,12 +170,14 @@ public class NGramToNextChoicesMapTests extends TestsUtility { answers.get(ngrams[4])[1] = new Item<String, Integer>("for", 2); answers.get(ngrams[4])[2] = new Item<String, Integer>("while", 2); + // Adds nGrams and words after to student's NGramToNextChoicesMap for (int i = 0; i < ngrams.length; i++) { for (int j = 0; j < words[i].length; j++) { map.seenWordAfterNGram(ngrams[i], words[i][j]); } } + // checks to see if getCountsAfter returns correctly for (int i = 0; i < ngrams.length; 
i++) { NGram ngram = ngrams[i]; Item<String, Integer>[] results = map.getCountsAfter(ngram); @@ -187,12 +192,15 @@ public class NGramToNextChoicesMapTests extends TestsUtility { }); Item<String, Integer>[] expected = answers.get(ngram); + // checks for correct number of unique words after if (results.length != expected.length) return 0; for (int j = 0; j < expected.length; j++) { + // checks if correct word after via sorted words if (!expected[j].key.equals(results[j].key)) { return 0; } - if (expected[j].value != results[j].value) { + // checks if correct count for given word after + if (!expected[j].value.equals(results[j].value)) { return 0; } }