Skip to content
Snippets Groups Projects
Commit 00f004f1 authored by Apollo's avatar Apollo
Browse files

Merge branch 'master' of gitlab.cs.washington.edu:cse332-19wi/p2

parents 23e6556d 025fa2a7
No related branches found
No related tags found
No related merge requests found
...@@ -6,7 +6,7 @@ import cse332.datastructures.trees.BinarySearchTree; ...@@ -6,7 +6,7 @@ import cse332.datastructures.trees.BinarySearchTree;
* TODO: Replace this comment with your own as appropriate. * TODO: Replace this comment with your own as appropriate.
* *
* AVLTree must be a subclass of BinarySearchTree<E> and must use * AVLTree must be a subclass of BinarySearchTree<E> and must use
* inheritance and callst o superclass methods to avoid unnecessary * inheritance and calls to superclass methods to avoid unnecessary
* duplication or copying of functionality. * duplication or copying of functionality.
* *
* 1. Create a subclass of BSTNode, perhaps named AVLNode. * 1. Create a subclass of BSTNode, perhaps named AVLNode.
......
...@@ -14,8 +14,10 @@ import cse332.interfaces.misc.Dictionary; ...@@ -14,8 +14,10 @@ import cse332.interfaces.misc.Dictionary;
* restrict the size of the input domain (i.e., it must accept * restrict the size of the input domain (i.e., it must accept
* any key) or the number of inputs (i.e., it must grow as necessary). * any key) or the number of inputs (i.e., it must grow as necessary).
* 3. Your HashTable should rehash as appropriate (use load factor as * 3. Your HashTable should rehash as appropriate (use load factor as
* shown in class). * shown in class!).
* 5. HashTable should be able to grow at least up to 200,000 elements. * 5. HashTable should be able to resize its capacity to prime numbers for more
* than 200,000 elements. After more than 200,000 elements, it should
* continue to resize using some other mechanism.
* 6. We suggest you hard code some prime numbers. You can use this * 6. We suggest you hard code some prime numbers. You can use this
* list: http://primes.utm.edu/lists/small/100000.txt * list: http://primes.utm.edu/lists/small/100000.txt
* NOTE: Do NOT copy the whole list! * NOTE: Do NOT copy the whole list!
......
package p2.wordsuggestor; package p2.wordsuggestor;
import java.io.File; import java.io.File;
import java.io.FileReader;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.Stack;
import org.jsoup.Jsoup; import org.json.simple.JSONObject;
import org.jsoup.nodes.Document; import org.json.simple.parser.JSONParser;
import org.jsoup.nodes.Element; import org.json.simple.parser.ParseException;
import org.jsoup.select.Elements; import org.json.simple.JSONArray;
import cse332.interfaces.worklists.LIFOWorkList;
import datastructures.worklists.ArrayStack;
public final class ParseFBMessages { public final class ParseFBMessages {
private ParseFBMessages() { private ParseFBMessages() {
...@@ -29,38 +28,38 @@ public final class ParseFBMessages { ...@@ -29,38 +28,38 @@ public final class ParseFBMessages {
// (e.g. "/Users/Me/Downloads/MyArchiveName" or "C:\Users\Me\Downloads\MyArchiveName") // (e.g. "/Users/Me/Downloads/MyArchiveName" or "C:\Users\Me\Downloads\MyArchiveName")
// You may be able to use a relative path like "./MyArchiveName", but results can // You may be able to use a relative path like "./MyArchiveName", but results can
// vary from machine to machine. // vary from machine to machine.
//
// DO NOT PUSH YOUR ME.TXT FILE TO GITLAB. WE DO NOT WANT YOUR PRIVATE CONVERSATIONS!!!!
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
if (args.length != 2) { String name = "<Your FB Name>"; // e.g. "Ruth Anderson"
System.out.println("USAGE: ParseFBMessages <Your FB Name> <Your FB Archive>"); String archive = "<Your FB Archive>"; // e.g. "/Users/rea/workspace/332/facebook-rea/messages"
System.exit(1);
}
// Note: you can replace these with your FB Name and Archive instead of
// using the command line if you'd like.
String name = args[0];
String archive = args[1];
LIFOWorkList<String> messages = new ArrayStack<String>(); Stack<String> corpus = new Stack<>();
File[] listOfFiles = (new File(archive + File.separator + "messages")).listFiles(); File[] listOfFiles = (new File(archive + File.separator + "inbox")).listFiles();
for (int i = 0; i < listOfFiles.length; i++) { for (int i = 0; i < listOfFiles.length; i++) {
if (listOfFiles[i].isFile()) { File conversation = new File(listOfFiles[i], "message.json");
Document doc = Jsoup if (conversation.isFile()) {
.parse(listOfFiles[i], "UTF-8"); try {
Elements messagesElements = doc.getElementsByTag("p"); JSONObject obj = (JSONObject) new JSONParser().parse(new FileReader(conversation));
for (Element content : messagesElements) { JSONArray messages = (JSONArray) obj.get("messages");
if (content.previousElementSibling().getElementsByClass("user").text() for (Object m: messages) {
.equals(name)) { JSONObject msg = (JSONObject) m;
messages.add(content.text()); String sender = (String) msg.get("sender_name");
} if(sender != null && sender.equals(name)) {
} corpus.push((String) msg.get("content"));
} }
}
} catch (ParseException e) {
System.err.println("Could not parse: " + conversation.toString());
}
}
} }
PrintWriter out = new PrintWriter("me.txt", "UTF-8"); PrintWriter out = new PrintWriter("me.txt", "UTF-8");
while (messages.hasWork()) { while (!corpus.isEmpty()) {
out.println(messages.next()); out.println(corpus.pop());
} }
out.close(); out.close();
......
...@@ -95,7 +95,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility { ...@@ -95,7 +95,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
if (items.length != answer.length) return 0; if (items.length != answer.length) return 0;
String[] itemsWithoutCounts = new String[items.length]; String[] itemsWithoutCounts = new String[items.length];
for (int j = 0; j < answer.length; j++) { for (int j = 0; j < answer.length; j++) {
if (items[j].value != 1) return 0; if (!items[j].value.equals(1)) return 0;
itemsWithoutCounts[j] = items[j].key; itemsWithoutCounts[j] = items[j].key;
} }
Arrays.sort(itemsWithoutCounts); Arrays.sort(itemsWithoutCounts);
...@@ -127,10 +127,10 @@ public class NGramToNextChoicesMapTests extends TestsUtility { ...@@ -127,10 +127,10 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
return 1; return 1;
} }
// TODO: Not finished yet
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static int testRepeatedWordsPerNGram() { public static int testRepeatedWordsPerNGram() {
NGramToNextChoicesMap map = init(); NGramToNextChoicesMap map = init();
// Creates Ngrams to test for with N = 3
NGram[] ngrams = new NGram[]{ NGram[] ngrams = new NGram[]{
new NGram(new String[]{"foo", "bar", "baz"}), new NGram(new String[]{"foo", "bar", "baz"}),
new NGram(new String[]{"fee", "fi", "fo"}), new NGram(new String[]{"fee", "fi", "fo"}),
...@@ -138,7 +138,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility { ...@@ -138,7 +138,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
new NGram(new String[]{"3", "2", "2"}), new NGram(new String[]{"3", "2", "2"}),
new NGram(new String[]{"a", "s", "d"}) new NGram(new String[]{"a", "s", "d"})
}; };
// Array of words seen after each Ngram with correlating index from above
String[][] words = new String[][] { String[][] words = new String[][] {
new String[]{"bop", "bip", "boop", "bop", "bop"}, new String[]{"bop", "bip", "boop", "bop", "bop"},
new String[]{"fum", "giants", "giants"}, new String[]{"fum", "giants", "giants"},
...@@ -148,6 +148,9 @@ public class NGramToNextChoicesMapTests extends TestsUtility { ...@@ -148,6 +148,9 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
}; };
// yes this is awful, but i can't think of a better way to do it atm // yes this is awful, but i can't think of a better way to do it atm
// Creates answers for getCountsAfter - Word seen after and count
// correlates with words and ngrams above
// Note that words after are in sorted order, not in order of array in words
Map<NGram, Item<String, Integer>[]> answers = new TreeMap<>(); Map<NGram, Item<String, Integer>[]> answers = new TreeMap<>();
answers.put(ngrams[0], (Item<String, Integer>[]) new Item[3]); answers.put(ngrams[0], (Item<String, Integer>[]) new Item[3]);
answers.get(ngrams[0])[0] = new Item<String, Integer>("bip", 1); answers.get(ngrams[0])[0] = new Item<String, Integer>("bip", 1);
...@@ -167,12 +170,14 @@ public class NGramToNextChoicesMapTests extends TestsUtility { ...@@ -167,12 +170,14 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
answers.get(ngrams[4])[1] = new Item<String, Integer>("for", 2); answers.get(ngrams[4])[1] = new Item<String, Integer>("for", 2);
answers.get(ngrams[4])[2] = new Item<String, Integer>("while", 2); answers.get(ngrams[4])[2] = new Item<String, Integer>("while", 2);
// Adds nGrams and words after to student's NGramToNextChoicesMap
for (int i = 0; i < ngrams.length; i++) { for (int i = 0; i < ngrams.length; i++) {
for (int j = 0; j < words[i].length; j++) { for (int j = 0; j < words[i].length; j++) {
map.seenWordAfterNGram(ngrams[i], words[i][j]); map.seenWordAfterNGram(ngrams[i], words[i][j]);
} }
} }
// checks to see if getCountsAfter returns correctly
for (int i = 0; i < ngrams.length; i++) { for (int i = 0; i < ngrams.length; i++) {
NGram ngram = ngrams[i]; NGram ngram = ngrams[i];
Item<String, Integer>[] results = map.getCountsAfter(ngram); Item<String, Integer>[] results = map.getCountsAfter(ngram);
...@@ -187,12 +192,15 @@ public class NGramToNextChoicesMapTests extends TestsUtility { ...@@ -187,12 +192,15 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
}); });
Item<String, Integer>[] expected = answers.get(ngram); Item<String, Integer>[] expected = answers.get(ngram);
// checks for correct number of unique words after
if (results.length != expected.length) return 0; if (results.length != expected.length) return 0;
for (int j = 0; j < expected.length; j++) { for (int j = 0; j < expected.length; j++) {
// checks if correct word after via sorted words
if (!expected[j].key.equals(results[j].key)) { if (!expected[j].key.equals(results[j].key)) {
return 0; return 0;
} }
if (expected[j].value != results[j].value) { // checks if correct count for given word after
if (!expected[j].value.equals(results[j].value)) {
return 0; return 0;
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment