Skip to content
Snippets Groups Projects
Commit 00f004f1 authored by Apollo's avatar Apollo
Browse files

Merge branch 'master' of gitlab.cs.washington.edu:cse332-19wi/p2

parents 23e6556d 025fa2a7
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,7 @@ import cse332.datastructures.trees.BinarySearchTree;
* TODO: Replace this comment with your own as appropriate.
*
* AVLTree must be a subclass of BinarySearchTree<E> and must use
* inheritance and callst o superclass methods to avoid unnecessary
* inheritance and calls to superclass methods to avoid unnecessary
* duplication or copying of functionality.
*
* 1. Create a subclass of BSTNode, perhaps named AVLNode.
......
......@@ -14,8 +14,10 @@ import cse332.interfaces.misc.Dictionary;
* restrict the size of the input domain (i.e., it must accept
* any key) or the number of inputs (i.e., it must grow as necessary).
* 3. Your HashTable should rehash as appropriate (use load factor as
* shown in class).
* 5. HashTable should be able to grow at least up to 200,000 elements.
* shown in class!).
* 5. HashTable should be able to resize its capacity to prime numbers for more
* than 200,000 elements. After more than 200,000 elements, it should
* continue to resize using some other mechanism.
* 6. We suggest you hard code some prime numbers. You can use this
* list: http://primes.utm.edu/lists/small/100000.txt
* NOTE: Do NOT copy the whole list!
......
package p2.wordsuggestor;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Stack;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import cse332.interfaces.worklists.LIFOWorkList;
import datastructures.worklists.ArrayStack;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.json.simple.JSONArray;
public final class ParseFBMessages {
private ParseFBMessages() {
......@@ -29,38 +28,38 @@ public final class ParseFBMessages {
// (e.g. "/Users/Me/Downloads/MyArchiveName" or "C:\Users\Me\Downloads\MyArchiveName")
// You may be able to use a relative path like "./MyArchiveName", but results can
// vary from machine to machine.
//
// DO NOT PUSH YOUR ME.TXT FILE TO GITLAB. WE DO NOT WANT YOUR PRIVATE CONVERSATIONS!!!!
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.out.println("USAGE: ParseFBMessages <Your FB Name> <Your FB Archive>");
System.exit(1);
}
// Note: you can replace these with your FB Name and Archive instead of
// using the command line if you'd like.
String name = args[0];
String archive = args[1];
String name = "<Your FB Name>"; // e.g. "Ruth Anderson"
String archive = "<Your FB Archive>"; // e.g. "/Users/rea/workspace/332/facebook-rea/messages"
LIFOWorkList<String> messages = new ArrayStack<String>();
File[] listOfFiles = (new File(archive + File.separator + "messages")).listFiles();
Stack<String> corpus = new Stack<>();
File[] listOfFiles = (new File(archive + File.separator + "inbox")).listFiles();
for (int i = 0; i < listOfFiles.length; i++) {
if (listOfFiles[i].isFile()) {
Document doc = Jsoup
.parse(listOfFiles[i], "UTF-8");
Elements messagesElements = doc.getElementsByTag("p");
for (Element content : messagesElements) {
if (content.previousElementSibling().getElementsByClass("user").text()
.equals(name)) {
messages.add(content.text());
}
}
}
File conversation = new File(listOfFiles[i], "message.json");
if (conversation.isFile()) {
try {
JSONObject obj = (JSONObject) new JSONParser().parse(new FileReader(conversation));
JSONArray messages = (JSONArray) obj.get("messages");
for (Object m: messages) {
JSONObject msg = (JSONObject) m;
String sender = (String) msg.get("sender_name");
if(sender != null && sender.equals(name)) {
corpus.push((String) msg.get("content"));
}
}
} catch (ParseException e) {
System.err.println("Could not parse: " + conversation.toString());
}
}
}
PrintWriter out = new PrintWriter("me.txt", "UTF-8");
while (messages.hasWork()) {
out.println(messages.next());
while (!corpus.isEmpty()) {
out.println(corpus.pop());
}
out.close();
......
......@@ -95,7 +95,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
if (items.length != answer.length) return 0;
String[] itemsWithoutCounts = new String[items.length];
for (int j = 0; j < answer.length; j++) {
if (items[j].value != 1) return 0;
if (!items[j].value.equals(1)) return 0;
itemsWithoutCounts[j] = items[j].key;
}
Arrays.sort(itemsWithoutCounts);
......@@ -127,10 +127,10 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
return 1;
}
// TODO: Not finished yet
@SuppressWarnings("unchecked")
public static int testRepeatedWordsPerNGram() {
NGramToNextChoicesMap map = init();
// Creates Ngrams to test for with N = 3
NGram[] ngrams = new NGram[]{
new NGram(new String[]{"foo", "bar", "baz"}),
new NGram(new String[]{"fee", "fi", "fo"}),
......@@ -138,7 +138,7 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
new NGram(new String[]{"3", "2", "2"}),
new NGram(new String[]{"a", "s", "d"})
};
// Words seen after each NGram; words[i] holds the words that follow ngrams[i] above
String[][] words = new String[][] {
new String[]{"bop", "bip", "boop", "bop", "bop"},
new String[]{"fum", "giants", "giants"},
......@@ -148,6 +148,9 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
};
// yes this is awful, but i can't think of a better way to do it atm
// Creates the expected answers for getCountsAfter: each word seen after an NGram and its count.
// The entries correlate with the words and ngrams arrays above.
// Note that the words after are listed in sorted order, not in the order they appear in words.
Map<NGram, Item<String, Integer>[]> answers = new TreeMap<>();
answers.put(ngrams[0], (Item<String, Integer>[]) new Item[3]);
answers.get(ngrams[0])[0] = new Item<String, Integer>("bip", 1);
......@@ -167,12 +170,14 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
answers.get(ngrams[4])[1] = new Item<String, Integer>("for", 2);
answers.get(ngrams[4])[2] = new Item<String, Integer>("while", 2);
// Adds nGrams and words after to student's NGramToNextChoicesMap
for (int i = 0; i < ngrams.length; i++) {
for (int j = 0; j < words[i].length; j++) {
map.seenWordAfterNGram(ngrams[i], words[i][j]);
}
}
// checks to see if getCountsAfter returns correctly
for (int i = 0; i < ngrams.length; i++) {
NGram ngram = ngrams[i];
Item<String, Integer>[] results = map.getCountsAfter(ngram);
......@@ -187,12 +192,15 @@ public class NGramToNextChoicesMapTests extends TestsUtility {
});
Item<String, Integer>[] expected = answers.get(ngram);
// checks for correct number of unique words after
if (results.length != expected.length) return 0;
for (int j = 0; j < expected.length; j++) {
// checks if correct word after via sorted words
if (!expected[j].key.equals(results[j].key)) {
return 0;
}
if (expected[j].value != results[j].value) {
// checks if correct count for given word after
if (!expected[j].value.equals(results[j].value)) {
return 0;
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment