Skip to content
Snippets Groups Projects
Commit b23d5e18 authored by Dan Suciu's avatar Dan Suciu
Browse files

working files

parent 88da5092
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
-- Schema and bulk load for the two tab-separated files emitted by the
-- DBLP XML converter.

-- One row per publication: k = publication key, p = publication type.
CREATE TABLE Pub (
    k TEXT,
    p TEXT
);

-- One row per field of a publication:
-- k = publication key, i = field counter, p = field name, v = field value.
CREATE TABLE Field (
    k TEXT,
    i TEXT,
    p TEXT,
    v TEXT
);

-- Load the generated files (default COPY text format, tab-delimited).
COPY Pub FROM 'pubFile.txt';
COPY Field FROM 'fieldFile.txt';
#!/usr/bin/python
import psycopg2
def main():
    """Print the first ten rows of the ``author`` table of the ``dblp`` database.

    Connects to a PostgreSQL server on localhost through psycopg2. If the
    connection cannot be established, a message is printed and the function
    returns without querying. The connection is always closed before
    returning.

    The print calls use the single-argument parenthesised form so the script
    runs unchanged under both Python 2 and Python 3.
    """
    try:
        conn = psycopg2.connect("dbname='dblp' user='<YOUR USER NAME>' host='localhost' password=''")
    except psycopg2.Error:
        print("I am unable to connect to the database")
        # Without a connection there is nothing to query; continuing would
        # fail with a NameError on ``conn``.
        return

    try:
        cur = conn.cursor()
        cur.execute("SELECT * FROM author LIMIT 10")
        rows = cur.fetchall()
        print("Showing first 10 results:\n")
        for row in rows:
            # Only the first two columns of each row are shown.
            print("%s %s" % (row[0], row[1]))
    finally:
        conn.close()


if __name__ == "__main__":
    main()
import xml.sax
import re
class DBLPContentHandler(xml.sax.ContentHandler):
    """
    Reads the dblp.xml file and produces two output files.
    pubFile.txt = (key, pubtype) tuples
    fieldFile.txt = (key, fieldCnt, field, value) tuples
    Each file is tab-separated
    Once the program finishes, load these two files in a relational database; run createSchema.sql

    Both files are created in the current working directory when the root
    <dblp> element starts and are closed when it ends. Parser state is kept
    on the handler instance, so separate handlers do not interfere with
    each other.
    """

    # Element names that start a publication record.
    pubList = ["article", "inproceedings", "proceedings", "book", "incollection", "phdthesis", "mastersthesis", "www"]
    # Element names written out as fields of the enclosing publication.
    fieldList = ["author", "editor", "title", "booktitle", "pages", "year", "address", "journal", "volume", "number", "month", "url", "ee", "cdrom", "cite", "publisher", "note", "crossref", "isbn", "series", "school", "chapter"]

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.pubFile = None      # open handle for pubFile.txt while parsing
        self.fieldFile = None    # open handle for fieldFile.txt while parsing
        self.key = None          # "key" attribute of the current publication
        self.pub = None          # element name of the current publication
        self.field = None        # name of the most recently started field
        self.fieldCount = 0      # index of the next field in the publication
        self.content = ""        # escaped text collected since the last reset

    def startElement(self, name, attrs):
        """Open the output files on <dblp>; reset state on publications and fields."""
        if name == "dblp":
            # NOTE(review): the files use the platform default encoding;
            # confirm it matches the encoding of the target database.
            self.pubFile = open('pubFile.txt', 'w')
            self.fieldFile = open('fieldFile.txt', 'w')
            self.content = ""
        if name in self.pubList:
            self.key = attrs.getValue("key")
            self.pub = name
            self.fieldCount = 0
            self.content = ""
        if name in self.fieldList:
            self.field = name
            self.content = ""

    def endElement(self, name):
        """Write one output row per finished field or publication; close files on </dblp>."""
        if name in self.fieldList:
            row = [self.key, str(self.fieldCount), self.field, self.content]
            self.fieldFile.write("\t".join(row))
            self.fieldFile.write("\n")
            self.fieldCount += 1
        if name in self.pubList:
            self.pubFile.write("\t".join([self.key, self.pub]))
            self.pubFile.write("\n")
        if name == "dblp":
            # The root element is finished: flush and release both files so
            # they are complete on disk before they are loaded.
            self.pubFile.close()
            self.fieldFile.close()

    def characters(self, content):
        """Append a chunk of character data, escaped for tab-separated output."""
        # Backslash must be escaped first so the escapes added afterwards are
        # not doubled. Tab, newline and carriage return would otherwise be
        # read as column or row delimiters when the file is loaded.
        self.content += (
            content.replace('\\', '\\\\')
            .replace('\t', '\\t')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
        )
def main(sourceFileName):
    """Parse the XML file at *sourceFileName* with a DBLPContentHandler.

    The handler writes pubFile.txt and fieldFile.txt as a side effect of
    parsing.
    """
    # Binary mode hands the raw bytes to the XML parser so it can honour the
    # encoding named in the document's own XML declaration. The with-block
    # closes the file even if parsing raises.
    with open(sourceFileName, "rb") as source:
        xml.sax.parse(source, DBLPContentHandler())


if __name__ == "__main__":
    main("dblp.xml")
Put your .sql files in this directory, one file per question.
#!/bin/bash
# Tag the current commit with the given name and push everything to origin.
#
# Usage: $0 <tag-name>      (for example: $0 hw1)
#
# If the tag already exists it is deleted and re-created on the current commit.
# Exits with status 1 when no tag name is given, when tracked files have
# uncommitted changes, or when the tag cannot be re-created.

if [ -z "$1" ]; then
    echo "Usage example: $0 hw1"
    exit 1
fi

TAG="$1"

# Check no uncommitted changes. --porcelain lists every tracked file that is
# modified, deleted, renamed or staged; untracked files are ignored.
if [ -n "$(git status --porcelain --untracked-files=no)" ]; then
    echo 'Error. There are uncommitted changes in your working directory. You can do "git status" to see them.
Please commit or stash uncommitted changes before submitting'
    exit 1
fi

# First 14 bytes of the first "git log" line: "commit " plus 7 hash characters.
COMMIT=$(git log | head -n 1 | cut -b 1-14)

if git tag "$TAG" 2>/dev/null
then
    echo "Created tag '$TAG' pointing to $COMMIT"
else
    # Creating the tag failed, most likely because it already exists:
    # delete it and create it again on the current commit.
    if git tag -d "$TAG" && git tag "$TAG"
    then
        echo "Re-creating tag '$TAG'... (now $COMMIT)"
    else
        echo "Error. Could not re-create tag '$TAG'."
        exit 1
    fi
fi

echo "Now syncing with origin..."
git push origin --mirror #--atomic
echo "Please verify in gitlab that your tag '$TAG' matches what you expect. "
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment