Skip to content
Snippets Groups Projects
Commit f201ef5a authored by Arshana Jain's avatar Arshana Jain
Browse files

init_italy() finished

parent 753fda67
No related branches found
No related tags found
No related merge requests found
......@@ -22,7 +22,7 @@ CREATE TABLE Regions(
CREATE TABLE Districts(
district_code BIGINT IDENTITY(1, 1) PRIMARY KEY,
district_name VARCHAR(128) NOT NULL,
region_code VARCHAR(5) NOT NULL,
region_code BIGINT NOT NULL,
longitude FLOAT NULL,
latitude FLOAT NULL,
FOREIGN KEY (region_code) REFERENCES Regions(region_code)
......@@ -45,6 +45,7 @@ CREATE TABLE Cases_Per_Country(
death_numbers INT NULL,
case_numbers INT NULL,
recovery_numbers INT NULL,
hospitalization_numbers INT NULL,
FOREIGN KEY (country_code) REFERENCES Countries(country_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
......@@ -58,6 +59,7 @@ CREATE TABLE Cases_Per_Region(
death_numbers INT NULL,
case_numbers INT NULL,
recovery_numbers INT NULL,
hospitalization_numbers INT NULL,
FOREIGN KEY (region_code) REFERENCES Regions(region_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
......@@ -71,6 +73,7 @@ CREATE TABLE Cases_Per_District(
death_numbers INT NULL,
case_numbers INT NULL,
recovery_numbers INT NULL,
hospitalization_numbers INT NULL,
FOREIGN KEY (district_code) REFERENCES Districts(district_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
......
......@@ -26,13 +26,62 @@ def init_italy():
# get country_code
italy_code = get_country_code("Italy", c)
print(italy_code)
# insert and get source id for source
italy_src_url = "https://github.com/RamiKrispin/covid19italy"
set_source(italy_src_url, c, conn)
italy_src = get_source_id(italy_src_url, c)
print(italy_src)
# insert total
prev_row = None
for row in df_total.itertuples():
prev_death = 0 if prev_row is None else prev_row.death
prev_recovered = 0 if prev_row is None else prev_row.recovered
sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
c.execute(sql,(italy_code, row.date, italy_src, row.death - prev_death if row.death is not "NaN" else None, int(row.daily_positive_cases) if row.daily_positive_cases is not "NaN" else None, row.recovered - prev_recovered if row.recovered is not "NaN" else None, int(row.total_hospitalized) if row.total_hospitalized is not "NaN" else None))
prev_row = row
conn.commit()
# set up regions
src_region_codes = df_region["region_code"].unique()
for src_code in src_region_codes:
region_rows = df_region.loc[df_region['region_code'] == src_code]
region_row = region_rows.iloc[0]
sql = '''INSERT INTO Regions (region_name, country_code, longitude, latitude) VALUES (?, ?, ?, ?)'''
c.execute(sql,(region_row.region_name, italy_code, region_row.long, region_row.lat))
conn.commit()
# insert regions
region_code = get_region_code(italy_code, region_row.region_name, c)
prev_death_dict = {}
prev_recovered_dict = {}
for row in df_region.itertuples():
region_code = get_region_code(italy_code, row.region_name, c)
prev_death = 0 if region_code not in prev_death_dict else prev_death_dict[region_code]
prev_recovered = 0 if region_code not in prev_recovered_dict else prev_recovered_dict[region_code]
sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
c.execute(sql,(region_code, row.date, italy_src, row.death - prev_death if row.death is not "NaN" else None, int(row.daily_positive_cases) if row.daily_positive_cases is not "NaN" else None, row.recovered - prev_recovered if row.recovered is not "NaN" else None, int(row.total_hospitalized) if row.total_hospitalized is not "NaN" else None))
if row.death is not "NaN":
prev_death_dict[region_code] = row.death
if row.recovered is not "NaN":
prev_recovered_dict[region_code] = row.recovered
conn.commit()
# set up + insert subregion
src_subregion_codes = df_subregion["province_code"].unique()
for src_code in src_subregion_codes:
subregion_rows = df_subregion.loc[df_subregion['province_code'] == src_code]
subregion_row = subregion_rows.iloc[0]
region_code = get_region_code(italy_code, subregion_row.region_name, c)
sql = '''INSERT INTO Districts (district_name, region_code, longitude, latitude) VALUES (?, ?, ?, ?)'''
c.execute(sql,(subregion_row.province_name, region_code, subregion_row.long, subregion_row.lat))
conn.commit()
subregion_code = get_district_code(region_code, subregion_row.province_name, c)
for i in range(len(subregion_rows)):
row = subregion_rows.iloc[i]
sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, case_numbers) VALUES (?, ?, ?, ?)'''
c.execute(sql,(subregion_code, row.date, italy_src, int(row.new_cases)))
conn.commit()
conn.close()
......@@ -82,3 +131,4 @@ def init_ukraine():
def init_france():
    """Placeholder for the France data import; it currently does nothing."""
    return None
# Run the Italy import (see init_italy for the tables it fills).
init_italy()
\ No newline at end of file
......@@ -6,6 +6,17 @@ def get_country_code(country_name, c):
result = c.fetchall()
return result[0][0]
def get_region_code(country_code, region_name, c):
    """Return the ``region_code`` of a region identified by country and name.

    Args:
        country_code: Value matched against ``Regions.country_code``.
        region_name: Value matched against ``Regions.region_name``.
        c: Open DB-API cursor (``?`` placeholder style, e.g. sqlite3) on the
            database that holds the ``Regions`` table.

    Returns:
        The ``region_code`` column of the first matching row.

    Raises:
        IndexError: If no region matches.
    """
    # Bind the values instead of concatenating them into the statement:
    # names containing quotes no longer break the query, a value can no
    # longer be misread as a column identifier, and injection is ruled out.
    c.execute(
        'SELECT region_code FROM Regions '
        'WHERE country_code = ? AND region_name = ?',
        (country_code, region_name),
    )
    result = c.fetchall()
    return result[0][0]
# region_code is bound as a query parameter, so it can be passed in whatever
# type the caller holds (no str() cast is needed to build the SQL text).
def get_district_code(region_code, district_name, c):
    """Return the ``district_code`` of a district identified by region and name.

    Args:
        region_code: Value matched against ``Districts.region_code``.
        district_name: Value matched against ``Districts.district_name``.
        c: Open DB-API cursor (``?`` placeholder style, e.g. sqlite3) on the
            database that holds the ``Districts`` table.

    Returns:
        The ``district_code`` column of the first matching row.

    Raises:
        IndexError: If no district matches.
    """
    # Parameter binding replaces string concatenation: quotes inside a
    # district name no longer break the statement and cannot inject SQL.
    c.execute(
        'SELECT district_code FROM Districts '
        'WHERE region_code = ? AND district_name = ?',
        (region_code, district_name),
    )
    result = c.fetchall()
    return result[0][0]
# source_info is typically a general url for the data source
def set_source(source_info, c, conn):
c.execute("INSERT INTO Sources (source_information) VALUES('" + source_info + "');")
......@@ -40,7 +51,7 @@ c.execute('''
CREATE TABLE Districts(
district_code INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
district_name VARCHAR(128) NOT NULL,
region_code VARCHAR(5) NOT NULL,
region_code BIGINT NOT NULL,
longitude FLOAT NULL,
latitude FLOAT NULL,
FOREIGN KEY (region_code) REFERENCES Regions(region_code)
......@@ -62,6 +73,7 @@ c.execute('''
death_numbers INT NULL,
case_numbers INT NULL,
recovery_numbers INT NULL,
hospitalization_numbers INT NULL,
FOREIGN KEY (country_code) REFERENCES Countries(country_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
)
......@@ -75,6 +87,7 @@ c.execute('''
death_numbers INT NULL,
case_numbers INT NULL,
recovery_numbers INT NULL,
hospitalization_numbers INT NULL,
FOREIGN KEY (region_code) REFERENCES Regions(region_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
)
......@@ -88,6 +101,7 @@ c.execute('''
death_numbers INT NULL,
case_numbers INT NULL,
recovery_numbers INT NULL,
hospitalization_numbers INT NULL,
FOREIGN KEY (district_code) REFERENCES Districts(district_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
)
......@@ -154,4 +168,73 @@ countries = pd.read_csv('country_countryCode.csv')
countries = countries.rename(columns={"Name": "country_name", "Code": "country_code"})
countries.to_sql('Countries',con=conn, if_exists = 'append', index=False)
c.close()
\ No newline at end of file
c.close()
def _int_or_none(value):
    """Return *value* as a built-in ``int``, or ``None`` when it is missing."""
    return None if pd.isna(value) else int(value)


def _float_or_none(value):
    """Return *value* as a built-in ``float``, or ``None`` when it is missing."""
    return None if pd.isna(value) else float(value)


def _delta_or_none(value, previous):
    """Return ``value - previous`` as an ``int``, or ``None`` when *value* is missing."""
    return None if pd.isna(value) else int(value - previous)


def init_italy():
    """Load the covid19italy CSV exports into the ``prototype_db`` SQLite file.

    Downloads the country, region and province CSVs of the
    RamiKrispin/covid19Italy repository, registers that repository as a
    source, and inserts:

    * one ``Cases_Per_Country`` row per row of the totals CSV;
    * one ``Regions`` row per distinct source ``region_code`` and one
      ``Cases_Per_Region`` row per row of the region CSV;
    * one ``Districts`` row per distinct ``province_code`` and one
      ``Cases_Per_District`` row per row of the province CSV.

    ``death`` and ``recovered`` are stored as the difference from the last
    non-missing value seen before (starting at 0; tracked per region for the
    region table). Missing CSV values are stored as NULL.

    Each stage is committed once after all of its rows are inserted, and the
    connection is closed even when a step fails.

    Raises:
        IndexError: Propagated from the lookup helpers when a country,
            region or district cannot be found.
    """
    csv_base = 'https://raw.githubusercontent.com/RamiKrispin/covid19Italy/master/csv/'
    # NOTE(review): ``error_bad_lines`` is deprecated since pandas 1.3 and was
    # removed in pandas 2.0; switch to ``on_bad_lines='skip'`` once the
    # project's minimum pandas version allows it.
    df_total = pd.read_csv(csv_base + 'italy_total.csv', error_bad_lines=False)
    df_region = pd.read_csv(csv_base + 'italy_region.csv', error_bad_lines=False)
    df_subregion = pd.read_csv(csv_base + 'italy_province.csv', error_bad_lines=False)

    conn = sqlite3.connect('prototype_db')
    try:
        c = conn.cursor()

        # get country_code
        italy_code = get_country_code("Italy", c)

        # insert and get source id for source
        italy_src_url = "https://github.com/RamiKrispin/covid19italy"
        set_source(italy_src_url, c, conn)
        italy_src = get_source_id(italy_src_url, c)

        # insert total
        country_sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
        prev_death = 0
        prev_recovered = 0
        for row in df_total.itertuples():
            c.execute(country_sql, (
                italy_code,
                row.date,
                italy_src,
                _delta_or_none(row.death, prev_death),
                _int_or_none(row.daily_positive_cases),
                _delta_or_none(row.recovered, prev_recovered),
                _int_or_none(row.total_hospitalized),
            ))
            # Only advance the baseline on real values so that one missing
            # entry does not turn every later delta into NaN.
            if not pd.isna(row.death):
                prev_death = row.death
            if not pd.isna(row.recovered):
                prev_recovered = row.recovered
        conn.commit()

        # set up regions: the first CSV row of each source region_code
        # supplies the region's name and coordinates
        region_sql = '''INSERT INTO Regions (region_name, country_code, longitude, latitude) VALUES (?, ?, ?, ?)'''
        for region_row in df_region.drop_duplicates(subset='region_code').itertuples():
            c.execute(region_sql, (
                region_row.region_name,
                italy_code,
                _float_or_none(region_row.long),
                _float_or_none(region_row.lat),
            ))
        conn.commit()

        # Regions is not modified past this point, so each name is looked up
        # in the database once and then served from this cache.
        region_codes = {}

        def region_code_for(region_name):
            if region_name not in region_codes:
                region_codes[region_name] = get_region_code(italy_code, region_name, c)
            return region_codes[region_name]

        # insert regions
        region_case_sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
        prev_death_dict = {}
        prev_recovered_dict = {}
        for row in df_region.itertuples():
            region_code = region_code_for(row.region_name)
            c.execute(region_case_sql, (
                region_code,
                row.date,
                italy_src,
                _delta_or_none(row.death, prev_death_dict.get(region_code, 0)),
                _int_or_none(row.daily_positive_cases),
                _delta_or_none(row.recovered, prev_recovered_dict.get(region_code, 0)),
                _int_or_none(row.total_hospitalized),
            ))
            if not pd.isna(row.death):
                prev_death_dict[region_code] = row.death
            if not pd.isna(row.recovered):
                prev_recovered_dict[region_code] = row.recovered
        conn.commit()

        # set up + insert subregion
        district_sql = '''INSERT INTO Districts (district_name, region_code, longitude, latitude) VALUES (?, ?, ?, ?)'''
        district_case_sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, case_numbers) VALUES (?, ?, ?, ?)'''
        # sort=False keeps the provinces in order of first appearance.
        for _, subregion_rows in df_subregion.groupby('province_code', sort=False):
            subregion_row = subregion_rows.iloc[0]
            region_code = region_code_for(subregion_row.region_name)
            c.execute(district_sql, (
                subregion_row.province_name,
                region_code,
                _float_or_none(subregion_row.long),
                _float_or_none(subregion_row.lat),
            ))
            subregion_code = get_district_code(region_code, subregion_row.province_name, c)
            for row in subregion_rows.itertuples():
                c.execute(district_case_sql, (
                    subregion_code,
                    row.date,
                    italy_src,
                    _int_or_none(row.new_cases),
                ))
        conn.commit()
    finally:
        # Release the database file even when a download row or lookup fails.
        conn.close()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment