Skip to content
Snippets Groups Projects
Commit 16457b47 authored by Arshana Jain's avatar Arshana Jain
Browse files

debugged some issues; started on jrc countries

parent 87715d2e
No related branches found
No related tags found
No related merge requests found
......@@ -8,6 +8,100 @@ sys.path.append("..")
from util import *
def daily_jrc():
    """Run the daily JRC (EC Joint Research Centre) import against prototype_db.

    Opens the SQLite database, passes the JRC repository URL to set_source,
    looks up the id for that URL with get_source_id, and hands the id, cursor
    and connection to daily_jrc_countries.

    The cursor and the connection are always closed, even when one of the
    steps raises; the exception itself is not suppressed.
    """
    conn = sqlite3.connect('prototype_db')
    try:
        c = conn.cursor()
        try:
            src_url = "https://github.com/ec-jrc/COVID-19"
            set_source(src_url, c, conn)
            src_id = get_source_id(src_url, c)
            daily_jrc_countries(src_id, c, conn)
        finally:
            c.close()
    finally:
        # Closing only the cursor leaves the database handle open; release it too.
        conn.close()
# Use this for European countries only. Other countries appear to be either unreliable or have a lot of holes in their data.
# Using this in addition to JHU data, because it includes hospitalization data, while JHU does not
def daily_jrc_countries(src_id, c, conn):
    """Insert the latest JRC per-country figures into Cases_Per_Country.

    The cumulative totals seen by the previous run are restored from
    'jrc_countries.json' (three JSON lines: deaths, recovered, cases). Three
    consecutive daily JRC files are then considered, starting two days before
    today. For every row flagged EUcountry whose CountryName is on the
    allow-list and that is not yet stored for this source, a record is
    inserted holding the day-over-day differences of the cumulative death,
    case and recovery counts plus the reported hospitalization count.
    Finally the updated cumulative totals are written back to
    'jrc_countries.json'.

    Args:
        src_id: value stored in / matched against the source_id column.
        c: database cursor used for all queries (execute / fetchall).
        conn: database connection; committed after each processed file.
    """
    # Restore the running cumulative totals: line 0 = deaths, 1 = recovered, 2 = cases.
    # Only the first three lines are read; anything after them is ignored.
    prev_totals = [{}, {}, {}]
    with open('jrc_countries.json', 'r') as f:
        for idx, line in zip(range(3), f):
            prev_totals[idx] = json.loads(line)
    prev_death_dict, prev_recovered_dict, prev_cases_dict = prev_totals

    # Used to keep track of any countries that might need to be added to the
    # Countries table - for debugging purposes.
    missing_countries_set = set()
    # Certain countries have strange data, regardless of whether they are in the EU or not.
    # This set includes the country names that don't seem to have that strange pattern.
    acceptable_countries_set = {"Germany", "United Kingdom", "Italy", "Spain", "Romania", "Netherlands", "Belgium", "Sweden", "Austria", "Switzerland", "Slovakia", "Norway", "Albania"}

    dt = datetime.datetime.today() - datetime.timedelta(days=2)
    for _ in range(3):
        date = jrc_date(dt)
        # NOTE(review): `date` is formatted YYYYMMDD, while rows are stored under row.Date.
        # If the CSV dates are formatted differently (e.g. YYYY-MM-DD) this day-level
        # check never matches and only the per-country check below prevents duplicates - confirm.
        sql = '''SELECT date_collected FROM Cases_Per_Country WHERE date_collected = ? AND source_id = ?'''
        c.execute(sql, (date, src_id))
        if not c.fetchall():
            try:
                csv = "https://raw.githubusercontent.com/ec-jrc/COVID-19/master/data-by-country/jrc-covid-19-countries-" + date + ".csv"
                df = pd.read_csv(csv)
                for row in df.itertuples():
                    if not (row.EUcountry is True and row.CountryName in acceptable_countries_set):
                        continue
                    country_code = get_country_code(row.CountryName, c)
                    if country_code is None:
                        missing_countries_set.add(row.CountryName)
                        continue
                    sql = '''SELECT date_collected FROM Cases_Per_Country WHERE date_collected = ? AND source_id = ? AND country_code = ?'''
                    c.execute(sql, (row.Date, src_id, country_code))
                    if c.fetchall():
                        # This country/date pair is already stored for this source.
                        continue
                    prev_death = prev_death_dict.get(country_code, 0)
                    prev_recovered = prev_recovered_dict.get(country_code, 0)
                    prev_cases = prev_cases_dict.get(country_code, 0)
                    # The file carries cumulative counts; store the difference to the last known total.
                    deaths = (row.CumulativeDeceased - prev_death) if isNum(row.CumulativeDeceased) else None
                    cases = (row.CumulativePositive - prev_cases) if isNum(row.CumulativePositive) else None
                    recovered = (row.CumulativeRecovered - prev_recovered) if isNum(row.CumulativeRecovered) else None
                    hospitalized = int(row.Hospitalized) if isNum(row.Hospitalized) else None
                    sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
                    c.execute(sql,(country_code, row.Date, src_id, deaths, cases, recovered, hospitalized))
                    # Only advance a running total when the file actually supplied a value.
                    if isNum(row.CumulativeDeceased):
                        prev_death_dict[country_code] = row.CumulativeDeceased
                    if isNum(row.CumulativeRecovered):
                        prev_recovered_dict[country_code] = row.CumulativeRecovered
                    if isNum(row.CumulativePositive):
                        prev_cases_dict[country_code] = row.CumulativePositive
                conn.commit()
            except Exception:
                # Best effort: any failure while fetching or processing a file (for example
                # a file that is not available) ends the run. Unlike a bare `except`,
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                break
        dt += datetime.timedelta(days=1)
    # debugging
    #print(missing_countries_set)
    # Persist the running totals for the next run; the `with` block closes the file.
    with open('jrc_countries.json', 'w') as f:
        f.write(json.dumps(prev_death_dict)+'\n')
        f.write(json.dumps(prev_recovered_dict)+'\n')
        f.write(json.dumps(prev_cases_dict)+'\n')
def jrc_date(dt):
    """Return *dt* as the compact date string used in JRC file names.

    The result is the year followed by the two-digit month and two-digit day
    (for example 3 April 2020 becomes "20200403"). Only the ``year``,
    ``month`` and ``day`` attributes of *dt* are read.
    """
    return "{}{:02d}{:02d}".format(dt.year, dt.month, dt.day)
# JRC includes Italy data, but not the same subsets
def daily_italy():
df_total = pd.read_csv('https://raw.githubusercontent.com/RamiKrispin/covid19Italy/master/csv/italy_total.csv', error_bad_lines=False)
......@@ -53,10 +147,10 @@ def daily_italy():
prev_death = 0 if "death" not in prev_row else prev_row["death"]
prev_recovered = 0 if "recovered" not in prev_row else prev_row["recovered"]
sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
c.execute(sql,(italy_code, row.date, italy_src, row.death - prev_death if row.death is not "NaN" else None, int(row.daily_positive_cases) if row.daily_positive_cases is not "NaN" else None, row.recovered - prev_recovered if row.recovered is not "NaN" else None, int(row.total_hospitalized) if row.total_hospitalized is not "NaN" else None))
if row.death is not "NaN":
c.execute(sql,(italy_code, row.date, italy_src, (row.death - prev_death) if isNum(row.death) else None, int(row.daily_positive_cases) if isNum(row.daily_positive_cases) else None, (row.recovered - prev_recovered) if isNum(row.recovered) else None, int(row.total_hospitalized) if isNum(row.total_hospitalized) else None))
if isNum(row.death):
prev_row["death"] = row.death
if row.recovered is not "NaN":
if isNum(row.recovered):
prev_row["recovered"] = row.recovered
conn.commit()
dt += datetime.timedelta(days=1)
......@@ -81,10 +175,10 @@ def daily_italy():
prev_death = 0 if region_code not in prev_death_dict else prev_death_dict[region_code]
prev_recovered = 0 if region_code not in prev_recovered_dict else prev_recovered_dict[region_code]
sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
c.execute(sql,(region_code, row.date, italy_src, row.death - prev_death if row.death is not "NaN" else None, int(row.daily_positive_cases) if row.daily_positive_cases is not "NaN" else None, row.recovered - prev_recovered if row.recovered is not "NaN" else None, int(row.total_hospitalized) if row.total_hospitalized is not "NaN" else None))
if row.death is not "NaN":
c.execute(sql,(region_code, row.date, italy_src, (row.death - prev_death) if isNum(row.death) else None, int(row.daily_positive_cases) if isNum(row.daily_positive_cases) else None, (row.recovered - prev_recovered) if isNum(row.recovered) else None, int(row.total_hospitalized) if isNum(row.total_hospitalized) else None))
if isNum(row.death):
prev_death_dict[region_code] = row.death
if row.recovered is not "NaN":
if isNum(row.recovered):
prev_recovered_dict[region_code] = row.recovered
conn.commit()
dt += datetime.timedelta(days=1)
......
......@@ -67,15 +67,15 @@ def daily_jhu_us_states(c, conn, src_id):
if region_code is not None:
sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(region_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None, row.People_Hospitalized - prev_hospitalized if row.People_Hospitalized is not None else None))
c.execute(sql,(region_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None, (row.People_Hospitalized - prev_hospitalized) if isNum(row.People_Hospitalized) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict[region_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict[region_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict[region_code] = row.Confirmed
if row.People_Hospitalized is not None:
if isNum(row.People_Hospitalized):
prev_hospitalized_dict[region_code] = row.People_Hospitalized
else:
last_error = (row.Province_State + " was missing from the Regions table - init_jhu_us_states " + csv_name + ".")
......@@ -212,13 +212,13 @@ def daily_jhu_global(c, conn, src_id):
prev_case = 0 if country_code not in prev_case_dict else prev_case_dict[country_code]
sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(country_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None))
c.execute(sql,(country_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict[country_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict[country_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict[country_code] = row.Confirmed
elif (region_name != "Recovered" and region_name != "Unknown"): # a region-level entry
# skip Recovered row - irrelevant data - be on the look out for other special cases that haven't been noticed yet
......@@ -239,13 +239,13 @@ def daily_jhu_global(c, conn, src_id):
prev_case = 0 if region_code not in prev_case_dict else prev_case_dict[region_code]
sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(region_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None))
c.execute(sql,(region_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict[region_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict[region_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict[region_code] = row.Confirmed
elif (subregion_name != "Unassigned"):
subregion_code = get_district_code(region_code, str(subregion_name), c)
......@@ -260,13 +260,13 @@ def daily_jhu_global(c, conn, src_id):
prev_case = 0 if subregion_code not in prev_case_dict_subregion else prev_case_dict_subregion[subregion_code]
sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(subregion_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None))
c.execute(sql,(subregion_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict_subregion[subregion_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict_subregion[subregion_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict_subregion[subregion_code] = row.Confirmed
conn.commit() # runs after every csv
......@@ -279,7 +279,7 @@ def daily_jhu_global(c, conn, src_id):
dt += datetime.timedelta(days=1)
# debugging
#print(missing_countries_set)
# print(missing_countries_set)
with open('jhu_global.json', 'w') as f:
f.write(json.dumps(prev_death_dict)+'\n')
f.write(json.dumps(prev_recovered_dict)+'\n')
......
......@@ -9,9 +9,88 @@ sys.path.append("..")
from util import *
# Use this for European countries only. Other countries appear to be either unreliable or have a lot of holes in their data.
def init_jrc():
    """Run the initial JRC (EC Joint Research Centre) import against prototype_db.

    Opens the SQLite database, passes the JRC repository URL to set_source,
    looks up the id for that URL with get_source_id, and hands the id, cursor
    and connection to init_jrc_countries.

    The cursor and the connection are always closed, even when one of the
    steps raises; the exception itself is not suppressed.
    """
    # The all-days-by-regions CSV used to be downloaded here into a variable that was
    # never read (and with the `error_bad_lines` argument, which newer pandas versions
    # reject). The download is dropped until the per-region import is implemented.
    conn = sqlite3.connect('prototype_db')
    try:
        c = conn.cursor()
        try:
            src_url = "https://github.com/ec-jrc/COVID-19"
            set_source(src_url, c, conn)
            src_id = get_source_id(src_url, c)
            init_jrc_countries(src_id, c, conn)
        finally:
            c.close()
    finally:
        # Closing only the cursor leaves the database handle open; release it too.
        conn.close()
# Use this for European countries only. Other countries appear to be either unreliable or have a lot of holes in their data.
# Using this in addition to JHU data, because it includes hospitalization data, while JHU does not
def init_jrc_countries(src_id, c, conn):
    """Backfill Cases_Per_Country from the JRC daily per-country CSV files.

    Walks one day at a time from 2020-02-28 up to the day of the run. For each
    day the matching JRC file is downloaded; for every row flagged EUcountry
    whose CountryName is on the allow-list and that is not yet stored for this
    source, a record is inserted holding the day-over-day differences of the
    cumulative death, case and recovery counts plus the reported
    hospitalization count. A day whose file cannot be fetched or processed is
    reported and skipped. At the end the last cumulative totals per country
    are written to 'jrc_countries.json' (three JSON lines: deaths, recovered,
    cases) so the daily script can continue from them.

    Args:
        src_id: value stored in / matched against the source_id column.
        c: database cursor used for all queries (execute / fetchall).
        conn: database connection; committed after each processed file.
    """
    prev_death_dict = {}
    prev_recovered_dict = {}
    prev_cases_dict = {}
    # Used to keep track of any countries that might need to be added to the
    # Countries table - for debugging purposes.
    missing_countries_set = set()
    # Certain countries have strange data, regardless of whether they are in the EU or not.
    # This set includes the country names that don't seem to have that strange pattern.
    acceptable_countries_set = {"Germany", "United Kingdom", "Italy", "Spain", "Romania", "Netherlands", "Belgium", "Sweden", "Austria", "Switzerland", "Slovakia", "Norway", "Albania"}

    select_sql = '''SELECT date_collected FROM Cases_Per_Country WHERE date_collected = ? AND source_id = ? AND country_code = ?'''
    insert_sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''

    dt = datetime.datetime(2020, 2, 28)
    # Without an upper bound the walk never ends: every date after the newest
    # published file fails to download and the loop just keeps advancing.
    last_day = datetime.datetime.today()
    while dt <= last_day:
        date = dt.strftime("%Y%m%d")
        try:
            csv_url = "https://raw.githubusercontent.com/ec-jrc/COVID-19/master/data-by-country/jrc-covid-19-countries-" + date + ".csv"
            df = pd.read_csv(csv_url)
            for row in df.itertuples():
                if not (row.EUcountry is True and row.CountryName in acceptable_countries_set):
                    continue
                country_code = get_country_code(row.CountryName, c)
                if country_code is None:
                    missing_countries_set.add(row.CountryName)
                    continue
                c.execute(select_sql, (row.Date, src_id, country_code))
                if c.fetchall():
                    # Already stored for this source; the column is `Date`, not `date`.
                    print(str(row.Date) + " " + str(country_code))
                else:
                    prev_death = prev_death_dict.get(country_code, 0)
                    prev_recovered = prev_recovered_dict.get(country_code, 0)
                    prev_cases = prev_cases_dict.get(country_code, 0)
                    # The file carries cumulative counts; store the difference to the last known total.
                    deaths = (row.CumulativeDeceased - prev_death) if isNum(row.CumulativeDeceased) else None
                    cases = (row.CumulativePositive - prev_cases) if isNum(row.CumulativePositive) else None
                    recovered = (row.CumulativeRecovered - prev_recovered) if isNum(row.CumulativeRecovered) else None
                    hospitalized = int(row.Hospitalized) if isNum(row.Hospitalized) else None
                    c.execute(insert_sql,(country_code, row.Date, src_id, deaths, cases, recovered, hospitalized))
                # Advance the running totals for every row, stored or not, so that a
                # re-run over existing data still computes correct differences for new
                # days and writes complete totals to the state file.
                if isNum(row.CumulativeDeceased):
                    prev_death_dict[country_code] = row.CumulativeDeceased
                if isNum(row.CumulativeRecovered):
                    prev_recovered_dict[country_code] = row.CumulativeRecovered
                if isNum(row.CumulativePositive):
                    prev_cases_dict[country_code] = row.CumulativePositive
            conn.commit()
        except Exception as err:
            # Best effort per day: a missing or malformed file must not abort the
            # backfill, but the failure is reported instead of silently ignored.
            print("init_jrc_countries: skipped " + date + ": " + str(err))
        dt += datetime.timedelta(days=1)
    # debugging
    #print(missing_countries_set)
    # Persist the running totals for the daily script; the `with` block closes the file.
    with open('jrc_countries.json', 'w') as f:
        f.write(json.dumps(prev_death_dict)+'\n')
        f.write(json.dumps(prev_recovered_dict)+'\n')
        f.write(json.dumps(prev_cases_dict)+'\n')
def init_jrc_regions(src_id, c, conn):
    """Placeholder for the JRC per-region import.

    Not implemented yet: the arguments are accepted but unused, nothing is
    read or written, and None is returned.
    """
    return None
# JRC includes Italy data, but not the same subsets
def init_italy():
......@@ -36,10 +115,10 @@ def init_italy():
prev_death = 0 if "death" not in prev_row else prev_row["death"]
prev_recovered = 0 if "recovered" not in prev_row else prev_row["recovered"]
sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
c.execute(sql,(italy_code, row.date, italy_src, row.death - prev_death if row.death is not "NaN" else None, int(row.daily_positive_cases) if row.daily_positive_cases is not "NaN" else None, row.recovered - prev_recovered if row.recovered is not "NaN" else None, int(row.total_hospitalized) if row.total_hospitalized is not "NaN" else None))
if row.death is not "NaN":
c.execute(sql,(italy_code, row.date, italy_src, (row.death - prev_death) if isNum(row.death) else None, int(row.daily_positive_cases) if isNum(row.daily_positive_cases) else None, (row.recovered - prev_recovered) if isNum(row.recovered) else None, int(row.total_hospitalized) if isNum(row.total_hospitalized) else None))
if isNum(row.death):
prev_row["death"] = row.death
if row.recovered is not "NaN":
if isNum(row.recovered):
prev_row["recovered"] = row.recovered
conn.commit()
......@@ -56,10 +135,10 @@ def init_italy():
prev_death = 0 if region_code not in prev_death_dict else prev_death_dict[region_code]
prev_recovered = 0 if region_code not in prev_recovered_dict else prev_recovered_dict[region_code]
sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
c.execute(sql,(region_code, row.date, italy_src, row.death - prev_death if row.death is not "NaN" else None, int(row.daily_positive_cases) if row.daily_positive_cases is not "NaN" else None, row.recovered - prev_recovered if row.recovered is not "NaN" else None, int(row.total_hospitalized) if row.total_hospitalized is not "NaN" else None))
if row.death is not "NaN":
c.execute(sql,(region_code, row.date, italy_src, (row.death - prev_death) if isNum(row.death) else None, int(row.daily_positive_cases) if isNum(row.daily_positive_cases) else None, (row.recovered - prev_recovered) if isNum(row.recovered) else None, int(row.total_hospitalized) if isNum(row.total_hospitalized) else None))
if isNum(row.death):
prev_death_dict[region_code] = row.death
if row.recovered is not "NaN":
if isNum(row.recovered):
prev_recovered_dict[region_code] = row.recovered
conn.commit()
......
......@@ -60,15 +60,15 @@ def init_jhu_us_states(c, conn, src_id):
if region_code is not None:
sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers, hospitalization_numbers) VALUES (?, ?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(region_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None, row.People_Hospitalized - prev_hospitalized if row.People_Hospitalized is not None else None))
c.execute(sql,(region_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None, (row.People_Hospitalized - prev_hospitalized) if isNum(row.People_Hospitalized) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict[region_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict[region_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict[region_code] = row.Confirmed
if row.People_Hospitalized is not None:
if isNum(row.People_Hospitalized):
prev_hospitalized_dict[region_code] = row.People_Hospitalized
else:
last_error = (row.Province_State + " was missing from the Regions table - init_jhu_us_states " + csv_name + ".")
......@@ -181,13 +181,13 @@ def init_jhu_global(c, conn, src_id):
prev_case = 0 if country_code not in prev_case_dict else prev_case_dict[country_code]
sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(country_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None))
c.execute(sql,(country_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict[country_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict[country_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict[country_code] = row.Confirmed
elif (region_name != "Recovered" and region_name != "Unknown"): # a region-level entry
# skip Recovered row - irrelevant data - be on the look out for other special cases that haven't been noticed yet
......@@ -208,13 +208,13 @@ def init_jhu_global(c, conn, src_id):
prev_case = 0 if region_code not in prev_case_dict else prev_case_dict[region_code]
sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(region_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None))
c.execute(sql,(region_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict[region_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict[region_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict[region_code] = row.Confirmed
elif (subregion_name != "Unassigned"):
subregion_code = get_district_code(region_code, str(subregion_name), c)
......@@ -229,13 +229,13 @@ def init_jhu_global(c, conn, src_id):
prev_case = 0 if subregion_code not in prev_case_dict_subregion else prev_case_dict_subregion[subregion_code]
sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
# handles the case of a blank column by inserting None
c.execute(sql,(subregion_code, date, src_id, row.Deaths - prev_death if row.Deaths is not None else None, row.Confirmed - prev_case if row.Confirmed is not None else None, row.Recovered - prev_recovered if row.Recovered is not None else None))
c.execute(sql,(subregion_code, date, src_id, (row.Deaths - prev_death) if isNum(row.Deaths) else None, (row.Confirmed - prev_case) if isNum(row.Confirmed) else None, (row.Recovered - prev_recovered) if isNum(row.Recovered) else None))
# update previous
if row.Deaths is not None:
if isNum(row.Deaths):
prev_death_dict_subregion[subregion_code] = row.Deaths
if row.Recovered is not None:
if isNum(row.Recovered):
prev_recovered_dict_subregion[subregion_code] = row.Recovered
if row.Confirmed is not None:
if isNum(row.Confirmed):
prev_case_dict_subregion[subregion_code] = row.Confirmed
conn.commit() # runs after every csv
......@@ -248,7 +248,8 @@ def init_jhu_global(c, conn, src_id):
dt += datetime.timedelta(days=1)
# debugging
#print(missing_countries_set)
# print(missing_countries_set)
with open('jhu_global.json', 'w') as f:
f.write(json.dumps(prev_death_dict)+'\n')
f.write(json.dumps(prev_recovered_dict)+'\n')
......
......@@ -213,3 +213,4 @@ c.close()
from initial_data_scripts.init_europe import init_italy, init_ukraine
from initial_data_scripts.init_asia import init_japan, init_korea, init_ina
from initial_data_scripts.init_global import init_jhu
from daily_data_scripts.daily_global import daily_jhu
import pandas as pd
# Get country code associated with country_name from Countries table.
def get_country_code(country_name, c):
c.execute('SELECT country_code FROM Countries WHERE country_name = "' + country_name + '"')
......@@ -31,3 +33,6 @@ def set_source(source_info, c, conn):
c.execute("INSERT INTO Sources (source_information) VALUES('" + source_info + "');")
conn.commit()
# Check if a value is a number
def isNum(value):
    """Report whether *value* is present, i.e. usable as a data point.

    Returns False for None, for anything pandas treats as missing
    (``pd.isna``) and for the text "nan" in any letter case; returns True
    otherwise. Note that the numeric type itself is not verified, so other
    strings also yield True.
    """
    if value is None or pd.isna(value):
        return False
    return str(value).lower() != "nan"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment