Skip to content
Snippets Groups Projects
Commit c1070c75 authored by Ivy Wang's avatar Ivy Wang
Browse files

Update age tables; add init and update functions for Canada

parent a1342843
No related branches found
No related tags found
No related merge requests found
......@@ -89,12 +89,12 @@ def update_korea():
age_group = ["0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "over 80"]
for index, row in korea_age.iterrows():
date1 = row[0].date()
c.execute('SELECT * FROM Age_Per_Country WHERE country_id ="' + korea_code + '" AND date_collected ="' + str(date1)+ '"')
c.execute('SELECT * FROM Age_Per_Country WHERE country_code ="' + korea_code + '" AND date_collected ="' + str(date1)+ '"')
result = c.fetchall()
if len(result) == 0:
for i in range(0, 9):
case = row[i + 2]
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
sql = '''INSERT INTO Age_Per_Country (date_collected, country_code, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, korea_code, korea_src,age_group[i], case))
else:
break
......@@ -199,7 +199,7 @@ def update_japan():
for index,row in japan_age.iterrows():
d = row[0].find("~")
date1 = datetime.datetime.strptime(row[0][d + 1:], "%Y/%m/%d").date()
c.execute('SELECT * FROM Age_Per_Country WHERE country_id ="' + japan_code + '" AND date_collected ="' + str(date1)+ '"')
c.execute('SELECT * FROM Age_Per_Country WHERE country_code ="' + japan_code + '" AND date_collected ="' + str(date1)+ '"')
result = c.fetchall()
if len(result) == 0:
date1 = datetime.datetime.strptime(row[0][:d], "%Y/%m/%d").date()
......@@ -215,7 +215,7 @@ def update_japan():
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
sql = '''INSERT INTO Age_Per_Country (date_collected, country_code, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, japan_code, japan_src1, age, case))
else:
for j in range(0, 20):
......@@ -225,7 +225,7 @@ def update_japan():
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
sql = '''INSERT INTO Age_Per_Region (date_collected, region_code, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, region_dict[cities[i]], japan_src1, age, case))
date1 = date1 + datetime.timedelta(days=1)
else:
......@@ -317,7 +317,7 @@ def ina():
#insert country,region age data
date1 = ina_age_nation["Date"][0]
c.execute('SELECT * FROM Age_Per_Country WHERE country_id ="' + ina_code + '" AND date_collected ="' + str(date1)+ '"')
c.execute('SELECT * FROM Age_Per_Country WHERE country_code ="' + ina_code + '" AND date_collected ="' + str(date1)+ '"')
result = c.fetchall()
if len(result) == 0:
c.execute('SELECT * FROM Cases_Per_Country WHERE country_code ="' + ina_code + '" AND date_collected ="' + str(date1)+ '"')
......@@ -328,7 +328,7 @@ def ina():
recovery = round(row["sembuh"] * result[0][5] / 100)
hos = round(row["perawatan"] * result[0][6] / 100)
death = round(row["meninggal"] * result[0][3] / 100)
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
sql = '''INSERT INTO Age_Per_Country (date_collected, country_code, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
c.execute(sql,(row["Date"], ina_code, ina_src, row["SubCategory"], case, recovery, hos, death))
conn.commit()
......@@ -349,7 +349,7 @@ def ina():
recovery = round(row["sembuh"] * result[2] / 100)
hos = round(row["perawatan"] * result[3] / 100)
death = round(row["meninggal"] * result[0] / 100)
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
sql = '''INSERT INTO Age_Per_Region (date_collected, region_code, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
c.execute(sql,(row["Date"], region_dict[row["Location"]], ina_src, row["SubCategory"], case, recovery, hos, death))
conn.commit()
......
......@@ -14,6 +14,13 @@ from urllib.request import urlopen
from bs4 import BeautifulSoup
from urllib.request import urlopen
def toint(s):
    """Convert a possibly-missing numeric cell to an ``int`` for insertion.

    Args:
        s: A scalar cell value (for example from a pandas row).

    Returns:
        ``None`` when ``pd.isna(s)`` is true, otherwise ``int(s)``.

    The missing-value result is ``None`` rather than the string ``"NULL"``:
    callers pass the result as a bound ``?`` parameter, and sqlite3 binds
    ``None`` as SQL NULL whereas the string would be stored as the literal
    text 'NULL'.
    """
    if pd.isna(s):
        return None
    return int(s)
#update country and county level case data and vaccination data for country and state
def update_us():
conn = sqlite3.connect('prototype_db')
......@@ -106,4 +113,71 @@ def update_us():
c.execute(sql,(row["Date"], row["Admin_Dose_1_Cumulative"], row["Series_Complete_Cumulative"], row["Booster_Cumulative"], region_dict[abb[row["Location"]]], us_src_v))
else:
break
conn.commit()
def update_canada():
    """Append Canadian case and vaccination rows missing from ``prototype_db``.

    Downloads the Health Infobase case and vaccination CSVs and walks each in
    reversed file order (presumably newest first -- confirm the CSVs are
    chronological). A row is inserted only when no row exists yet for the same
    country/region and date. Hitting an already-stored row ends the scan in
    the case loop and in the country branch of the vaccination loop; in the
    region branch of the vaccination loop it is only skipped.

    Rows whose ``prname`` / ``prename`` is "Canada" go to the ``*_Per_Country``
    tables; every other row goes to the ``*_Per_Region`` tables.
    """
    conn = sqlite3.connect('prototype_db')
    try:
        c = conn.cursor()
        # get country_code for Canada
        ca_code = get_country_code("Canada", c)
        # get source id for Canada data
        ca_src_url = "https://health-infobase.canada.ca/covid-19/epidemiological-summary-covid-19-cases.html?redir=1#a8"
        ca_src = get_source_id(ca_src_url, c)
        ca_case = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv")
        ca_v = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/vaccination-coverage-map.csv")

        # Cache of region name -> region_code so each region is looked up once.
        region_codes = {}

        def region_code_for(name):
            # Resolve the code from the region's own name; the previous code
            # referenced names (state/county/region_dict) that do not exist
            # in this function.
            if name not in region_codes:
                region_codes[name] = get_region_code(ca_code, name, c)
            return region_codes[name]

        def already_stored(table, key_column, key, date):
            # table/key_column are fixed literals from this function; the
            # values are bound as parameters instead of being concatenated.
            c.execute(
                'SELECT 1 FROM ' + table + ' WHERE ' + key_column
                + ' = ? AND date_collected = ? LIMIT 1',
                (key, str(date)),
            )
            return c.fetchone() is not None

        # insert country and region case data
        country_case_sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
        region_case_sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
        for index, row in ca_case[::-1].iterrows():
            region = row["prname"]
            case = row["numconf"]
            death = toint(row["numdeaths"])
            recover = toint(row["numrecover"])
            date1 = row["date"]
            if region == "Canada":
                if already_stored("Cases_Per_Country", "country_code", ca_code, date1):
                    break
                c.execute(country_case_sql, (ca_code, date1, ca_src, death, case, recover))
            else:
                region_code = region_code_for(region)
                # Check the same table we insert into (was Cases_Per_District).
                if already_stored("Cases_Per_Region", "region_code", region_code, date1):
                    break
                c.execute(region_case_sql, (region_code, date1, ca_src, death, case, recover))
        conn.commit()

        # insert country and region vaccination data
        country_vac_sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
        region_vac_sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
        for index, row in ca_v[::-1].iterrows():
            region = row["prename"]
            first = row["numtotal_atleast1dose"]
            second = toint(row["numtotal_fully"])
            third = toint(row["numtotal_additional"])
            date1 = row["week_end"]
            if region == "Canada":
                if already_stored("Vaccinations_Per_Country", "country_code", ca_code, date1):
                    break
                c.execute(country_vac_sql, (date1, first, second, third, ca_code, ca_src))
            else:
                # Look up this row's own region instead of reusing whatever
                # region_code the case loop happened to leave behind.
                region_code = region_code_for(region)
                if not already_stored("Vaccinations_Per_Region", "region_code", region_code, date1):
                    c.execute(region_vac_sql, (date1, first, second, third, region_code, ca_src))
        conn.commit()
    finally:
        # Release the database handle even if a download or query fails.
        conn.close()
\ No newline at end of file
......@@ -96,7 +96,7 @@ def init_japan():
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
sql = '''INSERT INTO Age_Per_Country (date_collected, country_code, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, japan_code, japan_src1, age, case))
else:
for j in range(0, 20):
......@@ -106,7 +106,7 @@ def init_japan():
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
sql = '''INSERT INTO Age_Per_Region (date_collected, region_code, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, region_dict[cities[i]], japan_src1, age, case))
date1 = date1 + datetime.timedelta(days=1)
conn.commit()
......@@ -218,7 +218,7 @@ def init_korea():
date1 = row[0].date()
for i in range(0, 9):
case = row[i + 2]
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
sql = '''INSERT INTO Age_Per_Country (date_collected, country_code, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, korea_code, korea_src,age_group[i], case))
conn.commit()
......@@ -341,7 +341,7 @@ def init_ina():
recovery = round(row["sembuh"] * result[0][5] / 100)
hos = round(row["perawatan"] * result[0][6] / 100)
death = round(row["meninggal"] * result[0][3] / 100)
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
sql = '''INSERT INTO Age_Per_Country (date_collected, country_code, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
c.execute(sql,(row["Date"], ina_code, ina_src, row["SubCategory"], case, recovery, hos, death))
conn.commit()
......@@ -359,7 +359,7 @@ def init_ina():
recovery = round(row["sembuh"] * result[2] / 100)
hos = round(row["perawatan"] * result[3] / 100)
death = round(row["meninggal"] * result[0] / 100)
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
sql = '''INSERT INTO Age_Per_Region (date_collected, region_code, source_id, age_group, case_number, recovery_number, hospitalization_number, death_number) VALUES (?, ?, ?, ?, ?, ?, ? ,?)'''
c.execute(sql,(row["Date"], region_dict[row["Location"]], ina_src, row["SubCategory"], case, recovery, hos, death))
conn.commit()
......
......@@ -15,6 +15,13 @@ from urllib.request import urlopen
from bs4 import BeautifulSoup
from urllib.request import urlopen
def toint(s):
    """Convert a possibly-missing numeric cell to an ``int`` for insertion.

    Args:
        s: A scalar cell value (for example from a pandas row).

    Returns:
        ``None`` when ``pd.isna(s)`` is true, otherwise ``int(s)``.

    The missing-value result is ``None`` rather than the string ``"NULL"``:
    callers pass the result as a bound ``?`` parameter, and sqlite3 binds
    ``None`` as SQL NULL whereas the string would be stored as the literal
    text 'NULL'.
    """
    if pd.isna(s):
        return None
    return int(s)
#add country and county level case data and vaccination data for country and state
def init_us():
conn = sqlite3.connect('prototype_db')
......@@ -120,4 +127,52 @@ def init_us():
else:
sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
c.execute(sql,(row["Date"], row["Admin_Dose_1_Cumulative"], row["Series_Complete_Cumulative"], row["Booster_Cumulative"], region_dict[abb[row["Location"]]], us_src_v))
conn.commit()
def init_canada():
    """Load the full Canadian case and vaccination history into ``prototype_db``.

    Registers the Health Infobase source, downloads the case and vaccination
    CSVs, and inserts every row. Rows whose ``prname`` / ``prename`` is
    "Canada" go to the ``*_Per_Country`` tables; every other row goes to the
    ``*_Per_Region`` tables, creating the ``Regions`` entry the first time a
    region name is seen.

    No existence checks are made, so this is meant for a first-time load.
    """
    conn = sqlite3.connect('prototype_db')
    try:
        c = conn.cursor()
        # get country_code for Canada
        ca_code = get_country_code("Canada", c)
        # insert and get source id for Canada data
        ca_src_url = "https://health-infobase.canada.ca/covid-19/epidemiological-summary-covid-19-cases.html?redir=1#a8"
        set_source(ca_src_url, c, conn)
        ca_src = get_source_id(ca_src_url, c)
        ca_case = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv")
        ca_v = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/vaccination-coverage-map.csv")

        # region name -> region_code for regions already inserted in this run
        region_dict = {}

        def region_code_for(name):
            # Shared by both loops so a region that appears only in the
            # vaccination CSV is registered instead of raising KeyError.
            if name not in region_dict:
                c.execute(
                    '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)''',
                    (name, ca_code),
                )
                region_dict[name] = get_region_code(ca_code, name, c)
            return region_dict[name]

        # insert country and region case data
        country_case_sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
        region_case_sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
        for index, row in ca_case.iterrows():
            region = row["prname"]
            case = row["numconf"]
            death = toint(row["numdeaths"])
            recover = toint(row["numrecover"])
            if region == "Canada":
                c.execute(country_case_sql, (ca_code, row["date"], ca_src, death, case, recover))
            else:
                c.execute(region_case_sql, (region_code_for(region), row["date"], ca_src, death, case, recover))
        conn.commit()

        # insert country and region vaccination data
        country_vac_sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
        region_vac_sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
        for index, row in ca_v.iterrows():
            region = row["prename"]
            first = row["numtotal_atleast1dose"]
            second = toint(row["numtotal_fully"])
            third = toint(row["numtotal_additional"])
            if region == "Canada":
                c.execute(country_vac_sql, (row["week_end"], first, second, third, ca_code, ca_src))
            else:
                c.execute(region_vac_sql, (row["week_end"], first, second, third, region_code_for(region), ca_src))
        conn.commit()
    finally:
        # Release the database handle even if a download or insert fails.
        conn.close()
\ No newline at end of file
......@@ -147,7 +147,7 @@ c.execute('''CREATE TABLE Population_Per_District(
c.execute('''CREATE TABLE Age_Per_Country(
date_collected DATETIME2 NOT NULL,
country_id VARCHAR(3),
country_code VARCHAR(3),
source_id BIGINT NOT NULL,
age_group VARCHAR(64) NOT NULL,
case_number INT NULL,
......@@ -158,14 +158,14 @@ c.execute('''CREATE TABLE Age_Per_Country(
recovery_rate FLOAT NULL,
hospitalization_rate FLOAT NULL,
death_rate FLOAT NULL,
FOREIGN KEY (country_id) REFERENCES Countries(country_code),
FOREIGN KEY (country_code) REFERENCES Countries(country_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
''')
c.execute('''CREATE TABLE Age_Per_Region(
date_collected DATETIME2 NOT NULL,
region_id BIGINT,
region_code BIGINT,
source_id BIGINT NOT NULL,
age_group VARCHAR(64) NOT NULL,
case_number INT NULL,
......@@ -176,7 +176,7 @@ c.execute('''CREATE TABLE Age_Per_Region(
recovery_rate FLOAT NULL,
hospitalization_rate FLOAT NULL,
death_rate FLOAT NULL,
FOREIGN KEY (region_id) REFERENCES Regions(region_code),
FOREIGN KEY (region_code) REFERENCES Regions(region_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
......@@ -184,7 +184,7 @@ c.execute('''CREATE TABLE Age_Per_Region(
c.execute('''CREATE TABLE Age_Per_District(
date_collected DATETIME2 NOT NULL,
district_id BIGINT,
district_code BIGINT,
source_id BIGINT NOT NULL,
age_group VARCHAR(64) NOT NULL,
case_number INT NULL,
......@@ -195,7 +195,7 @@ c.execute('''CREATE TABLE Age_Per_District(
recovery_rate FLOAT NULL,
hospitalization_rate FLOAT NULL,
death_rate FLOAT NULL,
FOREIGN KEY (district_id) REFERENCES Districts(district_code),
FOREIGN KEY (district_code) REFERENCES Districts(district_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment