Skip to content
Snippets Groups Projects
Commit e1c84417 authored by Ivy Wang's avatar Ivy Wang
Browse files

add strain data for canada and data for guatemala

parent 87715d2e
No related branches found
No related tags found
No related merge requests found
......@@ -128,6 +128,7 @@ def update_canada():
ca_case = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv")
ca_v = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/vaccination-coverage-map.csv")
ca_s = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-epiSummary-variants.csv")
#insert country and region case data
ca_case = ca_case[::-1]
......@@ -180,4 +181,72 @@ def update_canada():
if len(result) == 0:
sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
c.execute(sql,(row["week_end"], first, second, third, region_dict[region], ca_src))
conn.commit()
# Insert country-level strain data for Canada, skipping weeks that are already stored.
# For every row whose "Variant Grouping" is "VOC", add its "%CT Count of Sample #"
# value to the running total for that collection week and variant name.
ca_strain = {}
for index, row in ca_s.iterrows():
    if row["Variant Grouping"] == "VOC":
        week = row["Collection (week)"]
        if week not in ca_strain:
            ca_strain[week] = {"Alpha": 0, "Beta": 0, "Gamma": 0, "Delta": 0, "Omicron": 0}
        # An identifier outside the five tracked names raises KeyError, as before.
        ca_strain[week][row["_Identifier"]] += row["%CT Count of Sample #"]
for date1, rates in ca_strain.items():
    # Bound parameters instead of string concatenation: no quoting problems, no SQL injection.
    c.execute('SELECT 1 FROM Strains_Per_Country WHERE country_code = ? AND date_collected = ?', (ca_code, str(date1)))
    if c.fetchone() is None:
        sql = '''INSERT INTO Strains_Per_Country (date_collected, country_code, source_id, alpha_rate, beta_rate, gamma_rate, delta_rate, omicron_rate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'''
        # Use the loop variable (date1); the previous code indexed with an unrelated
        # name `date`, which is not a key of ca_strain.
        c.execute(sql, (date1, ca_code, ca_src, rates["Alpha"], rates["Beta"], rates["Gamma"], rates["Delta"], rates["Omicron"]))
conn.commit()
def update_guatemala():
    """Add Guatemala rows that are not yet in the database.

    Two tables are updated through the module-level cursor ``c`` and
    connection ``conn``:

    * ``Cases_Per_District`` - one row per district and date, built from the
      merged case/death tables.
    * ``Vaccinations_Per_Country`` - one row per date from the OWID
      vaccination file.

    Both loops walk from the last column/row backwards and stop at the first
    entry that is already stored.
    """
    # get country_code for Guatemala
    gu_code = get_country_code("Guatemala", c)
    # get source ids for the Guatemala dashboard and the OWID vaccination data
    gu_src_url = "https://tablerocovid.mspas.gob.gt/"
    gu_src = get_source_id(gu_src_url, c)
    v_src_url = "https://github.com/owid/covid-19-data"
    v_src = get_source_id(v_src_url, c)
    gu_v = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/country_data/Guatemala.csv")
    # Case and death tables are read from CSV files in the working directory.
    # NOTE(review): init_guatemala downloads them from shinyapps URLs that embed a
    # session id - presumably those links expire; confirm how these files are refreshed.
    gu_case = pd.read_csv("Confirmados.csv")
    gu_death = pd.read_csv("Fallecidos.csv")
    # After the merge, case columns carry the "_x" suffix and death columns "_y".
    gu = pd.merge(gu_case, gu_death, on=["departamento", "municipio"])

    # insert district case data for guatemala
    select_case = 'SELECT 1 FROM Cases_Per_District WHERE district_code = ? AND date_collected = ?'
    insert_case = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
    columns = list(gu.columns)
    for index, row in gu.iterrows():
        # The row with index 0 is skipped, exactly as in init_guatemala.
        if index < 1:
            continue
        region = row["departamento"]
        city = row["municipio"]
        # Same three-argument lookup that init_guatemala uses; resolved once per district.
        district_code = get_district_code(get_region_code(gu_code, region, c), city, c)
        # Walk the columns from the last one down to index 5.
        # NOTE(review): any "_y" column at index >= 5 is treated as a date - confirm
        # that no non-date "_y" column can be reached before the break below.
        for i in range(len(columns) - 1, 4, -1):
            death_col = columns[i]
            if "_y" not in death_col:
                continue
            date1 = death_col.replace("_y", "")
            c.execute(select_case, (district_code, str(date1)))
            if c.fetchone() is not None:
                # This date is already stored for the district: stop scanning it.
                break
            case = check(row[date1 + "_x"])
            # Read the death count from the "_y" column itself (by label).
            death = check(row[death_col])
            c.execute(insert_case, (district_code, date1, gu_src, death, case))
    conn.commit()

    # insert vaccination country data for guatemala
    select_vacc = 'SELECT 1 FROM Vaccinations_Per_Country WHERE country_code = ? AND date_collected = ?'
    insert_vacc = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
    # Reverse the file so the last rows are handled first.
    gu_v = gu_v[::-1]
    for index, row in gu_v.iterrows():
        date1 = row['date']
        c.execute(select_vacc, (gu_code, str(date1)))
        if c.fetchone() is not None:
            # First date that is already stored: stop.
            break
        c.execute(insert_vacc, (row["date"], toint(row["people_vaccinated"]), toint(row["people_fully_vaccinated"]), toint(row["total_boosters"]), gu_code, v_src))
    conn.commit()
\ No newline at end of file
......@@ -113,39 +113,42 @@ CREATE TABLE Vaccinations_Per_District(
-- keeps track of strain data per country
-- One row per country and collection date: the loaders insert a separate row for
-- every collection week, so country_code is a plain foreign key, not a primary key.
-- Rates are stored as FLOAT because they are summed percentage values.
CREATE TABLE Strains_Per_Country(
    date_collected DATETIME2 NOT NULL,
    country_code VARCHAR(3),
    source_id BIGINT NOT NULL,
    alpha_rate FLOAT NULL,
    beta_rate FLOAT NULL,
    gamma_rate FLOAT NULL,
    delta_rate FLOAT NULL,
    omicron_rate FLOAT NULL,
    FOREIGN KEY (country_code) REFERENCES Countries(country_code),
    FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
-- keeps track of strain data per region
-- region_code is a plain foreign key (not a primary key), so several collection
-- dates can be stored for the same region. Each column is declared exactly once.
CREATE TABLE Strains_Per_Region(
    date_collected DATETIME2 NOT NULL,
    region_code BIGINT,
    source_id INT NOT NULL,
    alpha_rate FLOAT NULL,
    beta_rate FLOAT NULL,
    gamma_rate FLOAT NULL,
    delta_rate FLOAT NULL,
    omicron_rate FLOAT NULL,
    FOREIGN KEY (region_code) REFERENCES Regions(region_code),
    FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
-- keeps track of strain data per district
-- district_code is a plain foreign key (not a primary key), so several collection
-- dates can be stored for the same district. Each column is declared exactly once.
CREATE TABLE Strains_Per_District(
    date_collected DATETIME2 NOT NULL,
    district_code BIGINT,
    source_id INT NOT NULL,
    alpha_rate FLOAT NULL,
    beta_rate FLOAT NULL,
    gamma_rate FLOAT NULL,
    delta_rate FLOAT NULL,
    omicron_rate FLOAT NULL,
    FOREIGN KEY (district_code) REFERENCES Districts(district_code),
    FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
......
......@@ -143,6 +143,7 @@ def init_canada():
ca_case = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv")
ca_v = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/vaccination-coverage-map.csv")
ca_s = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-epiSummary-variants.csv")
#insert country and region case data
region_dict = {}
......@@ -175,4 +176,89 @@ def init_canada():
else:
sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
c.execute(sql,(row["week_end"], first, second, third, region_dict[region], ca_src))
conn.commit()
#insert strain data for canada country
# Only rows whose "Variant Grouping" is "VOC" contribute; their
# "%CT Count of Sample #" values are summed per collection week and variant name.
ca_strain = {}
voc_rows = ca_s[ca_s["Variant Grouping"] == "VOC"]
for _, voc in voc_rows.iterrows():
    weekly = ca_strain.setdefault(
        voc["Collection (week)"],
        {"Alpha": 0, "Beta": 0, "Gamma": 0, "Delta": 0, "Omicron": 0},
    )
    weekly[voc["_Identifier"]] = weekly[voc["_Identifier"]] + voc["%CT Count of Sample #"]
# One Strains_Per_Country row per collection week.
insert_strain = '''INSERT INTO Strains_Per_Country (date_collected, country_code, source_id, alpha_rate, beta_rate, gamma_rate, delta_rate, omicron_rate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'''
for week, rates in ca_strain.items():
    c.execute(insert_strain, (week, ca_code, ca_src, rates["Alpha"], rates["Beta"], rates["Gamma"], rates["Delta"], rates["Omicron"]))
conn.commit()
#insert population data for country and region
from io import StringIO  # local import: pd.read_html expects a file-like object, not literal HTML

sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
c.execute(sql,(ca_code, 38436447, datetime.datetime(2022, 2, 27).date()))
conn.commit()
# Region populations come from the "wikitable" tables on the Wikipedia page.
wikiurl = "https://en.wikipedia.org/wiki/Provinces_and_territories_of_Canada"
response = requests.get(wikiurl)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find_all('table', {'class': "wikitable"})
# Passing a literal HTML string to read_html is deprecated; wrap it in StringIO.
ca_p = pd.read_html(StringIO(str(table)))
sql = '''INSERT INTO Population_Per_Region (region_code, population_amount, date_collected) VALUES (?, ?, ?)'''
collected = datetime.datetime(2021, 8, 20).date()
# Only the first two parsed tables are used; indexing (not slicing) keeps the
# IndexError if the page ever yields fewer than two tables.
for i in range(2):
    ca_p1 = ca_p[i]
    for index, row in ca_p1.iterrows():
        # Explicit positional access: column 0 holds the name, column 6 the population.
        state = row.iloc[0]
        state = state.replace("[b]", "")
        if "Total" not in state:
            c.execute(sql, (region_dict[state], row.iloc[6], collected))
conn.commit()
def init_guatemala():
    """Load the initial Guatemala data set into the database.

    Using the module-level cursor ``c`` and connection ``conn`` this
    registers the two data sources, creates a ``Regions`` row per
    "departamento" and a ``Districts`` row per "municipio", then inserts
    per-district case/death rows (``Cases_Per_District``), per-district
    population (``Population_Per_District``) and country-level vaccination
    rows (``Vaccinations_Per_Country``).
    """
    # get country_code for Guatemala
    gu_code = get_country_code("Guatemala", c)
    # insert and get source ids for the Guatemala dashboard and the OWID vaccination data
    gu_src_url = "https://tablerocovid.mspas.gob.gt/"
    set_source(gu_src_url, c, conn)
    gu_src = get_source_id(gu_src_url, c)
    v_src_url = "https://github.com/owid/covid-19-data"
    set_source(v_src_url, c, conn)
    v_src = get_source_id(v_src_url, c)
    gu_death = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/fallecidosFF?w=0d14592e")
    gu_case = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/confirmadosFER?w=0d14592e")
    gu_v = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/country_data/Guatemala.csv")
    # After the merge, case columns carry the "_x" suffix and death columns "_y".
    gu = pd.merge(gu_case, gu_death, on=["departamento", "municipio"])

    # insert district case data and population data for guatemala
    insert_region = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
    insert_district = '''INSERT INTO Districts (district_name, region_code) VALUES (?, ?)'''
    insert_case = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
    insert_population = '''INSERT INTO Population_Per_District (district_code, population_amount, date_collected) VALUES (?, ?, ?)'''
    region_dict = {}  # region name -> region_code
    city_dict = {}    # region name -> {district name -> district_code}
    # Columns from index 5 up to (not including) the last one are scanned for dates;
    # only the "_x" (case) columns drive the loop.
    date_columns = [col for col in list(gu.columns)[5:-1] if "_x" in col]
    for index, row in gu.iterrows():
        # The row with index 0 is skipped.
        if index < 1:
            continue
        region = row["departamento"]
        city = row["municipio"]
        if region not in region_dict:
            c.execute(insert_region, (region, gu_code))
            region_dict[region] = get_region_code(gu_code, region, c)
            city_dict[region] = {}
        if city not in city_dict[region]:
            c.execute(insert_district, (city, region_dict[region]))
            city_dict[region][city] = get_district_code(region_dict[region], city, c)
        district_code = city_dict[region][city]
        for case_col in date_columns:
            date1 = case_col.replace("_x", "")
            # Access by column label; integer keys on a labelled Series are deprecated.
            case = check(row[case_col])
            death = check(row[date1 + "_y"])
            c.execute(insert_case, (district_code, date1, gu_src, death, case))
        c.execute(insert_population, (district_code, check(row["poblacion_x"]), date.today()))
    conn.commit()

    # insert vaccination country data for guatemala
    insert_vacc = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
    for index, row in gu_v.iterrows():
        c.execute(insert_vacc, (row["date"], toint(row["people_vaccinated"]), toint(row["people_fully_vaccinated"]), toint(row["total_boosters"]), gu_code, v_src))
    conn.commit()
\ No newline at end of file
......@@ -201,6 +201,21 @@ c.execute('''CREATE TABLE Age_Per_District(
''')
# Create the per-country strain table: one FLOAT rate column per tracked variant,
# linked to Countries and Sources through foreign keys.
_strains_per_country_ddl = '''CREATE TABLE Strains_Per_Country(
date_collected DATETIME2 NOT NULL,
country_code VARCHAR(3),
source_id BIGINT NOT NULL,
alpha_rate FLOAT NULL,
beta_rate FLOAT NULL,
gamma_rate FLOAT NULL,
delta_rate FLOAT NULL,
omicron_rate FLOAT NULL,
FOREIGN KEY (country_code) REFERENCES Countries(country_code),
FOREIGN KEY (source_id) REFERENCES Sources(source_id)
);
'''
c.execute(_strains_per_country_ddl)

# Persist the schema changes.
conn.commit()
......@@ -213,3 +228,4 @@ c.close()
from initial_data_scripts.init_europe import init_italy, init_ukraine
from initial_data_scripts.init_asia import init_japan, init_korea, init_ina
from initial_data_scripts.init_global import init_jhu
from initial_data_scripts.init_north_america import init_us, init_canada, init_guatemala
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment