Add strain data for Canada and data for Guatemala

e1c84417 · Ivy Wang · 87715d2e · e1c84417 · e1c84417 · e1c84417
Commit e1c84417 authored 3 years ago by Ivy Wang
--- a/daily_data_scripts/daily_na.py
+++ b/daily_data_scripts/daily_na.py
@@ -128,6 +128,7 @@ def update_canada():
    
    ca_case = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv")
    ca_v = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/vaccination-coverage-map.csv")
+    ca_s = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-epiSummary-variants.csv")
    
    #insert country and region case data
    ca_case = ca_case[::-1]
@@ -180,4 +181,72 @@ def update_canada():
            if len(result) == 0:
                sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
                c.execute(sql,(row["week_end"], first, second, third, region_dict[region], ca_src))
+    conn.commit()
+    
+    ca_strain = {}
+    for index, row in ca_s.iterrows():
+        if row["Variant Grouping"] == "VOC":
+            if row["Collection (week)"] not in ca_strain:
+                ca_strain[row["Collection (week)"]] = {"Alpha":0 , "Beta": 0, "Gamma" :0, "Delta": 0, "Omicron": 0}
+            ca_strain[row["Collection (week)"]][row["_Identifier"]] = ca_strain[row["Collection (week)"]][row["_Identifier"]] + row["%CT Count of Sample #"]
+    for date1 in ca_strain:
+        c.execute('SELECT * FROM Strains_Per_Country WHERE country_code ="' + ca_code + '" AND date_collected ="' + str(date1)+ '"')
+        result = c.fetchall()
+        if len(result) == 0:
+            sql = '''INSERT INTO Strains_Per_Country (date_collected, country_code, source_id, alpha_rate, beta_rate, gamma_rate, delta_rate, omicron_rate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'''
+            c.execute(sql,(date, ca_code, ca_src, ca_strain[date]["Alpha"], ca_strain[date]["Beta"], ca_strain[date]["Gamma"],ca_strain[date]["Delta"], ca_strain[date]["Omicron"]))
+    conn.commit()
+
+def update_guatemala():
+    # get country_code for Guatemala
+    gu_code = get_country_code("Guatemala", c)
+    
+    #get source id for US data
+    gu_src_url = "https://tablerocovid.mspas.gob.gt/"
+    gu_src = get_source_id(gu_src_url, c)
+    
+    v_src = "https://github.com/owid/covid-19-data"
+    v_src = get_source_id(v_src, c)
+    
+    #gu_death = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/fallecidosFF?w=0d14592e")
+    #gu_case = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/confirmadosFER?w=0d14592e")
+    gu_v = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/country_data/Guatemala.csv")
+    
+    gu_case = pd.read_csv("Confirmados.csv")
+    gu_death = pd.read_csv("Fallecidos.csv")
+    
+    gu = pd.merge(gu_case, gu_death, on=["departamento", "municipio"])
+    #insert district case data and population data for guatemala
+    region_dict = {}
+    city_dict = {}
+    for index, row in gu.iterrows():
+        if index >= 1:
+            region = row["departamento"]
+            city = row["municipio"]
+            for i in range(len(row) - 1, 4, -1):
+                if "_y" in gu.columns[i]:
+                    date1 = gu.columns[i].replace("_y", "")
+                    district_code = get_district_code(get_region_code(region), city, c)
+                    c.execute('SELECT * FROM Cases_Per_District WHERE district_code=' + str(district_code) + ' AND date_collected ="' + str(date1)+ '"')
+                    result = c.fetchall()
+                    if len(result) == 0:
+                        case = check(row[date1 + "_x"])
+                        death = check(row[row[i]])
+                        sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
+                        c.execute(sql,(district_code, date1, gu_src, death, case))
+                    else:
+                        break
+    conn.commit()
+    
+    #insert vaccination country data for guatemala
+    gu_v = gu_v[::-1]
+    for index, row in gu_v.iterrows():
+        date1 = row['date']
+        c.execute('SELECT * FROM Vaccinations_Per_Country WHERE country_code ="' + gu_code + '" AND date_collected ="' + str(date1)+ '"')
+        result = c.fetchall()
+        if len(result) == 0:
+            sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+            c.execute(sql, (row["date"], toint(row["people_vaccinated"]), toint(row["people_fully_vaccinated"]), toint(row["total_boosters"]), gu_code, v_src))
+        else:
+            break
    conn.commit()
\ No newline at end of file
--- a/data/OverallSchema.sql
+++ b/data/OverallSchema.sql
@@ -113,39 +113,42 @@ CREATE TABLE Vaccinations_Per_District(

 -- keeps track of strain data per country: one row per collection date and
 -- country, with one nullable FLOAT *_rate column per tracked variant
 CREATE TABLE Strains_Per_Country(
-    country_code VARCHAR(3) PRIMARY KEY,
+    date_collected DATETIME2 NOT NULL,
+    country_code VARCHAR(3),
    source_id BIGINT NOT NULL,
-    alpha_rate INT NULL,
-    beta_rate INT NULL,
-    gamma_rate INT NULL,
-    delta_rate INT NULL,
-    omicron_rate INT NULL,
+    alpha_rate FLOAT NULL,
+    beta_rate FLOAT NULL,
+    gamma_rate FLOAT NULL,
+    delta_rate FLOAT NULL,
+    omicron_rate FLOAT NULL,
    FOREIGN KEY (country_code) REFERENCES Countries(country_code),
    FOREIGN KEY (source_id) REFERENCES Sources(source_id)
 );

 -- keeps track of strain data per region: one row per collection date and
 -- region, with one nullable FLOAT *_rate column per tracked variant.
 -- After this change no PRIMARY KEY or UNIQUE constraint is declared, so the
 -- schema itself does not reject duplicate (date_collected, region_code) rows.
 CREATE TABLE Strains_Per_Region(
-    region_code BIGINT PRIMARY KEY,
+    date_collected DATETIME2 NOT NULL,
+    region_code BIGINT,
    source_id INT NOT NULL,
-    alpha_rate INT NULL,
-    beta_rate INT NULL,
-    gamma_rate INT NULL,
-    delta_rate INT NULL,
-    omicron_rate INT NULL,
+    alpha_rate FLOAT NULL,
+    beta_rate FLOAT NULL,
+    gamma_rate FLOAT NULL,
+    delta_rate FLOAT NULL,
+    omicron_rate FLOAT NULL,
    FOREIGN KEY (region_code) REFERENCES Regions(region_code),
    FOREIGN KEY (source_id) REFERENCES Sources(source_id)
 );

 -- keeps track of strain data per district: one row per collection date and
 -- district, with one nullable FLOAT *_rate column per tracked variant.
 -- After this change no PRIMARY KEY or UNIQUE constraint is declared, so the
 -- schema itself does not reject duplicate (date_collected, district_code) rows.
 CREATE TABLE Strains_Per_District(
-    district_code BIGINT PRIMARY KEY,
+    date_collected DATETIME2 NOT NULL,
+    district_code BIGINT,
    source_id INT NOT NULL,
-    alpha_rate INT NULL,
-    beta_rate INT NULL,
-    gamma_rate INT NULL,
-    delta_rate INT NULL,
-    omicron_rate INT NULL,
+    alpha_rate FLOAT NULL,
+    beta_rate FLOAT NULL,
+    gamma_rate FLOAT NULL,
+    delta_rate FLOAT NULL,
+    omicron_rate FLOAT NULL,
    FOREIGN KEY (district_code) REFERENCES Districts(district_code),
    FOREIGN KEY (source_id) REFERENCES Sources(source_id)
 );

--- a/initial_data_scripts/init_north_america.py
+++ b/initial_data_scripts/init_north_america.py
@@ -143,6 +143,7 @@ def init_canada():
    
    ca_case = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv")
    ca_v = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/vaccination-coverage-map.csv")
+    ca_s = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-epiSummary-variants.csv")
    
    #insert country and region case data
    region_dict = {}
@@ -175,4 +176,89 @@ def init_canada():
        else:
            sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
            c.execute(sql,(row["week_end"], first, second, third, region_dict[region], ca_src))
+    conn.commit()
+    
+    #insert strain data for canada country 
+    ca_strain = {}
+    for index, row in ca_s.iterrows():
+        if row["Variant Grouping"] == "VOC":
+            if row["Collection (week)"] not in ca_strain:
+                ca_strain[row["Collection (week)"]] = {"Alpha":0 , "Beta": 0, "Gamma" :0, "Delta": 0, "Omicron": 0}
+            ca_strain[row["Collection (week)"]][row["_Identifier"]] = ca_strain[row["Collection (week)"]][row["_Identifier"]] + row["%CT Count of Sample #"]   
+    for date in ca_strain:
+        sql = '''INSERT INTO Strains_Per_Country (date_collected, country_code, source_id, alpha_rate, beta_rate, gamma_rate, delta_rate, omicron_rate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'''
+        c.execute(sql,(date, ca_code, ca_src, ca_strain[date]["Alpha"], ca_strain[date]["Beta"], ca_strain[date]["Gamma"],ca_strain[date]["Delta"], ca_strain[date]["Omicron"]))
+    conn.commit()
+    
+    #insert population data for country and region
+    sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
+    c.execute(sql,(ca_code, 38436447, datetime.datetime(2022, 2, 27).date()))
+    conn.commit()
+    
+    wikiurl="https://en.wikipedia.org/wiki/Provinces_and_territories_of_Canada"
+    table_class="wikitable sortable jquery-tablesorter"
+    response=requests.get(wikiurl)
+    soup = BeautifulSoup(response.text, 'html.parser')
+    table = soup.find_all('table',{'class':"wikitable"})
+    ca_p = pd.read_html(str(table))
+    for i in range(0, 2):
+        ca_p1 = pd.DataFrame(ca_p[i])
+        for index, row in ca_p1.iterrows():
+            state = row[0]
+            state = state.replace("[b]", "")
+            if "Total" not in state:
+                sql = '''INSERT INTO Population_Per_Region (region_code, population_amount, date_collected) VALUES (?, ?, ?)'''
+                c.execute(sql,(region_dict[state], row[6], datetime.datetime(2021, 8, 20).date()))
+    conn.commit()
+
+def init_guatemala():
+    # get country_code for Guatemala
+    gu_code = get_country_code("Guatemala", c)
+    
+    #insert and get source id for US data
+    gu_src_url = "https://tablerocovid.mspas.gob.gt/"
+    set_source(gu_src_url, c, conn)
+    gu_src = get_source_id(gu_src_url, c)
+    
+    v_src = "https://github.com/owid/covid-19-data"
+    set_source(v_src, c, conn)
+    v_src = get_source_id(v_src, c)
+    
+    gu_death = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/fallecidosFF?w=0d14592e")
+    gu_case = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/confirmadosFER?w=0d14592e")
+    gu_v = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/country_data/Guatemala.csv")
+    gu = pd.merge(gu_case, gu_death, on=["departamento", "municipio"])
+    
+
+    #insert district case data and population data for guatemala
+    region_dict = {}
+    city_dict = {}
+    for index, row in gu.iterrows():
+        if index >= 1:
+            region = row["departamento"]
+            city = row["municipio"]
+            if region not in region_dict:
+                sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
+                c.execute(sql,(region, gu_code))
+                region_dict[region] = get_region_code(gu_code, region, c)
+                city_dict[region] = {}
+            if city not in city_dict[region]:
+                sql = '''INSERT INTO Districts (district_name, region_code) VALUES (?, ?)'''
+                c.execute(sql,(city, region_dict[region]))
+                city_dict[region][city] = get_district_code(region_dict[region], city, c)
+            for i in range(5, len(row) - 1):
+                if "_x" in gu.columns[i]:
+                    date1 = gu.columns[i].replace("_x", "")
+                    case = check(row[i])
+                    death = check(row[date1 + "_y"])
+                    sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
+                    c.execute(sql,(city_dict[region][city], date1, gu_src, death, case))
+            sql = '''INSERT INTO Population_Per_District (district_code, population_amount, date_collected) VALUES (?, ?, ?)'''
+            c.execute(sql,(city_dict[region][city], check(row["poblacion_x"]), date.today()))
+    conn.commit()
+    
+    #insert vaccination country data for guatemala
+    for index, row in gu_v.iterrows():
+        sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+        c.execute(sql, (row["date"], toint(row["people_vaccinated"]), toint(row["people_fully_vaccinated"]), toint(row["total_boosters"]), gu_code, v_src))
    conn.commit()
\ No newline at end of file
--- a/prototype_main_backend.py
+++ b/prototype_main_backend.py
@@ -201,6 +201,21 @@ c.execute('''CREATE TABLE Age_Per_District(

        ''')

+c.execute('''CREATE TABLE Strains_Per_Country(
+    date_collected DATETIME2 NOT NULL,
+    country_code VARCHAR(3),
+    source_id BIGINT NOT NULL,
+    alpha_rate FLOAT NULL,
+    beta_rate FLOAT NULL,
+    gamma_rate FLOAT NULL,
+    delta_rate FLOAT NULL,
+    omicron_rate FLOAT NULL,
+    FOREIGN KEY (country_code) REFERENCES Countries(country_code),
+    FOREIGN KEY (source_id) REFERENCES Sources(source_id)
+);
+
+        ''')
+
                  
 conn.commit()

@@ -213,3 +228,4 @@ c.close()
 from initial_data_scripts.init_europe import init_italy, init_ukraine
 from initial_data_scripts.init_asia import init_japan, init_korea, init_ina
 from initial_data_scripts.init_global import init_jhu
+from initial_data_scripts.init_north_america import init_us, init_canada, init_guatemala