Skip to content
Snippets Groups Projects
Select Git revision
  • c1070c753ffe00e2784ed50e651dd362e9589228
  • main default protected
2 results

daily_na.py

Blame
  • daily_na.py 12.43 KiB
    import pandas as pd
    import sqlite3
    import sys
    import datetime
    import requests
    from datetime import date
    
    sys.path.append("..")
    from util import *
    
    #install html parse tool
    from urllib.request import urlopen
    !pip install beautifulsoup4
    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    
    def toint(s):
        if pd.isna(s):
            s = "NULL"
        else:
            s = int(s)
        return s
    
    #update country and county level case data and vaccination data for country and state
    def update_us():
        conn = sqlite3.connect('prototype_db')
        c = conn.cursor()
        
        
        # get country_code for US
        us_code = get_country_code("United States", c)
        
        #get source id for US data
        us_src_url = "https://github.com/nytimes/covid-19-data"
        us_src = get_source_id(us_src_url, c)
        us_src_v = "https://covid.cdc.gov/covid-data-tracker/#datatracker-home"
        us_src_v = get_source_id(us_src_v, c)
        
        us_country = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us.csv")
        #us_state = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-states.csv")
        us_county = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-counties-recent.csv")
        us_sv = pd.read_csv("https://data.cdc.gov/api/views/rh2h-3yt2/rows.csv")
        
        #insert data for US
        us_country = us_country[::-1]
        for index, row in us_country.iterrows():
            date1 = row['date']
            c.execute('SELECT * FROM Cases_Per_Country WHERE country_code ="' + us_code + '" AND date_collected ="' + str(date1)+ '"')
            result = c.fetchall()
            if len(result) == 0:
                sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
                c.execute(sql,(us_code, row["date"], us_src, row["deaths"], row["cases"]))
            else:
                break
        conn.commit()
        
        region_dict = {}
        #get state code for US
        c.execute("SELECT region_code, region_name from Regions Where country_code = 'US'")
        result = c.fetchall()
    
        for i in range(0,len(result)):
            region_dict[result[i][1]] = result[i][0]
        
        #insert county code and data
        
        us_county = us_county[::-1]
        for index, row in us_county.iterrows():
            state = row["state"]
            county = row["county"]
            county_code = get_district_code(region_dict[state], county, c)
            date1 = row['date']
            c.execute('SELECT * FROM Cases_Per_District WHERE district_code=' + str(county_code) + ' AND date_collected ="' + str(date1)+ '"')
            result = c.fetchall()
            if len(result) == 0:
                sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
                c.execute(sql,(county_code, row["date"], us_src, row["deaths"], row["cases"]))
            else:
                break
        conn.commit()
        
        #get abb 
        abb = {}
        wikiurl="https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#cite_note-:0-18"
        table_class="wikitable sortable jquery-tablesorter"
        response=requests.get(wikiurl)
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find_all('table',{'class':"wikitable"})
        us_abb = pd.read_html(str(table))
        for i in range(0, 3):
            us_abb1 = pd.DataFrame(us_abb[i])
            for index, row in us_abb1.iterrows():
                state = row[0]
                state = state.replace("[D]", "")
                state = state.replace("U.S. ", "")
                abb[row[1]] = state
        abb["PW"] = "Palau"
        abb["MH"] = "Marshall Islands"
        abb["FM"] = "Federated States of Micronesia"
        
        #insert vaccination data for country and state
        for index, row in us_sv.iterrows():
            date1 = row['Date']
            c.execute('SELECT * FROM Vaccinations_Per_Country WHERE country_code ="' + us_code + '" AND date_collected ="' + str(date1)+ '"')
            result = c.fetchall()
            if len(result) == 0:
                if row["date_type"] == "Report":
                    if row["Location"] == "US":
                        sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
                        c.execute(sql,(row["Date"], row["Admin_Dose_1_Cumulative"], row["Series_Complete_Cumulative"], row["Booster_Cumulative"], us_code, us_src_v))
                    else:
                        sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
                        c.execute(sql,(row["Date"], row["Admin_Dose_1_Cumulative"], row["Series_Complete_Cumulative"], row["Booster_Cumulative"], region_dict[abb[row["Location"]]], us_src_v))
            else:
                break
        conn.commit()
    
    def update_canada():
        conn = sqlite3.connect('prototype_db')
        c = conn.cursor()
        
        # get country_code for Canada
        ca_code = get_country_code("Canada", c)
        
        #get source id for Canada data
        ca_src_url = "https://health-infobase.canada.ca/covid-19/epidemiological-summary-covid-19-cases.html?redir=1#a8"
        ca_src = get_source_id(ca_src_url, c)
        
        ca_case = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv")
        ca_v = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/vaccination-coverage-map.csv")
        ca_s = pd.read_csv("https://health-infobase.canada.ca/src/data/covidLive/covid19-epiSummary-variants.csv")
        
        #insert country and region case data
        ca_case = ca_case[::-1]
        for index, row in ca_case.iterrows():
            region = row["prname"]
            case = row["numconf"]
            death = toint(row["numdeaths"])
            recover = toint(row["numrecover"])
            if region == "Canada":
                date1 = row['date']
                c.execute('SELECT * FROM Cases_Per_Country WHERE country_code ="' + ca_code + '" AND date_collected ="' + str(date1)+ '"')
                result = c.fetchall()
                if len(result) == 0:
                    sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
                    c.execute(sql,(ca_code, row["date"], ca_src, death, case, recover))
                else:
                    break
            else:
                region_code = get_region_code(ca_code, region, c)
                date1 = row['date']
                c.execute('SELECT * FROM Cases_Per_Region WHERE region_code=' + str(region_code) + ' AND date_collected ="' + str(date1)+ '"')
                result = c.fetchall()
                if len(result) == 0:
                    sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
                    c.execute(sql,(region_code, row["date"], ca_src, death, case, recover))
                else:
                    break
        conn.commit()
        
        #insert country and region vaccination data
        ca_v = ca_v[::-1]
        for index, row in ca_v.iterrows():
            region = row["prename"]
            first = row["numtotal_atleast1dose"]
            second = toint(row["numtotal_fully"])
            third = toint(row["numtotal_additional"])
            if region == "Canada":
                date1 = row["week_end"]
                c.execute('SELECT * FROM Vaccinations_Per_Country WHERE country_code ="' + ca_code + '" AND date_collected ="' + str(date1)+ '"')
                result = c.fetchall()
                if len(result) == 0:
                    sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
                    c.execute(sql,(row["week_end"], first, second, third, ca_code, ca_src))
                else:
                    break
            else:
                date1 = row["week_end"]
                region_code = get_region_code(ca_code, region, c)
                c.execute('SELECT * FROM Vaccinations_Per_Region WHERE region_code ="' + str(region_code) + '" AND date_collected ="' + str(date1)+ '"')
                result = c.fetchall()
                if len(result) == 0:
                    sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
                    c.execute(sql,(row["week_end"], first, second, third, region_dict[region], ca_src))
                else:
                    break
    
        conn.commit()
        
        ca_strain = {}
        for index, row in ca_s.iterrows():
            if row["Variant Grouping"] == "VOC":
                if row["Collection (week)"] not in ca_strain:
                    ca_strain[row["Collection (week)"]] = {"Alpha":0 , "Beta": 0, "Gamma" :0, "Delta": 0, "Omicron": 0}
                ca_strain[row["Collection (week)"]][row["_Identifier"]] = ca_strain[row["Collection (week)"]][row["_Identifier"]] + row["%CT Count of Sample #"]
        for date1 in ca_strain:
            c.execute('SELECT * FROM Strains_Per_Country WHERE country_code ="' + ca_code + '" AND date_collected ="' + str(date1)+ '"')
            result = c.fetchall()
            if len(result) == 0:
                sql = '''INSERT INTO Strains_Per_Country (date_collected, country_code, source_id, alpha_rate, beta_rate, gamma_rate, delta_rate, omicron_rate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'''
                c.execute(sql,(date, ca_code, ca_src, ca_strain[date]["Alpha"], ca_strain[date]["Beta"], ca_strain[date]["Gamma"],ca_strain[date]["Delta"], ca_strain[date]["Omicron"]))
        conn.commit()
    
    def update_guatemala():
        conn = sqlite3.connect('prototype_db')
        c = conn.cursor()
        
        # get country_code for Guatemala
        gu_code = get_country_code("Guatemala", c)
        
        #get source id for US data
        gu_src_url = "https://tablerocovid.mspas.gob.gt/"
        gu_src = get_source_id(gu_src_url, c)
        
        v_src = "https://github.com/owid/covid-19-data"
        v_src = get_source_id(v_src, c)
        
        gu_death = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/fallecidosFF?w=0d14592e")
        gu_case = pd.read_csv("https://gtmvigilanciacovid.shinyapps.io/1GEAxasgYEyITt3Y2GrQqQFEDKW89fl9/_w_0d14592e/session/1f0e3b3486ac8317dfaad7a0be3f8481/download/confirmadosFER?w=0d14592e")
        gu_v = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/country_data/Guatemala.csv")
        
        
        gu = pd.merge(gu_case, gu_death, on=["departamento", "municipio"])
        #insert district case data and population data for guatemala
        region_dict = {}
        city_dict = {}
        for index, row in gu.iterrows():
            if index >= 1:
                region = row["departamento"]
                city = row["municipio"]
                for i in range(len(row) - 1, 4, -1):
                    if "_y" in gu.columns[i]:
                        date1 = gu.columns[i].replace("_y", "")
                        district_code = get_district_code(get_region_code(region), city, c)
                        c.execute('SELECT * FROM Cases_Per_District WHERE district_code=' + str(district_code) + ' AND date_collected ="' + str(date1)+ '"')
                        result = c.fetchall()
                        if len(result) == 0:
                            case = check(row[date1 + "_x"])
                            death = check(row[row[i]])
                            sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
                            c.execute(sql,(district_code, date1, gu_src, death, case))
                        else:
                            break
        conn.commit()
        
        #insert vaccination country data for guatemala
        gu_v = gu_v[::-1]
        for index, row in gu_v.iterrows():
            date1 = row['date']
            c.execute('SELECT * FROM Vaccinations_Per_Country WHERE country_code ="' + gu_code + '" AND date_collected ="' + str(date1)+ '"')
            result = c.fetchall()
            if len(result) == 0:
                sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
                c.execute(sql, (row["date"], toint(row["people_vaccinated"]), toint(row["people_fully_vaccinated"]), toint(row["total_boosters"]), gu_code, v_src))
            else:
                break
        conn.commit()