From 5fd271994a9060edc2292e733808567edc55fc4b Mon Sep 17 00:00:00 2001
From: "Fanchong(Ivy) Wang" <fw29@cs.washington.edu>
Date: Sun, 6 Mar 2022 04:47:19 -0800
Subject: [PATCH] add data for brazil

---
Review notes (not part of the commit message):
- Blob ids on the "index" lines come from the original commit and no longer
  match the two revised new files.
- Leading whitespace of the init_asia.py context lines was reconstructed
  (4-space indent assumed); confirm against the target file before applying.

 daily_data_scripts/daily_sa.py             | 100 +++++++++++++++++++++++
 initial_data_scripts/init_asia.py          |   8 ++
 initial_data_scripts/init_south_america.py |  93 +++++++++++++++++++++
 3 files changed, 201 insertions(+)
 create mode 100644 daily_data_scripts/daily_sa.py
 create mode 100644 initial_data_scripts/init_south_america.py

diff --git a/daily_data_scripts/daily_sa.py b/daily_data_scripts/daily_sa.py
new file mode 100644
index 0000000..5852425
--- /dev/null
+++ b/daily_data_scripts/daily_sa.py
@@ -0,0 +1,100 @@
+import pandas as pd
+import sqlite3
+import sys
+import datetime
+from datetime import date
+import requests
+
+sys.path.append("..")
+
+from util import *
+
+
+def toint(s):
+    """Return s as an int, or None (bound as SQL NULL) when s is missing."""
+    if pd.isna(s):
+        return None
+    return int(s)
+
+
+def row_exists(c, table, code_column, code, date_collected):
+    """Return True if table already has a row for this code and date.
+
+    table and code_column must be identifiers hard-coded by the caller;
+    only code and date_collected are bound as query parameters.
+    """
+    sql = 'SELECT 1 FROM ' + table + ' WHERE ' + code_column + ' = ? AND date_collected = ? LIMIT 1'
+    c.execute(sql, (code, date_collected))
+    return c.fetchone() is not None
+
+
+def update_brazil():
+    """Insert the Brazil rows that are not in the database yet.
+
+    The "TOTAL" state feeds the country tables, every other state feeds the
+    region tables and the city file feeds Cases_Per_District. A row whose
+    code and date are already stored is skipped.
+    """
+    conn = sqlite3.connect('prototype_db')
+    try:
+        c = conn.cursor()
+
+        # get country_code for brazil
+        br_code = get_country_code("Brazil", c)
+
+        # get source id for brazil data
+        br_src_url = "https://github.com/wcota/covid19br"
+        br_src = get_source_id(br_src_url, c)
+
+        br = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv")
+        br_city = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-cities.csv")
+
+        # insert country, state case vaccination data
+        region_codes = {}  # state name -> region_code, looked up once per state
+        for index, row in br.iterrows():
+            region = row["state"]
+            day = row["date"]
+            case = toint(row["newCases"])
+            death = toint(row["newDeaths"])
+            recover = toint(row["recovered"])
+            first = toint(row["vaccinated"])
+            second = toint(row["vaccinated_second"])
+            third = toint(row["vaccinated_third"])
+            if region == "TOTAL":
+                if row_exists(c, "Cases_Per_Country", "country_code", br_code, day):
+                    continue
+                sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(br_code, day, br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(day, first, second, third, br_code, br_src))
+            else:
+                if region not in region_codes:
+                    region_codes[region] = get_region_code(br_code, region, c)
+                region_code = region_codes[region]
+                if row_exists(c, "Cases_Per_Region", "region_code", region_code, day):
+                    continue
+                sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(region_code, day, br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(day, first, second, third, region_code, br_src))
+        conn.commit()
+
+        # insert new city case data for brazil
+        for index, row in br_city.iterrows():
+            region = row["state"]
+            city = row["city"]
+            # "TOTAL" is the country-level aggregate (see the state loop), not a region
+            if region == "TOTAL":
+                continue
+            if region not in region_codes:
+                region_codes[region] = get_region_code(br_code, region, c)
+            city_code = get_district_code(region_codes[region], city, c)
+            if row_exists(c, "Cases_Per_District", "district_code", city_code, row["date"]):
+                continue
+            sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
+            c.execute(sql,(city_code, row["date"], br_src, toint(row["newDeaths"]), toint(row["newCases"])))
+        conn.commit()
+    finally:
+        conn.close()
diff --git a/initial_data_scripts/init_asia.py b/initial_data_scripts/init_asia.py
index 44763e0..24dbd18 100644
--- a/initial_data_scripts/init_asia.py
+++ b/initial_data_scripts/init_asia.py
@@ -463,6 +463,9 @@ def init_india():
 
 #slow to run
 def init_china():
+    conn = sqlite3.connect('prototype_db')
+    c = conn.cursor()
+
     # get country_code for china
     cn_code = get_country_code("China", c)
 
@@ -511,3 +514,8 @@ def init_china():
         sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
         c.execute(sql,(city_dict[region][city], date1, cn_src, row["city_deadCount"], row["city_confirmedCount"], row["city_curedCount"]))
     conn.commit()
+
+    #insert population data for china
+    sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
+    c.execute(sql,(cn_code, 1412600000, datetime.datetime(2021, 5, 1).date()))
+    conn.commit()
diff --git a/initial_data_scripts/init_south_america.py b/initial_data_scripts/init_south_america.py
new file mode 100644
index 0000000..333e315
--- /dev/null
+++ b/initial_data_scripts/init_south_america.py
@@ -0,0 +1,93 @@
+import pandas as pd
+import sqlite3
+import sys
+import datetime
+from datetime import date
+import requests
+
+sys.path.append("..")
+
+from util import *
+
+
+def toint(s):
+    """Return s as an int, or None (bound as SQL NULL) when s is missing."""
+    if pd.isna(s):
+        return None
+    return int(s)
+
+
+def init_brazil():
+    """Load the initial Brazil data into prototype_db.
+
+    Calls set_source for the data source, then inserts country/region cases
+    and vaccinations, the country population and the per-city cases, adding
+    a Regions or Districts row the first time a state or city is seen.
+    """
+    conn = sqlite3.connect('prototype_db')
+    try:
+        c = conn.cursor()
+
+        # get country_code for brazil
+        br_code = get_country_code("Brazil", c)
+
+        #insert and get source id for brazil data
+        br_src_url = "https://github.com/wcota/covid19br"
+        set_source(br_src_url, c, conn)
+        br_src = get_source_id(br_src_url, c)
+
+        br = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv")
+        br_city = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-cities.csv")
+
+        #insert country, state case vaccination data
+        region_dict = {}
+        city_dict = {}
+        for index, row in br.iterrows():
+            region = row["state"]
+            case = toint(row["newCases"])
+            death = toint(row["newDeaths"])
+            recover = toint(row["recovered"])
+            first = toint(row["vaccinated"])
+            second = toint(row["vaccinated_second"])
+            third = toint(row["vaccinated_third"])
+            if region == "TOTAL":
+                sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(br_code, row["date"], br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(row["date"], first, second, third, br_code, br_src))
+            else:
+                if region not in region_dict:
+                    sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
+                    c.execute(sql,(region, br_code))
+                    region_dict[region] = get_region_code(br_code, region, c)
+                    city_dict[region] = {}
+                sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(region_dict[region], row["date"], br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(row["date"], first, second, third, region_dict[region], br_src))
+        conn.commit()
+
+        #insert population for brazil
+        sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
+        c.execute(sql,(br_code, 210147125, datetime.datetime(2018, 8, 30).date()))
+        conn.commit()
+
+        #insert city case data for brazil
+        for index, row in br_city.iterrows():
+            region = row["state"]
+            city = row["city"]
+            # skip rows whose state got no Regions row above (e.g. a "TOTAL"
+            # aggregate); city_dict[region] would raise KeyError for them
+            if region not in region_dict:
+                continue
+            if city not in city_dict[region]:
+                sql = '''INSERT INTO Districts (district_name, region_code) VALUES (?, ?)'''
+                c.execute(sql,(city, region_dict[region]))
+                city_dict[region][city] = get_district_code(region_dict[region], city, c)
+            sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
+            c.execute(sql,(city_dict[region][city], row["date"], br_src, toint(row["newDeaths"]), toint(row["newCases"])))
+        conn.commit()
+    finally:
+        conn.close()
-- 
GitLab