From 5fd271994a9060edc2292e733808567edc55fc4b Mon Sep 17 00:00:00 2001
From: "Fanchong(Ivy) Wang" <fw29@cs.washington.edu>
Date: Sun, 6 Mar 2022 04:47:19 -0800
Subject: [PATCH] add data for brazil

---
 daily_data_scripts/daily_sa.py             | 75 ++++++++++++++++++++
 initial_data_scripts/init_asia.py          |  8 +++
 initial_data_scripts/init_south_america.py | 79 ++++++++++++++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 daily_data_scripts/daily_sa.py
 create mode 100644 initial_data_scripts/init_south_america.py

diff --git a/daily_data_scripts/daily_sa.py b/daily_data_scripts/daily_sa.py
new file mode 100644
index 0000000..5852425
--- /dev/null
+++ b/daily_data_scripts/daily_sa.py
@@ -0,0 +1,75 @@
+import pandas as pd
+import sqlite3
+import sys
+import datetime
+from datetime import date
+import requests
+
+sys.path.append("..")
+
+from util import *
+
+def toint(s):
+    """Return int(s), or the string "NULL" when pd.isna(s) is true."""
+    # Callers compare the result against the "NULL" marker to spot a missing value.
+    if not pd.isna(s):
+        return int(s)
+    return "NULL"
+
+def update_brazil():
+    """Add Brazil rows from the wcota/covid19br CSVs that prototype_db lacks.
+
+    Country ("TOTAL"), state and city case rows are skipped when a row with the
+    same code and date is already stored. Vaccination rows are written only
+    next to a newly inserted case row that has a first-dose count.
+    """
+    conn = sqlite3.connect('prototype_db')
+    try:
+        c = conn.cursor()
+        br_code = get_country_code("Brazil", c)
+        br_src = get_source_id("https://github.com/wcota/covid19br", c)
+        br = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv")
+        br_city = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-cities.csv")
+
+        #insert country, state case  vaccination data
+        for index, row in br.iterrows():
+            region = row["state"]
+            case = row["newCases"]  # must not reuse "newDeaths", or cases would mirror deaths
+            death = row["newDeaths"]
+            recover = row["recovered"]
+            # toint() marks a missing count with the string "NULL"; bind those as SQL NULL (None)
+            doses = map(toint, (row["vaccinated"], row["vaccinated_second"], row["vaccinated_third"]))
+            first, second, third = [None if d == "NULL" else d for d in doses]
+            if region == "TOTAL":
+                c.execute('SELECT 1 FROM Cases_Per_Country WHERE country_code = ? AND date_collected = ?', (br_code, row["date"]))
+                if c.fetchone() is not None:
+                    continue  # continue, not break: later rows in the CSV may still be new
+                sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(br_code, row["date"], br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(row["date"], first, second, third, br_code, br_src))
+            else:
+                region_code = get_region_code(br_code, region, c)
+                c.execute('SELECT 1 FROM Cases_Per_Region WHERE region_code = ? AND date_collected = ?', (region_code, row["date"]))
+                if c.fetchone() is not None:
+                    continue  # same reasoning as the country branch
+                sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(region_code, row["date"], br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(row["date"], first, second, third, region_code, br_src))
+        conn.commit()
+
+        #insert new city case data for brazil
+        for index, row in br_city.iterrows():
+            region_code = get_region_code(br_code, row["state"], c)
+            city_code = get_district_code(region_code, row["city"], c)
+            c.execute('SELECT 1 FROM Cases_Per_District WHERE district_code = ? AND date_collected = ?', (city_code, row["date"]))
+            if c.fetchone() is not None:
+                continue  # already stored; avoids duplicate rows when the script is re-run
+            sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
+            c.execute(sql,(city_code, row["date"], br_src, row["newDeaths"], row["newCases"]))
+        conn.commit()
+    finally:
+        conn.close()
\ No newline at end of file
diff --git a/initial_data_scripts/init_asia.py b/initial_data_scripts/init_asia.py
index 44763e0..24dbd18 100644
--- a/initial_data_scripts/init_asia.py
+++ b/initial_data_scripts/init_asia.py
@@ -463,6 +463,9 @@ def init_india():
 
 #slow to run
 def init_china():
+    conn = sqlite3.connect('prototype_db')
+    c = conn.cursor()
+    
     # get country_code for china
     cn_code = get_country_code("China", c)
     
@@ -511,3 +514,8 @@ def init_china():
                     sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
                     c.execute(sql,(city_dict[region][city], date1, cn_src, row["city_deadCount"], row["city_confirmedCount"], row["city_curedCount"]))
     conn.commit()
+
+    #insert population data for china
+    sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
+    c.execute(sql,(cn_code,  1412600000, datetime.datetime(2021, 5, 1).date()))
+    conn.commit()
diff --git a/initial_data_scripts/init_south_america.py b/initial_data_scripts/init_south_america.py
new file mode 100644
index 0000000..333e315
--- /dev/null
+++ b/initial_data_scripts/init_south_america.py
@@ -0,0 +1,79 @@
+import pandas as pd
+import sqlite3
+import sys
+import datetime
+from datetime import date
+import requests
+
+sys.path.append("..")
+
+from util import *
+
+def toint(s):
+    """Convert s with int(); a value pd.isna() reports missing becomes "NULL"."""
+    missing = pd.isna(s)
+    if missing:
+        return "NULL"
+    return int(s)
+
+def init_brazil():
+    """Load the Brazil data from the wcota/covid19br CSVs into prototype_db.
+
+    Registers the source, then inserts country ("TOTAL"), state and city case
+    rows, vaccination rows when a first-dose count exists, and the population.
+    """
+    conn = sqlite3.connect('prototype_db')
+    try:
+        c = conn.cursor()
+        br_code = get_country_code("Brazil", c)
+        br_src_url = "https://github.com/wcota/covid19br"
+        set_source(br_src_url, c, conn)
+        br_src = get_source_id(br_src_url, c)
+        br = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv")
+        br_city = pd.read_csv("https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-cities.csv")
+        #insert country, state case  vaccination data
+        region_dict, city_dict = {}, {}
+        for index, row in br.iterrows():
+            region = row["state"]
+            case, death, recover = row["newCases"], row["newDeaths"], row["recovered"]  # cases come from "newCases"
+            # toint() marks a missing count with the string "NULL"; bind those as SQL NULL (None)
+            doses = map(toint, (row["vaccinated"], row["vaccinated_second"], row["vaccinated_third"]))
+            first, second, third = [None if d == "NULL" else d for d in doses]
+            if region == "TOTAL":
+                sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(br_code, row["date"], br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(row["date"], first, second, third, br_code, br_src))
+            else:
+                if region not in region_dict:
+                    sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
+                    c.execute(sql,(region, br_code))
+                    region_dict[region] = get_region_code(br_code, region, c)
+                    city_dict[region] = {}
+                sql = '''INSERT INTO Cases_Per_Region (region_code, date_collected, source_id, death_numbers, case_numbers, recovery_numbers) VALUES (?, ?, ?, ?, ?, ?)'''
+                c.execute(sql,(region_dict[region], row["date"], br_src, death, case, recover))
+                if first is not None:
+                    sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number,  third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
+                    c.execute(sql,(row["date"], first, second, third, region_dict[region], br_src))
+        conn.commit()
+
+        #insert population for brazil
+        sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
+        c.execute(sql,(br_code, 210147125, datetime.datetime(2018, 8, 30).date()))
+        conn.commit()
+
+        #insert city case data for brazil
+        for index, row in br_city.iterrows():
+            region, city = row["state"], row["city"]
+            if region not in region_dict:
+                continue  # NOTE(review): no Regions row for this state (possibly an aggregate row) -- confirm skipping is right
+            if city not in city_dict[region]:
+                sql = '''INSERT INTO Districts (district_name, region_code) VALUES (?, ?)'''
+                c.execute(sql,(city, region_dict[region]))
+                city_dict[region][city] = get_district_code(region_dict[region], city, c)
+            sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
+            c.execute(sql,(city_dict[region][city], row["date"], br_src, row["newDeaths"], row["newCases"]))
+        conn.commit()
+    finally:
+        conn.close()
\ No newline at end of file
-- 
GitLab