Skip to content
Snippets Groups Projects
Commit 42059c6d authored by Ivy Wang's avatar Ivy Wang
Browse files

fix small bugs for japan and us

parent 3360c8de
No related branches found
No related tags found
No related merge requests found
......@@ -196,28 +196,39 @@ def update_japan():
age_group = row
break
japan_age = japan_age[::-1]
japan_age.head(20)
for index,row in japan_age.iterrows():
d = row[0].find("~")
date1 = row[0][d + 1:]
date1 = datetime.datetime.strptime(row[0][d + 1:], "%Y/%m/%d").date()
c.execute('SELECT * FROM Age_Per_Country WHERE country_id ="' + japan_code + '" AND date_collected ="' + str(date1)+ '"')
result = c.fetchall()
if len(result) == 0:
for i in range(0, len(cities)):
if cities[i].find("Unnamed") == -1:
if cities[i] == "ALL":
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, japan_code, japan_src1, age, case))
else:
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
if pd.isna(case) or case == "*":
case = null
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, region_dict[cities[i]], japan_src1, age, case))
date1 = datetime.datetime.strptime(row[0][:d], "%Y/%m/%d").date()
date2 = datetime.datetime.strptime(row[0][d + 1:], "%Y/%m/%d").date()
while date1 != date2 + datetime.timedelta(days=1):
for i in range(0, len(cities)):
if cities[i].find("Unnamed") == -1:
if cities[i] == "ALL":
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
if pd.isna(case) or case == "*":
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, japan_code, japan_src1, age, case))
else:
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
if pd.isna(case) or case == "*":
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, region_dict[cities[i]], japan_src1, age, case))
date1 = date1 + datetime.timedelta(days=1)
else:
break
conn.commit()
......
......@@ -4,7 +4,6 @@ import sys
import datetime
import requests
from datetime import date
from datetime import datetime
sys.path.append("..")
from util import *
......@@ -32,7 +31,6 @@ def update_us():
us_country = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us.csv")
#us_state = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-states.csv")
#just use recent data for counties, otherwise the dataset is too large (can change later)
us_county = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-counties-recent.csv")
us_sv = pd.read_csv("https://data.cdc.gov/api/views/rh2h-3yt2/rows.csv")
......@@ -58,15 +56,14 @@ def update_us():
region_dict[result[i][1]] = result[i][0]
#insert county code and data
county_dict = {}
us_county = us_county[::-1]
for index, row in us_county.iterrows():
state = row["state"]
county = row["county"]
print(region_dict[state], county)
county_code = get_district_code(region_dict[state], county, c)
date1 = row['date']
c.execute('SELECT * FROM Cases_Per_District WHERE district_code' + str(county_code) + 'AND date_collected ="' + str(date1)+ '"')
c.execute('SELECT * FROM Cases_Per_District WHERE district_code=' + str(county_code) + ' AND date_collected ="' + str(date1)+ '"')
result = c.fetchall()
if len(result) == 0:
sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
......
......@@ -59,7 +59,7 @@ def init_japan():
conn.commit()
#get region_code for Japan city
c.execute("SELECT region_code, region_name from Regions Where country_code = 'JP' ")
c.execute("SELECT region_code, region_name from Regions Where country_code = 'JP'")
result = c.fetchall()
japan_region = []
region_dict = {}
......@@ -81,32 +81,41 @@ def init_japan():
age_group = row
break
for index,row in japan_age.iterrows():
d = row[0].find("~")
date1 = row[0][d + 1:]
if index >= 1:
for i in range(0, len(cities)):
if cities[i].find("Unnamed") == -1:
if cities[i] == "ALL":
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, japan_code, japan_src1, age, case))
else:
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
if pd.isna(case) or case == "*":
case = null
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, region_dict[cities[i]], japan_src1, age, case))
d = row[0].find("~")
date1 = datetime.datetime.strptime(row[0][:d], "%Y/%m/%d").date()
date2 = datetime.datetime.strptime(row[0][d + 1:], "%Y/%m/%d").date()
while date1 != date2 + datetime.timedelta(days=1):
for i in range(0, len(cities)):
if cities[i].find("Unnamed") == -1:
if cities[i] == "ALL":
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
if pd.isna(case) or case == "*":
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Country (date_collected, country_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, japan_code, japan_src1, age, case))
else:
for j in range(0, 20):
age = age_group[i + j]
case = row[i + j]
if pd.isna(case) or case == "*":
case = null
else:
case = round(int(row[i + j]) / 7)
sql = '''INSERT INTO Age_Per_Region (date_collected, region_id, source_id, age_group, case_number) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(date1, region_dict[cities[i]], japan_src1, age, case))
date1 = date1 + datetime.timedelta(days=1)
conn.commit()
#get Japan vaccination data (includes population data)
japan_vs = pd.ExcelFile("https://www.kantei.go.jp/jp/content/kenbetsu-vaccination_data2.xlsx")
sheets = japan_vs.sheet_names
japan_v = pd.read_excel(japan_vs, sheets[2])
#insert vaccination data and population data for Japan
for index, row in japan_v.iterrows():
......@@ -117,7 +126,7 @@ def init_japan():
sql = '''INSERT INTO Vaccinations_Per_Country (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, country_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
c.execute(sql,(date.today(), rate1, rate2, rate3, japan_code, japan_src2))
sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
c.execute(sql,(japan_code, row[12], date.today()))
c.execute(sql,(japan_code, row[len(row) - 1], date.today()))
break
conn.commit()
......@@ -132,7 +141,7 @@ def init_japan():
sql = '''INSERT INTO Vaccinations_Per_Region (date_collected, first_vaccination_number, second_vaccination_number, third_vaccination_number, region_code, source_id) VALUES (?, ?, ?, ?, ?, ?)'''
c.execute(sql,(date.today(), rate1, rate2, rate3, region_dict[city], japan_src2))
sql = '''INSERT INTO Population_Per_Region (region_code, population_amount, date_collected) VALUES (?, ?, ?)'''
c.execute(sql,(region_dict[city], row[12], date.today()))
c.execute(sql,(region_dict[city], row[len(row) - 1], date.today()))
conn.commit()
c.close()
......
......@@ -5,7 +5,6 @@ import sys
import datetime
from datetime import date
import requests
from datetime import datetime
sys.path.append("..")
from util import *
......@@ -34,8 +33,7 @@ def init_us():
us_src_v = get_source_id(us_src_v, c)
us_country = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us.csv")
#us_state = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-states.csv")
#just use recent data for counties otherwise too large(can change later)
us_state = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-states.csv")
us_county = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-counties-recent.csv")
us_sv = pd.read_csv("https://data.cdc.gov/api/views/rh2h-3yt2/rows.csv")
......@@ -45,6 +43,7 @@ def init_us():
c.execute(sql,(us_code, row["date"], us_src, row["deaths"], row["cases"]))
conn.commit()
county_dict = {}
region_dict = {}
#get state code for US
c.execute("SELECT region_code, region_name from Regions Where country_code = 'US'")
......@@ -52,9 +51,10 @@ def init_us():
for i in range(0,len(result)):
region_dict[result[i][1]] = result[i][0]
county_dict[result[i][1]] = {}
#insert county code and data
county_dict = {}
for index, row in us_county.iterrows():
state = row["state"]
county = row["county"]
......@@ -62,13 +62,15 @@ def init_us():
sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
c.execute(sql,(state, us_code))
region_dict[state] = get_region_code(us_code, state, c)
if county not in county_dict:
county_dict[state] = {}
if county not in county_dict[state]:
sql = '''INSERT INTO Districts (district_name, region_code) VALUES (?, ?)'''
c.execute(sql,(county, region_dict[state]))
county_dict[county] = get_district_code(region_dict[state], county, c)
county_dict[state][county] = get_district_code(region_dict[state], county, c)
sql = '''INSERT INTO Cases_Per_District (district_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
c.execute(sql,(county_dict[county], row["date"], us_src, row["deaths"], row["cases"]))
c.execute(sql,(county_dict[state][county], row["date"], us_src, row["deaths"], row["cases"]))
conn.commit()
print(county_dict)
#get and insert population data
abb = {}
......@@ -90,7 +92,7 @@ def init_us():
c.execute(sql,(state, us_code))
region_dict[state] = get_region_code(us_code, state, c)
sql = '''INSERT INTO Population_Per_Region (region_code, population_amount, date_collected) VALUES (?, ?, ?)'''
c.execute(sql,(region_dict[state], row[5], datetime(2020, 4, 1).date()))
c.execute(sql,(region_dict[state], row[5], datetime.datetime(2020, 4, 1).date()))
sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
c.execute(sql,("Palau", us_code))
region_dict["Palau"] = get_region_code(us_code, "Palau", c)
......@@ -106,7 +108,7 @@ def init_us():
conn.commit()
sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
c.execute(sql,(us_code, 334735155, datetime(2020, 4, 1).date()))
c.execute(sql,(us_code, 334735155, datetime.datetime(2020, 4, 1).date()))
conn.commit()
#insert vaccination data for country and state
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment