Skip to content
Snippets Groups Projects
Commit 650f1390 authored by Ivy Wang's avatar Ivy Wang
Browse files

organize init_japan and add code for init_korea

parent c7a489db
No related branches found
No related tags found
No related merge requests found
File added
......@@ -39,7 +39,7 @@ CREATE TABLE Sources(
-- information on cases, recovery numbers, and deaths
-- per country
CREATE TABLE Cases_Per_Country(
country_code VARCHAR(2) PRIMARY KEY,
country_code VARCHAR(2),
date_collected DATETIME2 NOT NULL,
source_id BIGINT NOT NULL,
death_numbers INT NULL,
......@@ -53,7 +53,7 @@ CREATE TABLE Cases_Per_Country(
-- information on cases, recovery numbers, and deaths
-- per region
CREATE TABLE Cases_Per_Region(
region_code BIGINT PRIMARY KEY,
region_code BIGINT,
date_collected DATETIME2 NOT NULL,
source_id BIGINT NOT NULL,
death_numbers INT NULL,
......@@ -67,7 +67,7 @@ CREATE TABLE Cases_Per_Region(
-- information on cases, recovery numbers, and deaths
-- per district
CREATE TABLE Cases_Per_District(
district_code BIGINT PRIMARY KEY,
district_code BIGINT,
date_collected DATETIME2 NOT NULL,
source_id BIGINT NOT NULL,
death_numbers INT NULL,
......@@ -80,7 +80,9 @@ CREATE TABLE Cases_Per_District(
-- keeps track of vaccinations per Country
CREATE TABLE Vaccinations_Per_Country(
vaccination_rate INT NOT NULL,
first_vaccination_rate FLOAT NOT NULL,
second_vaccination_rate FLOAT NOT NULL,
third_vaccination_rate FLOAT NOT NULL,
country_code VARCHAR(2) PRIMARY KEY,
source_id BIGINT NOT NULL,
FOREIGN KEY (country_code) REFERENCES Countries(country_code),
......@@ -89,7 +91,9 @@ CREATE TABLE Vaccinations_Per_Country(
-- keeps track of vaccinations per Region
CREATE TABLE Vaccinations_Per_Region(
vaccination_rate INT NOT NULL,
first_vaccination_rate FLOAT NOT NULL,
second_vaccination_rate FLOAT NOT NULL,
third_vaccination_rate FLOAT NOT NULL,
region_code BIGINT PRIMARY KEY,
source_id BIGINT NOT NULL,
FOREIGN KEY (region_code) REFERENCES Regions(region_code),
......@@ -98,7 +102,9 @@ CREATE TABLE Vaccinations_Per_Region(
-- keeps track of vaccinations per District
CREATE TABLE Vaccinations_Per_District(
vaccination_rate INT NOT NULL,
first_vaccination_rate FLOAT NOT NULL,
second_vaccination_rate FLOAT NOT NULL,
third_vaccination_rate FLOAT NOT NULL,
district_code BIGINT PRIMARY KEY,
source_id BIGINT NOT NULL,
FOREIGN KEY (district_code) REFERENCES Districts(district_code),
......
import pandas as pd
import sqlite3
import sys
sys.path.append("..")
from util import *
# Translator used to turn Japanese/Korean region labels into English names.
# The google_trans_new package must be installed beforehand
# (``pip install google_trans_new``): the IPython "!pip ..." shell escape that
# used to sit here is not valid Python and made this module fail on import.
from google_trans_new import google_translator

translator = google_translator()
def init_japan():
    """Populate the prototype database with Japan's COVID-19 data.

    Registers the two Japanese data sources, creates one ``Regions`` row per
    region column of the MHLW daily-cases CSV, and loads daily case/death
    counts (country and region level) plus vaccination rates and population
    figures read from the Kantei vaccination workbook.

    Depends on ``get_country_code``, ``set_source`` and ``get_source_id``
    (star-imported from ``util``), on the module-level ``translator`` and on
    network access to the source files. Returns ``None``.

    Raises:
        KeyError: if a translated region label from the vaccination workbook
            does not match any region name registered from the cases CSV.
    """
    from contextlib import closing
    from datetime import date

    # closing() releases the connection even when a download or an insert
    # fails part-way through.
    with closing(sqlite3.connect('prototype_db')) as conn:
        c = conn.cursor()

        # get country_code for Japan
        japan_code = get_country_code("Japan", c)

        # insert and get source ids for the Japan data
        japan_src1_url = "https://covid19.mhlw.go.jp/en/"
        set_source(japan_src1_url, c, conn)
        japan_src2_url = "https://www.kantei.go.jp/jp/headline/kansensho/vaccine.html"
        set_source(japan_src2_url, c, conn)
        japan_src1 = get_source_id(japan_src1_url, c)
        japan_src2 = get_source_id(japan_src2_url, c)

        # newly confirmed cases and deaths; after the merge the case columns
        # carry the "_x" suffix and the death columns the "_y" suffix
        japan = pd.read_csv("https://covid19.mhlw.go.jp/public/opendata/newly_confirmed_cases_daily.csv")
        japan_death = pd.read_csv("https://covid19.mhlw.go.jp/public/opendata/number_of_deaths_daily.csv")
        japan_all = pd.merge(japan, japan_death, on=["Date"])

        # one Regions row per CSV column, except "Date" and "ALL"
        # ("ALL" is used below as the country-level figure)
        insert_region_sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
        for col in japan.columns:
            if col != "Date" and col != "ALL":
                c.execute(insert_region_sql, (col, japan_code))
        conn.commit()

        # insert daily country-level data for Japan
        country_cases_sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
        for row in japan_all.itertuples(index=True, name='Pandas'):
            c.execute(country_cases_sql, (japan_code, row.Date, japan_src1, row.ALL_y, row.ALL_x))
        conn.commit()

        # Look up the region codes that belong to Japan only. Without the
        # country filter, regions inserted by other loaders would be picked
        # up too and the column lookups below would fail with KeyError.
        c.execute(
            "SELECT region_code, region_name from Regions WHERE country_code = ?",
            (japan_code,),
        )
        japan_regions = c.fetchall()
        region_dict = {name: code for code, name in japan_regions}

        # insert daily region-level data
        region_cases_sql = '''INSERT INTO Cases_Per_Region(region_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
        for _, row in japan_all.iterrows():
            for region_code, region_name in japan_regions:
                c.execute(
                    region_cases_sql,
                    (
                        region_code,
                        row['Date'],
                        japan_src1,
                        row[region_name + "_y"],
                        row[region_name + "_x"],
                    ),
                )
        conn.commit()

        # get Japan vaccination data (the sheet also carries population data)
        japan_vs = pd.ExcelFile("https://www.kantei.go.jp/jp/content/kenbetsu-vaccination_data2.xlsx")
        sheets = japan_vs.sheet_names
        japan_v = pd.read_excel(japan_vs, sheets[2])
        today = date.today()

        # NOTE(review): the Vaccinations_Per_* tables are being given
        # first/second/third_vaccination_rate NOT NULL columns; the INSERTs
        # below only supply vaccination_rate -- confirm against the schema.

        # Row 5 of the sheet is used as the country-level record:
        # column 3 is stored as the rate, column 12 as the population.
        # .iloc is used because integer keys on a label-indexed Series are
        # deprecated as positional access.
        if 5 in japan_v.index:
            national = japan_v.loc[5]
            sql = '''INSERT INTO Vaccinations_Per_Country (vaccination_rate, country_code, source_id) VALUES (?, ?, ?)'''
            c.execute(sql, (national.iloc[3], japan_code, japan_src2))
            sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
            c.execute(sql, (japan_code, national.iloc[12], today))
        conn.commit()

        # Rows 6..52 (inclusive) are used as the per-region records.
        region_vacc_sql = '''INSERT INTO Vaccinations_Per_Region (vaccination_rate, region_code, source_id) VALUES (?, ?, ?)'''
        region_pop_sql = '''INSERT INTO Population_Per_Region (region_code, population_amount, date_collected) VALUES (?, ?, ?)'''
        for _, row in japan_v.loc[6:52].iterrows():
            # The second whitespace-separated word of the translated label is
            # taken as the region name.
            # NOTE(review): this depends on the translator's output format.
            city = translator.translate(row.iloc[0])
            city = city.split()[1]
            region_code = region_dict[city]
            c.execute(region_vacc_sql, (row.iloc[3], region_code, japan_src2))
            c.execute(region_pop_sql, (region_code, row.iloc[12], today))
        conn.commit()
def init_korea():
    """Populate the prototype database with South Korea's COVID-19 data.

    Registers the Korean data source, creates one ``Regions`` row per region
    label found in the case sheet of the MOHW workbook, and loads daily
    country-level case/death counts and region-level case counts.

    Depends on ``get_country_code``, ``set_source``, ``get_source_id`` and
    ``get_region_code`` (star-imported from ``util``), on the module-level
    ``translator`` and on network access to the workbook. Returns ``None``.
    """
    from contextlib import closing

    # closing() releases the connection even when a download or an insert
    # fails part-way through.
    with closing(sqlite3.connect('prototype_db')) as conn:
        c = conn.cursor()

        # get country_code for Korea
        korea_code = get_country_code("Korea, Republic of", c)

        # insert and get source id for the Korea data
        korea_src_url = "http://ncov.mohw.go.kr/index.jsp"
        set_source(korea_src_url, c, conn)
        korea_src = get_source_id(korea_src_url, c)

        # get korea data; "-" cells are replaced by 0 in both sheets
        workbook = pd.ExcelFile("http://ncov.mohw.go.kr/upload/ncov/file/202202/1645425583350_20220221153943.xlsx")
        sheets = workbook.sheet_names
        korea_case = pd.read_excel(workbook, sheets[3]).replace("-", 0)
        korea_death = pd.read_excel(workbook, sheets[0]).replace("-", 0)
        # after the merge, clashing column names get "_x" (case sheet) and
        # "_y" (death sheet) suffixes
        korea = pd.merge(korea_case, korea_death, on=["Unnamed: 0"])

        # Insert the region table. Row 3 of the case sheet is read as the
        # row of region labels; columns 2 .. second-to-last are used.
        # Maps column position -> (region name, region code).
        index_region = {}
        insert_region_sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
        if 3 in korea_case.index:
            labels = korea_case.loc[3]
            for i in range(2, len(labels) - 1):
                # .iloc: integer keys on a label-indexed Series are
                # deprecated as positional access.
                city = translator.translate(labels.iloc[i])
                # the translator output "game " is mapped to "Gyeonggi"
                # (presumably a mistranslation of that label)
                if city == "game ":
                    city = "Gyeonggi"
                c.execute(insert_region_sql, (city, korea_code))
                index_region[i] = (city, get_region_code(korea_code, city, c))
        conn.commit()

        # Insert data for Korea and its regions; merged rows from index 11
        # onwards are treated as the daily records.
        country_sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
        region_sql = '''INSERT INTO Cases_Per_Region(region_code, date_collected, source_id, case_numbers) VALUES (?, ?, ?, ?)'''
        for _, row in korea.loc[11:].iterrows():
            collected_on = row.iloc[0]
            cases = row.iloc[1]
            death = row["Unnamed: 4_y"]
            c.execute(country_sql, (korea_code, collected_on, korea_src, death, cases))
            # NOTE(review): columns 2..17 are hard-coded here while the
            # regions above come from range(2, len(labels) - 1); confirm the
            # two ranges are meant to coincide.
            for i in range(2, 18):
                city_code = index_region[i][1]
                c.execute(region_sql, (city_code, collected_on, korea_src, row.iloc[i]))
        conn.commit()
\ No newline at end of file
No preview for this file type
"""Stand-alone loader: fills the prototype database with Japan's COVID-19 data.

Registers the two Japanese sources, creates the Japanese ``Regions`` rows and
loads daily case/death counts plus vaccination and population figures.
"""
import sqlite3
from datetime import date

import pandas as pd
# google_trans_new must be installed beforehand
# (``pip install google_trans_new``); the bare "pip install" line that used to
# sit in the middle of this script is not valid Python outside a notebook.
from google_trans_new import google_translator

conn = sqlite3.connect('prototype_db')
c = conn.cursor()

# get country_code for Japan
c.execute("SELECT country_code from Countries where country_name = 'Japan'")
result = c.fetchall()
japan_code = result[0][0]
print(result[0][0])

# insert and get source id for japan source1
japan_source1_url = 'https://covid19.mhlw.go.jp/en/'
c.execute("INSERT INTO Sources (source_information) VALUES (?)", (japan_source1_url,))
conn.commit()
c.execute("SELECT source_id from Sources where source_information = ?", (japan_source1_url,))
result = c.fetchall()
japan_source1 = result[0][0]

# get newly confirmed data and death data for Japan; after the merge the case
# columns carry the "_x" suffix and the death columns the "_y" suffix
japan = pd.read_csv("https://covid19.mhlw.go.jp/public/opendata/newly_confirmed_cases_daily.csv")
japan_death = pd.read_csv("https://covid19.mhlw.go.jp/public/opendata/number_of_deaths_daily.csv")
japan_all = pd.merge(japan, japan_death, on=["Date"])

# insert regions: one row per CSV column except "Date" and "ALL"
insert_region_sql = '''INSERT INTO Regions (region_name, country_code) VALUES (?, ?)'''
for col in japan.columns:
    if col != "Date" and col != "ALL":
        c.execute(insert_region_sql, (col, japan_code))
conn.commit()

# insert daily data for Japan
country_cases_sql = '''INSERT INTO Cases_Per_Country (country_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
for row in japan_all.itertuples(index=True, name='Pandas'):
    c.execute(country_cases_sql, (japan_code, row.Date, japan_source1, row.ALL_y, row.ALL_x))
conn.commit()

# Get the region codes that belong to Japan only. Without the country filter,
# regions of other countries already in the table would be picked up and the
# column lookups below would fail with KeyError.
c.execute(
    "SELECT region_code, region_name from Regions WHERE country_code = ?",
    (japan_code,),
)
result = c.fetchall()
japan_region = []
region_dict = {}
for region_code, region_name in result:
    japan_region.append([region_code, region_name + "_x", region_name + "_y"])
    region_dict[region_name] = region_code

# insert region daily case data
region_cases_sql = '''INSERT INTO Cases_Per_Region(region_code, date_collected, source_id, death_numbers, case_numbers) VALUES (?, ?, ?, ?, ?)'''
for index, row in japan_all.iterrows():
    for city in japan_region:
        c.execute(region_cases_sql, (city[0], row['Date'], japan_source1, row[city[2]], row[city[1]]))
conn.commit()

# insert and get source id for Japan vaccination data
japan_source2_url = 'https://www.kantei.go.jp/jp/headline/kansensho/vaccine.html'
c.execute("INSERT INTO Sources (source_information) VALUES (?)", (japan_source2_url,))
conn.commit()
c.execute("SELECT source_id from Sources where source_information = ?", (japan_source2_url,))
result = c.fetchall()
japan_source2 = result[0][0]

translator = google_translator()

# get Japan vaccination data (the sheet also carries population data)
japan_vs = pd.ExcelFile("https://www.kantei.go.jp/jp/content/kenbetsu-vaccination_data2.xlsx")
sheets = japan_vs.sheet_names
japan_v = pd.read_excel(japan_vs, sheets[2])

# Insert vaccination and population data for Japan. Row 5 of the sheet is
# used as the country-level record: column 3 is stored as the rate, column 12
# as the population. .iloc is used because integer keys on a label-indexed
# Series are deprecated as positional access.
if 5 in japan_v.index:
    row = japan_v.loc[5]
    rate = row.iloc[3]
    sql = '''INSERT INTO Vaccinations_Per_Country (vaccination_rate, country_code, source_id) VALUES (?, ?, ?)'''
    c.execute(sql, (rate, japan_code, japan_source2))
    sql = '''INSERT INTO Population_Per_Country (country_code, population_amount, date_collected) VALUES (?, ?, ?)'''
    c.execute(sql, (japan_code, row.iloc[12], date.today()))
conn.commit()

# Insert vaccination and population data for the regions of Japan;
# rows 6..52 (inclusive) are used as the per-region records.
region_vacc_sql = '''INSERT INTO Vaccinations_Per_Region (vaccination_rate, region_code, source_id) VALUES (?, ?, ?)'''
region_pop_sql = '''INSERT INTO Population_Per_Region (region_code, population_amount, date_collected) VALUES (?, ?, ?)'''
for index, row in japan_v.loc[6:52].iterrows():
    # The second whitespace-separated word of the translated label is taken
    # as the region name.
    # NOTE(review): this depends on the translator's output format.
    city = translator.translate(row.iloc[0])
    city = city.split()[1]
    rate = row.iloc[3]
    c.execute(region_vacc_sql, (rate, region_dict[city], japan_source2))
    c.execute(region_pop_sql, (region_dict[city], row.iloc[12], date.today()))
conn.commit()

# all work is committed; release the database handle
conn.close()
\ No newline at end of file
......@@ -83,7 +83,9 @@ c.execute('''
c.execute('''
CREATE TABLE Vaccinations_Per_Country(
vaccination_rate FLOAT NOT NULL,
first_vaccination_rate FLOAT NOT NULL,
second_vaccination_rate FLOAT NOT NULL,
third_vaccination_rate FLOAT NOT NULL,
country_code VARCHAR(2) PRIMARY KEY,
source_id BIGINT NOT NULL,
FOREIGN KEY (country_code) REFERENCES Countries(country_code),
......@@ -93,7 +95,9 @@ c.execute('''
c.execute('''
CREATE TABLE Vaccinations_Per_Region(
vaccination_rate FLOAT NOT NULL,
first_vaccination_rate FLOAT NOT NULL,
second_vaccination_rate FLOAT NOT NULL,
third_vaccination_rate FLOAT NOT NULL,
region_code BIGINT PRIMARY KEY,
source_id BIGINT NOT NULL,
FOREIGN KEY (region_code) REFERENCES Regions(region_code),
......@@ -103,7 +107,9 @@ c.execute('''
c.execute('''
CREATE TABLE Vaccinations_Per_District(
vaccination_rate FLOAT NOT NULL,
first_vaccination_rate FLOAT NOT NULL,
second_vaccination_rate FLOAT NOT NULL,
third_vaccination_rate FLOAT NOT NULL,
district_code BIGINT PRIMARY KEY,
source_id BIGINT NOT NULL,
FOREIGN KEY (district_code) REFERENCES Districts(district_code),
......@@ -145,3 +151,4 @@ countries.to_sql('Countries',con=conn, if_exists = 'append', index=False)
c.close()
from initial_data_scripts.init_europe import init_italy, init_ukraine
from initial_data_scripts.init_asia import init_japan, init_korea
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment