Commit 451e8d1d authored by emma fritzberg
Browse files

add files developed on JupyterLab

parent 9ec0026f
This diff is collapsed.
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
%% Cell type:code id: tags:
from datetime import datetime, timedelta
from geopy import distance
def get_datetime(timestamp: str) -> datetime:
    """Parse a Sofar timestamp string into a datetime object.

    The string must match SOFAR_TIMESTAMP_FORMAT, e.g.
    '2021-08-28T12:28:16.000Z'; datetime.strptime raises ValueError otherwise.
    """
    # NOTE(review): SOFAR_TIMESTAMP_FORMAT must be defined at module level; its
    # definition is not visible alongside this function - confirm it exists.
    parsed = datetime.strptime(timestamp, SOFAR_TIMESTAMP_FORMAT)
    return parsed
def distance_travelled(start_location, end_location):
    """Return the distance between two coordinate sets in kilometers.

    Both locations are handed unchanged to geopy's distance.distance; callers
    in this project pass (latitude, longitude) tuples. The result is a float.
    """
    separation = distance.distance(start_location, end_location)
    return separation.km
def time_elapsed(start_timestamp, end_timestamp) -> float:
    """Return the time from start_timestamp to end_timestamp in hours.

    Both arguments are timestamp strings accepted by get_datetime. The result
    is a float, and it is negative when the end precedes the start.
    """
    ended_at = get_datetime(end_timestamp)
    started_at = get_datetime(start_timestamp)
    one_hour = timedelta(hours=1)
    # Dividing one timedelta by another yields a plain float ratio.
    return (ended_at - started_at) / one_hour
import csv
import math
from ocean_drifters_utils import get_datetime, distance_travelled, time_elapsed
# Parse the raw CSV data into the global var `spotters`, a dict representing
# spotters and their paths. Each key is a spotter ID and its associated value
# is a dict representing the path of the spotter with that ID, in which keys
# are timestamp strings and values are tuples containing the location in the
# format (latitude, longitude), both as floats.
#   key:              value:
#   spotter ID -->    {
#                         sub-key:        sub-value:
#                         timestamp -->   (latitude, longitude),
#                         timestamp -->   (latitude, longitude),
#                         timestamp -->   (latitude, longitude),
#                     }
spotters = {}
# newline='' lets the csv module do its own newline handling, matching how the
# enriched output file is opened.
with open('data/raw_spotter_data.csv', newline='') as raw_data:
    for row in csv.DictReader(raw_data):
        # Convert first so that a malformed row raises before anything is stored.
        location = (float(row['latitude']), float(row['longitude']))
        # Assumption: each spotter has only one recorded location for a given
        # timestamp; if a (spotter, timestamp) pair repeats, the last row wins.
        # setdefault creates the path dict the first time a spotter is seen and
        # extends the existing one afterwards.
        spotters.setdefault(row['spotterId'], {})[row['timestamp']] = location
# Write one enriched row per recorded position of every spotter: the parsed
# fields plus a centered-in-time velocity and a placeholder coastline distance.
with open('data/enriched_spotter_data.csv', 'w', newline='') as enriched_data:
    writer = csv.DictWriter(
        enriched_data,
        fieldnames=['spotter_id', 'timestamp', 'latitude', 'longitude', 'velocity', 'coastline_distance'],
    )
    # NOTE(review): the text this block was restored from does not show whether
    # a header row was written; one is emitted here so the file can be read
    # back with csv.DictReader - confirm with downstream consumers.
    writer.writeheader()

    for spotter_id, path in spotters.items():
        # Timestamps are strings, so order them by their parsed datetime value.
        timestamps = sorted(path, key=get_datetime)

        def centered_in_time_velocity(timestamp_index):
            """Return the velocity at the given position using its two neighbors.

            The velocity is the distance between the previous and the next
            recorded locations divided by the time between them, i.e. km/h
            given that distance_travelled returns kilometers and time_elapsed
            returns hours. math.nan is returned for the first and last
            positions (which lack a neighbor on one side), for paths too short
            to have an interior position, and when the elapsed time is not
            positive.
            """
            count = len(timestamps)
            if count < 3:
                # No interior position exists; also avoids a modulo by zero.
                return math.nan
            # Normalise once and use the normalised index throughout, so that
            # negative or wrapped indices pick consistent neighbors.
            index = timestamp_index % count
            if index == 0 or index == count - 1:
                return math.nan
            prev_timestamp = timestamps[index - 1]
            next_timestamp = timestamps[index + 1]
            dx = distance_travelled(path[prev_timestamp], path[next_timestamp])
            dt = time_elapsed(prev_timestamp, next_timestamp)
            if dt <= 0:
                return math.nan
            return dx / dt

        # Iterate over every position, including the last one: its velocity is
        # NaN, but its location must still appear in the output.
        for i, timestamp in enumerate(timestamps):
            latitude, longitude = path[timestamp]
            writer.writerow({
                'spotter_id': spotter_id,
                'timestamp': timestamp,
                'latitude': latitude,
                'longitude': longitude,
                'velocity': centered_in_time_velocity(timestamp_index=i),
                # TODO: calculate distance from nearest coastline
                'coastline_distance': math.nan,
            })
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment