Skip to content

Latest commit

 

History

History
1426 lines (1374 loc) · 33.4 KB

File metadata and controls

1426 lines (1374 loc) · 33.4 KB

Raw data importing

Our primary goal here is to import, interpolate, and extend our bike-riding GPS data. This is not a small coding task, but vectorizing every step keeps execution really fast. We have about 1.7M data points for this task. Each data point is a single point in space-time, i.e. a point with four dimensions: timestamp, latitude, longitude and elevation. Connecting the points of a ride sequentially in time should trace the path the rider took. For bookkeeping and post-processing, we also record the point id, ride id and rider id.

# import needed packages
import gpxpy
import datetime 
import time
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import os
import numpy as np
import re
import sys
import pandas as pd
import pickle as pk
from tqdm import tqdm_notebook as tqdm
from geopy.distance import great_circle
# from scipy.signal import gaussian
# from scipy.ndimage import filters
from scipy.ndimage.filters import gaussian_filter
from mpl_toolkits.axes_grid1 import make_axes_locatable
import nvector as nv
from pandas.plotting import andrews_curves
# import the gpx data
# Matches any file name that ends in "gpx"; the pattern does not require a
# "." before the extension.
gpx_filename_pattern = re.compile("(gpx)$")
def build_meta(gpx):
    """Count the tracks, segments and points held by a parsed GPX object.

    Returns a dict with the integer totals stored under the keys
    "tracks", "segments" and "points".
    """
    n_tracks = n_segments = n_points = 0
    for trk in gpx.tracks:
        n_tracks += 1
        for seg in trk.segments:
            n_segments += 1
            # Count by iterating, so any iterable of points is accepted.
            n_points += sum(1 for _ in seg.points)
    return {"tracks": n_tracks, "segments": n_segments, "points": n_points}

def import_gpx_file(path, filename):
    """Parse one GPX file into a DataFrame with one row per track point.

    Parameters
    ----------
    path : str
        Directory that contains the file.
    filename : str
        Name of the GPX file inside ``path``.

    Returns
    -------
    pandas.DataFrame
        Columns ``latitude``, ``longitude``, ``elevation``, ``timestamp``
        and ``point`` (running index of the point within the file), plus
        the placeholder columns ``ride`` (0) and ``rider`` ("") that the
        callers overwrite.
    """
    # The context manager closes the handle even when parsing fails.
    with open(path+'/'+filename, 'r') as file:
        gpx = gpxpy.parse(file)
    meta = build_meta(gpx)
    # Pre-size the buffer: one row per point, five numeric fields per row.
    data = np.zeros((meta['points'], 5))
    n = 0
    for track in gpx.tracks:
        for segment in track.segments:
            for point in segment.points:
                # NOTE(review): mktime() reads the time tuple as *local*
                # time, while GPX times are presumably UTC. Differences
                # between timestamps are unaffected except across a DST
                # change; confirm before relying on absolute values.
                t = time.mktime(point.time.timetuple())
                data[n, :] = [point.latitude, point.longitude, point.elevation, t, n]
                n += 1
    return pd.DataFrame({
        'latitude': data[:, 0],
        'longitude': data[:, 1],
        'elevation': data[:, 2],
        'timestamp': data[:, 3],
        'point': data[:, 4],
        'ride': 0,
        'rider': ""
    })

def import_gpx_dir(path):
    """Import every GPX file found in a directory into one DataFrame.

    Parameters
    ----------
    path : str
        Directory to scan; only names matching ``gpx_filename_pattern``
        are imported.

    Returns
    -------
    pandas.DataFrame
        The rows of all files stacked with a fresh 0..N-1 index. The
        ``ride`` column holds the position of the source file in the
        sorted list of file names. An empty DataFrame is returned when no
        file matches.
    """
    # os.listdir() returns names in arbitrary order; sorting makes the ride
    # ids reproducible between runs and machines.
    filenames = sorted(f for f in os.listdir(path) if gpx_filename_pattern.search(f))
    if not filenames:
        # pd.concat() rejects an empty list, so keep the empty-frame result.
        return pd.DataFrame()
    frames = []
    for n, filename in enumerate(tqdm(filenames, total=len(filenames))):
        dt = import_gpx_file(path, filename)
        dt['ride'] = n
        frames.append(dt)
    # One concat instead of DataFrame.append per file: append copied the
    # whole accumulated frame each time and no longer exists in pandas 2.x.
    return pd.concat(frames, ignore_index=True)

def interpret_gpx(path, name):
    """Import a rider's GPX directory, normalise dtypes and cache the result.

    Parameters
    ----------
    path : str
        Directory holding the rider's GPX files.
    name : object
        Rider identifier; stored in the ``rider`` column and used (via
        ``str(name)``) as the base name of the ``.pkl`` cache file written
        to the current working directory.

    Returns
    -------
    pandas.DataFrame
        The imported data with ``elevation``, ``timestamp`` and ``point``
        cast to int.
    """
    data = import_gpx_dir(path)
    data["rider"] = name
    data.elevation = data.elevation.astype(int)
    data.timestamp = data.timestamp.astype(int)
    data.point = data.point.astype(int)
    # Save to cache; the context manager closes the file even if dump fails.
    with open(str(name)+'.pkl', 'wb') as cache:
        pk.dump(data, cache)
    return data

def load_gpx(path):
    """Load and return the object pickled in the file at ``path``.

    NOTE: unpickling executes code embedded in the file; only load cache
    files that this notebook itself produced.
    """
    # The context manager closes the handle even if unpickling raises.
    with open(path, 'rb') as cache:
        return pk.load(cache)

def save_data(data, name):
    """Pickle ``data`` into the file ``name + ".pkl"``.

    Parameters
    ----------
    data : object
        Any picklable object.
    name : str
        Path of the cache file without the ``.pkl`` suffix.
    """
    # The context manager closes the handle even if pickling raises.
    with open(name+".pkl", 'wb') as cache:
        pk.dump(data, cache)
    
def load_data(name):
    """Load and return the object pickled in the file ``name + ".pkl"``.

    NOTE: unpickling executes code embedded in the file; only load cache
    files that this notebook itself produced.
    """
    # The context manager closes the handle even if unpickling raises.
    with open(name+".pkl", 'rb') as cache:
        return pk.load(cache)

# Load each rider from its pickle cache when present, otherwise import the
# rider's GPX directory (which also writes the cache). Each cache is checked
# on its own so that a missing "1.pkl" no longer crashes when "0.pkl" exists.
if os.path.exists("0.pkl"):
    data1 = load_gpx("0.pkl")
else:
    data1 = interpret_gpx("/Users/esse/activ/rodrigo_sztelzer", 0)

if os.path.exists("1.pkl"):
    data2 = load_gpx("1.pkl")
else:
    data2 = interpret_gpx("/Users/esse/activ/marcos_paulo", 1)

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
imported = pd.concat([data1, data2], ignore_index=True)
print(imported.shape)
(1739684, 7)

Data preprocessing

Several steps are needed to get the data into a shape that suits most later processing. The idea is to clean up the spurious distances introduced by bad GPS readings and to interpolate the points so that all rows are 1 meter apart, which will simplify most physical calculations.

Create destination columns

This will transform the list of points into an origin/destination list, so the resulting list has shape [M-1, N*2]. The new destination columns are equal to the values of the next row. Finally, remove the last element of each ride, which has value zero.

def haversine(lon1, lat1, lon2, lat2, radius=6372800):
    """Great-circle distance between points given in decimal degrees.

    Works element-wise on scalars, NumPy arrays or pandas Series.

    Parameters
    ----------
    lon1, lat1 : float or array-like
        Longitude and latitude of the origin point(s), in degrees.
    lon2, lat2 : float or array-like
        Longitude and latitude of the destination point(s), in degrees.
    radius : float, optional
        Sphere radius; the result is expressed in the same unit. Defaults
        to 6372800, an Earth radius in metres.

    Returns
    -------
    float or array-like
        Distance along the sphere between each origin/destination pair.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    a = np.sin((lat2-lat1)/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin((lon2-lon1)/2.0)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g.
    # for near-antipodal points), which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    return radius * 2 * np.arcsin(np.sqrt(a))

def destiny(dt):
    """Resample one ride and turn it into origin/destination rows.

    Intended to be applied to the rows of a single ride. The frame is sorted
    by timestamp, every point is repeated once per unit of time until the
    next point and the repeats are filled by interpolation, and each row is
    then paired with the row that follows it.

    Parameters
    ----------
    dt : pandas.DataFrame
        Rows with at least the columns ``timestamp``, ``latitude``,
        ``longitude``, ``elevation`` and ``point``.

    Returns
    -------
    pandas.DataFrame
        The remaining input columns plus ``ilon``/``ilat``/``ielev``/``itime``
        (origin), ``flon``/``flat``/``felev``/``ftime`` (destination),
        ``dist``, ``heading``, ``ddist``, ``delev`` and ``climb``.
    """
    # Time until the next point. np.roll wraps around, so the last row would
    # get "first - last"; that row is dropped right away.
    dt = dt.sort_values(['timestamp'])
    dt['duration'] = np.roll(dt.timestamp, -1) - dt['timestamp']
    dt.drop(dt.tail(1).index, inplace=True) 

    # Repeat the data, set the repeats to NaN, interpolate the NaNs.
    # Rows whose duration is 0 are repeated zero times, i.e. removed.
    dt = dt.loc[dt.index.repeat(dt.duration)]   # repeat each row `duration` times
    dt['subindex'] = dt.groupby(level=0).cumcount() # position of each copy within its original row
    # Blank every copy except the first one. This clears ALL columns of
    # those rows, so `point` (and any other column) is interpolated as well.
    dt[dt.subindex > 0] = np.nan
    dt = dt.interpolate()                           # fill the NaNs (pandas default: linear)
    dt = dt.reset_index()
    del(dt['index'])
    del(dt['duration'])
    del(dt['subindex'])

    # Origin (i*) is the current row; destination (f*) is the next row.
    # np.roll wraps around again: the last row's destination is the FIRST
    # row of the ride.
    dt['ilon'] = dt['longitude']
    dt['ilat'] = dt['latitude']
    dt['ielev'] = dt['elevation']
    dt['itime'] = dt['timestamp']
    dt['flon'] = np.roll(dt.longitude, -1)
    dt['flat'] = np.roll(dt.latitude, -1)
    dt['felev'] = np.roll(dt.elevation, -1)    
    dt['ftime'] = np.roll(dt.timestamp, -1)
    
    del(dt['longitude'])
    del(dt['latitude'])
    del(dt['timestamp'])
    del(dt['elevation'])

    # Length of each origin -> destination step, as computed by haversine().
    dt['dist'] = haversine(dt.ilon, dt.ilat, dt.flon, dt.flat)

    # Discard steps that do not move.
    # NOTE(review): the filtered frame is assigned to below; pandas may emit
    # SettingWithCopyWarning here -- consider an explicit .copy().
    dt = dt[dt.dist > 0]
    
    # Direction of the step in degrees, from the raw lon/lat differences.
    # NOTE(review): longitude differences are not scaled by cos(latitude),
    # so this is only an approximate bearing -- confirm it is good enough.
    dt['heading'] = np.arctan2(dt.flon-dt.ilon, dt.flat-dt.ilat) * 180 / np.pi
    # Change in step length relative to the following row (wraps on the last row).
    dt['ddist'] = np.roll(dt.dist, -1, 0) - dt.dist
    dt.loc[dt.point==0, 'ddist'] = dt.dist
    # Elevation change per step, limited to [-1, 1], and its ratio to the step length.
    dt['delev'] = dt.felev - dt.ielev
    dt['delev'] = np.clip(dt.delev, -1, 1)
    dt['climb'] = dt.delev / dt.dist
    dt['climb'] = dt['climb'].fillna(0)
    
    # Drop the final row, normally the wrap-around row pairing the end of the
    # ride with its start.
    # NOTE(review): the row before it has already taken its `ddist` from that
    # wrap-around row, so the last surviving `ddist` is not a real difference.
    # If the wrap-around row was removed by the `dist > 0` filter (ride ends
    # where it began), this drops a genuine row instead -- verify.
    dt.drop(dt.tail(1).index, inplace=True) 
    
    return dt
# Run destiny() once per (rider, ride) group, keeping the groups in their
# order of first appearance, then cache the result as "grouped.pkl".
rides_grouped = imported.groupby(['rider', 'ride'], as_index=True, sort=False)
data = rides_grouped.apply(destiny)
save_data(data, "grouped")
# Draw the first 210 steps (blue) and the last 220 steps (red) of one ride
# of rider 0 as arrows from each origin point to its destination point.
ride = 0
ride_steps = data.loc[0].loc[ride]
head = ride_steps.head(210)
tail = ride_steps.tail(220)

plt.figure(figsize=(10, 10))
arrow_style = dict(units='xy', angles='xy', scale=1, pivot='tail')
for steps, colour in ((head, "blue"), (tail, "red")):
    plt.quiver(
        steps.ilon, steps.ilat,
        steps.flon - steps.ilon, steps.flat - steps.ilat,
        color=colour, **arrow_style
    )
plt.axis('equal')
plt.show()

png

data.loc[1].loc[0]
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
point ride rider ilon ilat ielev itime flon flat felev ftime dist heading ddist delev climb
0 0.000000 0.0 1.0 -46.685961 -23.594525 736.000000 1.450308e+09 -46.685954 -23.594531 735.947368 1.450308e+09 0.989036 131.338064 9.890356e-01 -0.052632 -0.053215
1 0.026316 0.0 1.0 -46.685954 -23.594531 735.947368 1.450308e+09 -46.685947 -23.594537 735.894737 1.450308e+09 0.989036 131.338064 -2.463006e-08 -0.052632 -0.053215
2 0.052632 0.0 1.0 -46.685947 -23.594537 735.894737 1.450308e+09 -46.685940 -23.594543 735.842105 1.450308e+09 0.989036 131.338064 -2.298186e-08 -0.052632 -0.053215
3 0.078947 0.0 1.0 -46.685940 -23.594543 735.842105 1.450308e+09 -46.685933 -23.594550 735.789474 1.450308e+09 0.989036 131.338064 -2.558778e-08 -0.052632 -0.053215
4 0.105263 0.0 1.0 -46.685933 -23.594550 735.789474 1.450308e+09 -46.685926 -23.594556 735.736842 1.450308e+09 0.989035 131.338064 -2.320463e-08 -0.052632 -0.053215
5 0.131579 0.0 1.0 -46.685926 -23.594556 735.736842 1.450308e+09 -46.685919 -23.594562 735.684211 1.450308e+09 0.989035 131.338064 -2.487507e-08 -0.052632 -0.053215
6 0.157895 0.0 1.0 -46.685919 -23.594562 735.684211 1.450308e+09 -46.685912 -23.594568 735.631579 1.450308e+09 0.989035 131.338064 -2.369461e-08 -0.052632 -0.053215
7 0.184211 0.0 1.0 -46.685912 -23.594568 735.631579 1.450308e+09 -46.685905 -23.594574 735.578947 1.450308e+09 0.989035 131.338064 -2.438511e-08 -0.052632 -0.053215
8 0.210526 0.0 1.0 -46.685905 -23.594574 735.578947 1.450308e+09 -46.685898 -23.594580 735.526316 1.450308e+09 0.989035 131.338064 -2.371685e-08 -0.052632 -0.053215
9 0.236842 0.0 1.0 -46.685898 -23.594580 735.526316 1.450308e+09 -46.685891 -23.594587 735.473684 1.450308e+09 0.989035 131.338064 -2.460788e-08 -0.052632 -0.053215
10 0.263158 0.0 1.0 -46.685891 -23.594587 735.473684 1.450308e+09 -46.685884 -23.594593 735.421053 1.450308e+09 0.989035 131.338064 -2.369463e-08 -0.052632 -0.053215
11 0.289474 0.0 1.0 -46.685884 -23.594593 735.421053 1.450308e+09 -46.685877 -23.594599 735.368421 1.450308e+09 0.989035 131.338064 -2.512008e-08 -0.052632 -0.053215
12 0.315789 0.0 1.0 -46.685877 -23.594599 735.368421 1.450308e+09 -46.685870 -23.594605 735.315789 1.450308e+09 0.989035 131.338064 -2.320467e-08 -0.052632 -0.053215
13 0.342105 0.0 1.0 -46.685870 -23.594605 735.315789 1.450308e+09 -46.685863 -23.594611 735.263158 1.450308e+09 0.989035 131.338064 -2.512009e-08 -0.052632 -0.053215
14 0.368421 0.0 1.0 -46.685863 -23.594611 735.263158 1.450308e+09 -46.685856 -23.594617 735.210526 1.450308e+09 0.989035 131.338064 -2.320468e-08 -0.052632 -0.053215
15 0.394737 0.0 1.0 -46.685856 -23.594617 735.210526 1.450308e+09 -46.685849 -23.594624 735.157895 1.450308e+09 0.989035 131.338064 -2.416240e-08 -0.052632 -0.053215
16 0.421053 0.0 1.0 -46.685849 -23.594624 735.157895 1.450308e+09 -46.685842 -23.594630 735.105263 1.450308e+09 0.989035 131.338064 -2.465237e-08 -0.052632 -0.053215
17 0.447368 0.0 1.0 -46.685842 -23.594630 735.105263 1.450308e+09 -46.685835 -23.594636 735.052632 1.450308e+09 0.989035 131.338064 -2.414018e-08 -0.052632 -0.053215
18 0.473684 0.0 1.0 -46.685835 -23.594636 735.052632 1.450308e+09 -46.685828 -23.594642 735.000000 1.450308e+09 0.989035 131.338064 -2.440740e-08 -0.052632 -0.053215
19 0.500000 0.0 1.0 -46.685828 -23.594642 735.000000 1.450308e+09 -46.685821 -23.594648 734.947368 1.450308e+09 0.989035 131.338064 -2.369468e-08 -0.052632 -0.053215
20 0.526316 0.0 1.0 -46.685821 -23.594648 734.947368 1.450308e+09 -46.685814 -23.594654 734.894737 1.450308e+09 0.989035 131.338064 -2.438518e-08 -0.052632 -0.053215
21 0.552632 0.0 1.0 -46.685814 -23.594654 734.894737 1.450308e+09 -46.685807 -23.594660 734.842105 1.450308e+09 0.989035 131.338064 -2.347193e-08 -0.052632 -0.053215
22 0.578947 0.0 1.0 -46.685807 -23.594660 734.842105 1.450308e+09 -46.685800 -23.594667 734.789474 1.450308e+09 0.989035 131.338064 -2.509791e-08 -0.052632 -0.053215
23 0.605263 0.0 1.0 -46.685800 -23.594667 734.789474 1.450308e+09 -46.685793 -23.594673 734.736842 1.450308e+09 0.989035 131.338064 -2.369470e-08 -0.052632 -0.053215
24 0.631579 0.0 1.0 -46.685793 -23.594673 734.736842 1.450308e+09 -46.685786 -23.594679 734.684211 1.450308e+09 0.989035 131.338064 -2.463018e-08 -0.052632 -0.053215
25 0.657895 0.0 1.0 -46.685786 -23.594679 734.684211 1.450308e+09 -46.685779 -23.594685 734.631579 1.450308e+09 0.989035 131.338064 -2.344972e-08 -0.052632 -0.053215
26 0.684211 0.0 1.0 -46.685779 -23.594685 734.631579 1.450308e+09 -46.685772 -23.594691 734.578947 1.450308e+09 0.989035 131.338064 -2.487518e-08 -0.052632 -0.053215
27 0.710526 0.0 1.0 -46.685772 -23.594691 734.578947 1.450308e+09 -46.685765 -23.594697 734.526316 1.450308e+09 0.989035 131.338064 -2.322698e-08 -0.052632 -0.053215
28 0.736842 0.0 1.0 -46.685765 -23.594697 734.526316 1.450308e+09 -46.685758 -23.594704 734.473684 1.450308e+09 0.989035 131.338064 -2.509795e-08 -0.052632 -0.053215
29 0.763158 0.0 1.0 -46.685758 -23.594704 734.473684 1.450308e+09 -46.685751 -23.594710 734.421053 1.450308e+09 0.989035 131.338064 -2.344975e-08 -0.052632 -0.053215
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1426 1017.000000 0.0 1.0 -46.642827 -23.587794 797.000000 1.450309e+09 -46.642825 -23.587779 797.500000 1.450309e+09 1.625616 7.853313 3.069071e-09 0.500000 0.307576
1427 1017.500000 0.0 1.0 -46.642825 -23.587779 797.500000 1.450309e+09 -46.642823 -23.587765 798.000000 1.450309e+09 1.625616 7.853313 8.687803e-01 0.500000 0.307576
1428 1018.000000 0.0 1.0 -46.642823 -23.587765 798.000000 1.450309e+09 -46.642836 -23.587746 798.000000 1.450309e+09 2.494396 -34.380345 1.012410e-07 0.000000 0.000000
1429 1018.500000 0.0 1.0 -46.642836 -23.587746 798.000000 1.450309e+09 -46.642849 -23.587727 798.000000 1.450309e+09 2.494396 -34.380345 8.256844e-01 0.000000 0.000000
1430 1019.000000 0.0 1.0 -46.642849 -23.587727 798.000000 1.450309e+09 -46.642865 -23.587701 798.000000 1.450309e+09 3.320081 -31.607502 1.590601e-07 0.000000 0.000000
1431 1019.500000 0.0 1.0 -46.642865 -23.587701 798.000000 1.450309e+09 -46.642881 -23.587675 798.000000 1.450309e+09 3.320081 -31.607502 2.099193e+00 0.000000 0.000000
1432 1020.000000 0.0 1.0 -46.642881 -23.587675 798.000000 1.450309e+09 -46.642906 -23.587632 799.000000 1.450309e+09 5.419274 -30.173520 3.936607e-01 1.000000 0.184527
1433 1021.000000 0.0 1.0 -46.642906 -23.587632 799.000000 1.450309e+09 -46.642935 -23.587587 799.000000 1.450309e+09 5.812934 -32.799531 -3.449872e-01 0.000000 0.000000
1434 1022.000000 0.0 1.0 -46.642935 -23.587587 799.000000 1.450309e+09 -46.642961 -23.587544 800.000000 1.450309e+09 5.467947 -31.159305 2.088392e-01 1.000000 0.182884
1435 1023.000000 0.0 1.0 -46.642961 -23.587544 800.000000 1.450309e+09 -46.642991 -23.587501 800.000000 1.450309e+09 5.676786 -34.902496 2.424259e-01 0.000000 0.000000
1436 1024.000000 0.0 1.0 -46.642991 -23.587501 800.000000 1.450309e+09 -46.643022 -23.587456 801.000000 1.450309e+09 5.919212 -34.562525 -5.978414e-01 1.000000 0.168941
1437 1025.000000 0.0 1.0 -46.643022 -23.587456 801.000000 1.450309e+09 -46.643047 -23.587414 801.000000 1.450309e+09 5.321371 -30.762720 -3.412469e-01 0.000000 0.000000
1438 1026.000000 0.0 1.0 -46.643047 -23.587414 801.000000 1.450309e+09 -46.643071 -23.587375 802.000000 1.450309e+09 4.980124 -31.607502 8.672684e-03 1.000000 0.200798
1439 1027.000000 0.0 1.0 -46.643071 -23.587375 802.000000 1.450309e+09 -46.643097 -23.587337 802.000000 1.450309e+09 4.988797 -34.380345 5.489145e-02 0.000000 0.000000
1440 1028.000000 0.0 1.0 -46.643097 -23.587337 802.000000 1.450309e+09 -46.643124 -23.587299 802.000000 1.450309e+09 5.043688 -35.394796 6.737592e-01 0.000000 0.000000
1441 1029.000000 0.0 1.0 -46.643124 -23.587299 802.000000 1.450309e+09 -46.643153 -23.587255 803.000000 1.450309e+09 5.717447 -33.388514 2.017680e-01 1.000000 0.174903
1442 1030.000000 0.0 1.0 -46.643153 -23.587255 803.000000 1.450309e+09 -46.643184 -23.587210 803.000000 1.450309e+09 5.919215 -34.562525 3.169620e-01 0.000000 0.000000
1443 1031.000000 0.0 1.0 -46.643184 -23.587210 803.000000 1.450309e+09 -46.643187 -23.587154 804.000000 1.450309e+09 6.236177 -3.066486 -2.801442e-01 1.000000 0.160355
1444 1032.000000 0.0 1.0 -46.643187 -23.587154 804.000000 1.450309e+09 -46.643215 -23.587107 804.000000 1.450309e+09 5.956033 -30.784147 -5.367544e-01 0.000000 0.000000
1445 1033.000000 0.0 1.0 -46.643215 -23.587107 804.000000 1.450309e+09 -46.643240 -23.587064 804.000000 1.450309e+09 5.419279 -30.173520 -8.299216e-01 0.000000 0.000000
1446 1034.000000 0.0 1.0 -46.643240 -23.587064 804.000000 1.450309e+09 -46.643262 -23.587028 804.000000 1.450309e+09 4.589357 -31.429566 -7.813802e-01 0.000000 0.000000
1447 1035.000000 0.0 1.0 -46.643262 -23.587028 804.000000 1.450309e+09 -46.643280 -23.586998 804.000000 1.450309e+09 3.807977 -30.963757 2.268681e+00 0.000000 0.000000
1448 1036.000000 0.0 1.0 -46.643280 -23.586998 804.000000 1.450309e+09 -46.643337 -23.586982 804.500000 1.450309e+09 6.076658 -74.320476 6.772585e-07 0.500000 0.082282
1449 1036.500000 0.0 1.0 -46.643337 -23.586982 804.500000 1.450309e+09 -46.643394 -23.586966 805.000000 1.450309e+09 6.076659 -74.320476 -3.148050e+00 0.500000 0.082282
1450 1037.000000 0.0 1.0 -46.643394 -23.586966 805.000000 1.450309e+09 -46.643421 -23.586975 805.000000 1.450309e+09 2.928609 -108.434949 -1.776300e-07 0.000000 0.000000
1451 1037.500000 0.0 1.0 -46.643421 -23.586975 805.000000 1.450309e+09 -46.643448 -23.586984 805.000000 1.450309e+09 2.928609 -108.434949 -4.881016e-01 0.000000 0.000000
1452 1038.000000 0.0 1.0 -46.643448 -23.586984 805.000000 1.450309e+09 -46.643470 -23.586991 805.000000 1.450309e+09 2.440507 -108.434949 -1.233072e-07 0.000000 0.000000
1453 1038.500000 0.0 1.0 -46.643470 -23.586991 805.000000 1.450309e+09 -46.643493 -23.586999 805.000000 1.450309e+09 2.440507 -108.434949 -5.835686e-01 0.000000 0.000000
1454 1039.000000 0.0 1.0 -46.643493 -23.586999 805.000000 1.450309e+09 -46.643510 -23.587005 805.500000 1.450309e+09 1.856938 -109.440035 -7.320583e-08 0.500000 0.269260
1455 1039.500000 0.0 1.0 -46.643510 -23.587005 805.500000 1.450309e+09 -46.643527 -23.587011 806.000000 1.450309e+09 1.856938 -109.440035 4.403480e+03 0.500000 0.269260

1456 rows × 16 columns