.fit file data munging

Posted on Mon 19 February 2018 in Projects

New coolest unit: semicircles

  • Conversion: semicircles = degrees * ( 2^31 / 180 ), i.e. degrees = semicircles * ( 180 / 2^31 )
  • 32-bit signed integer represents full 360 deg of longitude (-2^31 to 2^31 - 1 semicircles = -180 to +180 deg)
  • maximum precision from 32 bits (~double that of floating point), and integer arithmetic

details: https://msdn.microsoft.com/en-us/library/cc510650.aspx

.fit file parsing, data munging

In [1]:
from fitparse import FitFile, FitParseError
import pandas as pd
import gmaps
import os
import matplotlib.pyplot as plt
from matplotlib import cm
import sys
In [2]:
# Open and parse the activity file; if fitparse raises FitParseError,
# report it and exit with status 1.
try:
    fitfile = FitFile('2363427903.fit')
    fitfile.parse()
except FitParseError as e:
    # "except ... as" and print(...) with a single argument behave the same
    # on Python 2.6+ and Python 3, unlike the old "except X, e" / print forms.
    print("Error while parsing .FIT file: %s" % e)
    sys.exit(1)
In [ ]:
fitfile.messages
In [3]:
fitfile.profile_version
Out[3]:
10.12
In [4]:
fitfile.protocol_version
Out[4]:
1.0
In [5]:
fitfile.messages[0].name
Out[5]:
'file_id'
In [6]:
fitfile.messages[0].get_values()
Out[6]:
{'garmin_product': 'fr220',
 'manufacturer': 'garmin',
 'number': None,
 'serial_number': 3875181366L,
 'time_created': datetime.datetime(2017, 12, 3, 15, 0, 6),
 'type': 'activity'}
In [7]:
fitfile.messages[0].get_value('garmin_product')
Out[7]:
'fr220'
In [8]:
fitfile.messages[9].get('altitude').value
Out[8]:
101.20000000000005
In [9]:
fitfile.messages[9].get('altitude').units
Out[9]:
'm'
In [10]:
# enumerate samples of all recorded data

# Keep only the FIRST message of every message type, so the resulting table
# shows one sample of each kind of data present in the file.
gathered_names = []   # message type names, in order of first appearance
frames = []           # one single-column DataFrame per message type
seen_names = set()    # same names as gathered_names, for O(1) membership tests
for message in fitfile.messages:
    if message.name in seen_names:
        continue
    seen_names.add(message.name)
    gathered_names.append(message.name)
    # get_values() returns a {field name: value} dict; orient='index' turns
    # the field names into the row index.
    frames.append(pd.DataFrame.from_dict(message.get_values(), orient='index'))

# Stack the per-type frames; keys= adds the message type as an outer index level.
df = pd.concat(frames, keys=gathered_names)
df.columns = ["Data Available (if multiple entries, only 1st is shown)"]

from IPython.display import display
# Lift pandas' row/column display limits so the whole table is shown.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df)
Data Available (if multiple entries, only 1st is shown)
file_id number None
time_created 2017-12-03 15:00:06
garmin_product fr220
serial_number 3875181366
type activity
manufacturer garmin
file_creator hardware_version NaN
software_version 310
event timestamp 2017-12-03 15:00:06
event_group 0
timer_trigger manual
event timer
event_type start
device_info device_index creator
cum_operating_time None
software_version 3.1
timestamp 2017-12-03 15:00:06
battery_voltage None
hardware_version None
unknown_8 None
unknown_9 None
garmin_product fr220
source_type local
unknown_15 None
unknown_16 None
ant_network None
ant_transmission_type None
serial_number 3875181366
ant_device_number None
device_type None
unknown_23 None
battery_status None
manufacturer garmin
unknown_22 unknown_4 NaN
unknown_5 0
unknown_2 2
unknown_3 1
unknown_0 1
unknown_1 1
unknown_253 8.81248e+08
unknown_79 unknown_6 185
unknown_7 1
unknown_4 1
unknown_5 50
unknown_2 178
unknown_3 700
unknown_0 14185
unknown_1 31
unknown_8 0
unknown_9 0
unknown_253 881247606
unknown_141 unknown_4 NaN
unknown_5 NaN
unknown_2 8.81345e+08
unknown_3 NaN
unknown_0 1
unknown_1 8.81107e+08
unknown_253 8.81248e+08
record distance 0
timestamp 2017-12-03 15:00:06
altitude 101.2
position_long -1445618441
fractional_cadence 0
enhanced_speed 0
position_lat 461754027
speed 0
enhanced_altitude 101.2
cadence 60
lap max_fractional_cadence 0
event_type stop
total_timer_time 347.265
avg_heart_rate None
total_elapsed_time 347.265
max_running_cadence 95
avg_stance_time None
end_position_long -1445682620
sport running
start_position_long -1445618441
event lap
unknown_30 -1445682620
enhanced_avg_speed 4.634
unknown_72 None
message_index 0
sub_sport None
total_calories 121
enhanced_max_speed 4.964
end_position_lat 461610201
timestamp 2017-12-03 15:05:54
start_time 2017-12-03 15:00:06
start_position_lat 461754027
max_speed 4.964
unknown_29 461600588
unknown_28 -1445618440
total_distance 1609.35
total_fractional_cycles None
avg_fractional_cadence 0.796875
unknown_27 461754027
event_group None
total_descent 32
avg_vertical_oscillation None
avg_stance_time_percent None
lap_trigger distance
intensity None
wkt_step_index None
total_ascent 8
total_strides 511
max_heart_rate None
avg_speed 4.634
avg_running_cadence 87
session max_fractional_cadence 0
event_type stop
total_timer_time 9297.78
first_lap_index 0
avg_heart_rate None
total_elapsed_time 9297.78
max_running_cadence 95
avg_stance_time None
num_laps 27
sport running
start_position_long -1445618441
event lap
message_index 0
sub_sport generic
total_calories 3136
trigger activity_end
nec_long -1445618440
total_ascent 332
total_training_effect None
enhanced_avg_speed 4.553
swc_long -1449521792
timestamp 2017-12-03 17:38:19
start_time 2017-12-03 15:00:06
swc_lat 460090247
start_position_lat 461754027
max_speed 4.964
total_distance 42330.6
total_fractional_cycles None
avg_fractional_cadence 0.25
event_group None
total_descent 435
avg_vertical_oscillation None
avg_running_cadence 88
avg_stance_time_percent None
unknown_81 0
enhanced_max_speed 4.964
total_strides 13677
max_heart_rate None
avg_speed 4.553
nec_lat 461754027
activity num_sessions 1
event_type stop
total_timer_time 9297.78
timestamp 2017-12-03 17:38:19
local_timestamp 2017-12-03 09:38:19
type manual
event activity
event_group None
unknown_113 unknown_4 881247606
unknown_5 0
unknown_2 4219500
unknown_3 9297780
unknown_0 0
unknown_1 1
unknown_253 881257099
In [11]:
# for plotting, clean up the data from 'record' (ie. lat, lon, altitude, distance, cadence, speed)

d = []  # list of (record number, field name, value, units) rows, filled in by the loop below
r = 0   # initialize record counter

# degrees = semicircles * ( 180 / 2^31 )
# semicircles = degrees * ( 2^31 / 180 )
def deg(s):
    """Convert a position expressed in semicircles to degrees.

    Args:
        s: position in semicircles (2**31 semicircles == 180 degrees),
            or None when the field carries no value.

    Returns:
        The position in degrees as a float, or None if ``s`` is None.
    """
    # Missing field values show up as None (see the get_values() output);
    # pass them through instead of failing on None * float.
    if s is None:
        return None
    return s * (180. / (2 ** 31))

# Walk every message of type 'record' and flatten its fields into rows of
# (record number, field name, value, units).
for record in fitfile.get_messages('record'):
    r += 1

    for record_data in record:
        # lat/lon are stored in semicircles: convert them and relabel the units
        is_position = record_data.name in ('position_lat', 'position_long')
        value = deg(record_data.value) if is_position else record_data.value
        units = 'deg' if is_position else record_data.units
        d.append((r, record_data.name, value, units))

# Long-format table: one row per field per record
df = pd.DataFrame(d, columns=('record', 'name', 'value', 'units'))
dfmini = df[df['record'] == 1]   # just the fields of the first record
df.head(12)
Out[11]:
record name value units
0 1 altitude 101.2 m
1 1 cadence 60 rpm
2 1 distance 0 m
3 1 enhanced_altitude 101.2 m
4 1 enhanced_speed 0 m/s
5 1 fractional_cadence 0 rpm
6 1 position_lat 38.7038 deg
7 1 position_long -121.17 deg
8 1 speed 0 m/s
9 1 timestamp 2017-12-03 15:00:06 None
10 2 altitude 101 m
11 2 cadence 60 rpm
In [12]:
df['value'][6]   # full GPS precision is maintained on converted lats, lons
Out[12]:
38.703775433823466
In [13]:
def _field_values(frame, field, column):
    """Return the values of one record field as a single-column DataFrame.

    Args:
        frame: long-format DataFrame with at least 'name' and 'value' columns.
        field: exact field name to select (e.g. 'cadence').
        column: name to give the resulting column.

    Returns:
        A DataFrame with a fresh 0..n-1 index and one column called ``column``.
    """
    # Exact comparison rather than str.match(): match() is a regex *prefix*
    # test, so 'speed' would also select any field whose name starts with 'speed'.
    selected = frame[frame['name'] == field]
    return selected.reset_index(drop=True)[['value']].rename(columns={'value': column})


cadence = _field_values(df, 'cadence', 'cadence')
altitude = _field_values(df, 'altitude', 'altitude')
distance = _field_values(df, 'distance', 'distance')
speed = _field_values(df, 'speed', 'speed')


lats = _field_values(df, 'position_lat', 'lat')
lons = _field_values(df, 'position_long', 'lon')
# Side-by-side lat/lon table; rows are paired by position in each column
locs = pd.concat([lats, lons], axis=1)

locs.head()
Out[13]:
lat lon
0 38.7038 -121.17
1 38.7036 -121.17
2 38.7034 -121.17
3 38.7032 -121.17
4 38.7031 -121.171

Plot stuff

In [14]:
# Lower-cased base name of the activity file, without its extension
filename = '2363427903.fit'
os.path.splitext(filename)[0].lower()
Out[14]:
'2363427903'
In [15]:
# The x axis is labelled in miles, so convert metres with the real mile length
# (dividing by 1600 stretches the axis by roughly 0.6 %).
METERS_PER_MILE = 1609.344


def _plot_panel(position, miles, values, ylabel, y_limits):
    """Draw one of the three stacked panels: a quantity against distance.

    Args:
        position: 1-based row of the panel in the 3x1 subplot grid.
        miles: single-column DataFrame of distances in miles (x axis).
        values: single-column DataFrame of the quantity to plot (y axis).
        ylabel: label for the y axis.
        y_limits: (low, high) pair for the y axis range.
    """
    # .iloc[0] takes the single column's statistic by position explicitly,
    # rather than relying on [0] falling back to positional lookup.
    average = values.mean().iloc[0]
    plt.subplot(3, 1, position)
    plt.plot(miles, values)
    plt.axis([0, miles.max().iloc[0], y_limits[0], y_limits[1]])
    # red horizontal line marking the mean, with the value in the legend
    plt.axhline(y=average, xmin=0, xmax=1, c='r', label='mean = ' + str(average.round(2)), ls='-', lw=2)
    plt.xlabel('distance (mi)')
    plt.ylabel(ylabel)
    plt.legend(loc='best')


plt.figure(figsize=(9, 6))

miles = distance / METERS_PER_MILE
cadence_mean = cadence.mean().iloc[0]
speed_mean = speed.mean().iloc[0]

# cadence and speed are zoomed to a band around their mean; altitude uses its full range
_plot_panel(1, miles, cadence, 'cadence', (cadence_mean * 0.95, cadence_mean * 1.05))
_plot_panel(2, miles, altitude, 'altitude (m)', (altitude.min().iloc[0], altitude.max().iloc[0]))
_plot_panel(3, miles, speed, 'speed (m/s)', (speed_mean * 0.9, speed_mean * 1.1))

plt.tight_layout()
plt.show()
In [16]:
# Scatter the GPS fixes (latitude on x, longitude on y), coloured by cadence.
plt.figure(figsize=(11, 3))
plt.scatter(
    locs.lat,
    locs.lon,
    c=cadence,
    s=800,
    marker='.',
    edgecolor='none',
    alpha=0.08,
    cmap=cm.hot_r,
    vmin=86,    # colour scale limited to 86..92
    vmax=92,
)
plt.xlabel('Latitude')
plt.ylabel('Longitude')
plt.title('Long slow death by running (finish line is on the left)')
cadence_bar = plt.colorbar()
cadence_bar.set_label('Cadence', labelpad=-20, y=1.1, rotation=0)

That projection sucks (it's raw lat, lon), so use your fav mapper..

Gmaps

In [18]:
# The key below is a placeholder; substitute your own Google API key before running.
gmaps.configure(api_key="XXXXXXXXXXXXXXXXXXXXXXXXXXXX") # Your Google API key
In [19]:
# Create a gmaps figure and add a symbol layer built from the lat/lon table.
fig = gmaps.figure()  # zoom_level=2
data = gmaps.symbol_layer(locs, scale = 1)
fig.add_layer(data)
# Evaluating `fig` as the last expression of the cell renders the map; left disabled here.
# fig

Calling fig here spits out a gmap of the route

It's zoomable, clickable, pan-able.. but it's static html, and I don't want to upload that to GitHub, so here's a screenshot:

Try using Bokeh

In [17]:
from bokeh.models import GMapPlot, GMapOptions, ColumnDataSource, Circle, Range1d, PanTool, WheelZoomTool, ResetTool, SaveTool
from bokeh.io import show, output_notebook
from bokeh.embed import components
output_notebook()
Loading BokehJS ...
In [29]:
# Centre of the route's bounding box, used to centre the map
lat_min, lat_max = locs['lat'].min(), locs['lat'].max()
lon_min, lon_max = locs['lon'].min(), locs['lon'].max()
midlat = lat_min + (lat_max - lat_min) / 2
midlon = lon_min + (lon_max - lon_min) / 2
In [27]:
# Build a Google-Maps-backed Bokeh plot centred on (midlat, midlon) and draw
# the recorded positions on it as small blue circles.
map_options = GMapOptions(lat=midlat, lng=midlon, map_type="roadmap", zoom=11)
activitymap = GMapPlot(x_range=Range1d(), y_range=Range1d(), map_options=map_options)
# Placeholder key; substitute your own Google API key.
activitymap.api_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
# The glyph reads its coordinates from the 'lon' and 'lat' columns of locs.
source = ColumnDataSource(data=locs)
circle = Circle(x="lon", y="lat", size=3, fill_color="blue", fill_alpha=0.9, line_color=None)
activitymap.add_glyph(source, circle)
# Interactive tools: pan, mouse-wheel zoom, reset.
activitymap.add_tools(PanTool(), WheelZoomTool(), ResetTool())
In [28]:
show(activitymap)

Does the map show up above this prompt? Probably not..

That's a Pelican --> HTML problem. Works fine in Jupyter notebooks. Too lazy to fix it here, just use the web app instead