2015-01-06 21:38:45 -08:00
# Calculate level completion rates via mixpanel export API
2015-01-28 16:07:24 -08:00
# TODO: why are our 'time' fields in PST time?
# Level slugs to report on; events for any other level are ignored.
targetLevels = ['dungeons-of-kithgard', 'the-raised-sword', 'endangered-burl']
# Ordered funnel of Mixpanel event names: the first entry marks a level start,
# the last entry marks a level completion.
eventFunnel = ['Started Level', 'Saw Victory']
2015-01-06 21:38:45 -08:00
import sys
2015-01-28 16:07:24 -08:00
from datetime import datetime , timedelta
2015-01-06 21:38:45 -08:00
from mixpanel import Mixpanel
try :
import json
except ImportError :
import simplejson as json
# NOTE: mixpanel dates are by day and inclusive
# E.g. '2014-12-08' covers any time on that day, up to 2014-12-09 12am
def event_day(timestamp):
    """Return the 'YYYY-MM-DD' day an event timestamp is bucketed into.

    The timestamp is converted with ``datetime.fromtimestamp`` (host-local
    time) and then shifted forward by eight hours before the date part is
    taken.
    """
    # NOTE(review): fromtimestamp() depends on the timezone of the machine
    # running the script; the +8h shift presumably assumes a PST host.
    # Confirm against how Mixpanel encodes 'time' before changing this.
    local = datetime.fromtimestamp(int(timestamp))
    shifted = local + timedelta(hours=8)
    return shifted.isoformat()[0:10]


def collect_first_event_days(lines, funnel, levels, start_date, end_date):
    """Parse exported event lines into a level -> user -> event -> day map.

    Args:
        lines: iterable of strings, one JSON-encoded event per line; empty
            strings are skipped.
        funnel: list of event names that are expected in the export.
        levels: collection of level slugs to keep.
        start_date: first day to keep, as 'YYYY-MM-DD' (inclusive).
        end_date: last day to keep, as 'YYYY-MM-DD' (inclusive).

    Returns:
        dict mapping level -> user id -> event name -> earliest day on which
        that user triggered that event for that level.

    Processing stops early (returning what was collected so far) when an
    event outside ``funnel`` is seen, when an event carries neither a
    'levelID' nor a 'level' property, or when a line cannot be processed.
    """
    user_data = {}
    for line in lines:
        if not line:
            continue
        try:
            event_data = json.loads(line)
            event_name = event_data['event']
            if event_name not in funnel:
                print('Unexpected event ' + event_name)
                break
            if 'properties' not in event_data:
                continue
            properties = event_data['properties']
            if 'distinct_id' not in properties:
                continue
            user = properties['distinct_id']
            if 'time' not in properties:
                continue
            day = event_day(properties['time'])
            # ISO dates compare correctly as plain strings.
            if day < start_date or day > end_date:
                print("Skipping {0}".format(day))
                continue

            if 'levelID' in properties:
                level = properties['levelID']
            elif 'level' in properties:
                # Turn a display name such as "The Raised Sword" into a slug.
                level = properties['level'].lower().replace(' ', '-')
            else:
                print("Unknown level for {0}".format(event_name))
                print(properties)
                break
            if level not in levels:
                continue

            events = user_data.setdefault(level, {}).setdefault(user, {})
            # Keep only the earliest day per (level, user, event).
            if event_name not in events or events[event_name] > day:
                events[event_name] = day
        except Exception:
            # Any malformed record aborts parsing; Ctrl-C and SystemExit are
            # no longer swallowed because only Exception is caught.
            print("Unexpected error: {0}".format(sys.exc_info()[0]))
            print(line)
            break
    return user_data


def build_level_funnel(user_data, funnel):
    """Aggregate per-user event days into per-level, per-day event counts.

    Args:
        user_data: level -> user -> event -> day map, as produced by
            ``collect_first_event_days``.
        funnel: ordered list of event names; ``funnel[0]`` is the start event.

    Returns:
        dict mapping level -> day -> event name -> count. A user's non-start
        events are counted on the day of that user's start event; users
        without a start event contribute no counts.
    """
    start_event = funnel[0]
    level_funnel = {}
    for level, users in user_data.items():
        for events in users.values():
            funnel_start_day = None
            # Events visited before the start event is found still create a
            # zero-count entry on their own day, so which zero-count entries
            # exist depends on dict iteration order.
            for event, day in events.items():
                day_counts = level_funnel.setdefault(level, {}).setdefault(day, {})
                day_counts.setdefault(event, 0)
                if event == start_event:
                    day_counts[event] += 1
                    funnel_start_day = day
                    break
            if not funnel_start_day:
                continue

            start_counts = level_funnel[level][funnel_start_day]
            for event in events:
                start_counts.setdefault(event, 0)
                if event != start_event:
                    start_counts[event] += 1
            # Make sure every later funnel step has an entry, even if zero.
            for event in funnel[1:]:
                start_counts.setdefault(event, 0)
    return level_funnel


def completion_rows(level_funnel, start_event, end_event):
    """Compute per-day rows and per-level totals from the funnel counts.

    Args:
        level_funnel: level -> day -> event -> count map.
        start_event: name of the event counted as "started".
        end_event: name of the event counted as "finished".

    Returns:
        ``(rows, totals)`` where ``rows`` is a list of
        ``(level, day, started, finished)`` tuples and ``totals`` maps
        level -> {start_event: count, end_event: count}.
    """
    rows = []
    totals = {}
    for level, days in level_funnel.items():
        for day, counts in days.items():
            started = counts.get(start_event, 0)
            finished = counts.get(end_event, 0)
            level_totals = totals.setdefault(level, {})
            level_totals.setdefault(start_event, 0)
            level_totals.setdefault(end_event, 0)
            level_totals[start_event] += started
            level_totals[end_event] += finished
            rows.append((level, day, started, finished))
    return rows, totals


def format_rate_row(labels, started, finished):
    """Format one tab-separated report row.

    The row holds ``labels``, then the started and finished counts, then the
    completion percentage. The percentage field is left empty when
    ``started`` is zero, which avoids a division by zero.
    """
    fields = list(labels) + [started, finished]
    if started > 0:
        fields.append("{0}%".format(float(finished) / started * 100))
    else:
        fields.append("")
    return "\t".join("{0}".format(field) for field in fields)


def main(argv):
    """Fetch funnel events from Mixpanel and print level completion rates.

    Args:
        argv: command line as ``[script, api_key, api_secret]``.
    """
    if len(argv) != 3:
        print("Script format: <script> <api_key> <api_secret>")
        return

    script_start = datetime.now()
    api = Mixpanel(
        api_key=argv[1],
        api_secret=argv[2]
    )

    # Hard-coded reporting window; both dates are inclusive days.
    start_date = '2015-01-23'
    end_date = '2015-01-23'
    start_event = eventFunnel[0]
    end_event = eventFunnel[-1]

    print("Requesting data for {0} to {1}".format(start_date, end_date))
    data = api.request(['export'], {
        'event': eventFunnel,
        'from_date': start_date,
        'to_date': end_date
    })

    lines = data.split('\n')
    print("Received {0} entries".format(len(lines)))

    # Map ordering: level, user, event, day
    user_data = collect_first_event_days(
        lines, eventFunnel, targetLevels, start_date, end_date)
    level_funnel = build_level_funnel(user_data, eventFunnel)
    rows, totals = completion_rows(level_funnel, start_event, end_event)

    for level, day, started, finished in rows:
        print(format_rate_row([level, day], started, finished))
    for level in totals:
        print(format_rate_row(
            [level], totals[level][start_event], totals[level][end_event]))

    print("Script runtime: {0}".format(datetime.now() - script_start))


if __name__ == '__main__':
    main(sys.argv)