codecombat/scripts/analytics/mixpanelGetEvent.py
Matt Lott c2abefb637 Updating internal analytics scripts
Result of some data sleuthing.  They are only used for investigations,
and not production aggregation.  Hence, a bit ugly and unfinished.
2015-01-28 16:07:27 -08:00

184 lines
7.3 KiB
Python

# Get mixpanel event data via export API
# Useful for debugging Mixpanel data weirdness
# Level slugs whose events are kept; every other level is ignored.
# Wider candidate list, kept for quick toggling while investigating:
# targetLevels = ['dungeons-of-kithgard', 'the-raised-sword', 'endangered-burl']
targetLevels = ['dungeons-of-kithgard']

# Ordered funnel of event names: the first entry is the funnel start event,
# the remaining entries are counted against the day the user started.
eventFunnel = [
    'Started Level',
    'Saw Victory',
]
# Single-event alternatives for debugging:
# eventFunnel = ['Saw Victory']
# eventFunnel = ['Started Level']
import sys
from pprint import pprint
from datetime import datetime, timedelta
from mixpanel import Mixpanel
try:
import json
except ImportError:
import simplejson as json
# NOTE: mixpanel dates are by day and inclusive
# E.g. '2014-12-08' is any date that day, up to 2014-12-09 12am
def parseEventDay(timestamp):
    """Return the 'YYYY-MM-DD' day for an epoch *timestamp* (int or numeric str).

    The epoch seconds are interpreted as UTC, independent of the machine's
    local time zone.

    NOTE(review): the previous code used datetime.fromtimestamp() (local time)
    and then added a fixed 8 hours, which yields this same value only on a
    machine whose local zone is UTC-8. Confirm that epoch-as-UTC is the
    intended day bucketing for Mixpanel export timestamps.
    """
    moment = datetime(1970, 1, 1) + timedelta(seconds=int(timestamp))
    return moment.date().isoformat()


def collectFirstEventDays(lines, funnel, levels, startDate, endDate):
    """Scan exported event lines and record the earliest day of each event.

    lines     -- iterable of strings, one JSON-encoded event per line; empty
                 lines are ignored.
    funnel    -- event names that are expected; any other name stops the scan.
    levels    -- level slugs to keep; events for other levels are skipped.
    startDate -- inclusive lower bound, 'YYYY-MM-DD'.
    endDate   -- inclusive upper bound, 'YYYY-MM-DD'.

    Returns (levelEventUserDayMap, levelUserEventDayMap, eventUsers):
      levelEventUserDayMap[level][event][user] -> earliest day seen
      levelUserEventDayMap[level][user][event] -> earliest day seen
      eventUsers -> set of users with at least one kept event

    Scanning stops at the first unexpected event name, the first event with
    no level information, or the first line that raises while being
    processed; whatever was collected up to that point is returned.
    """
    levelEventUserDayMap = {}
    levelUserEventDayMap = {}
    eventUsers = set()
    for line in lines:
        if not line:
            continue
        try:
            eventData = json.loads(line)
            eventName = eventData['event']
            if eventName not in funnel:
                print('Unexpected event ' + eventName)
                break
            if 'properties' not in eventData:
                print('no properties, skpping')
                continue
            properties = eventData['properties']
            if 'distinct_id' not in properties:
                print('no distinct_id, skpping')
                continue
            user = properties['distinct_id']
            if 'time' not in properties:
                print('no time, skpping')
                continue
            day = parseEventDay(properties['time'])
            # ISO dates compare correctly as plain strings.
            if day < startDate or day > endDate:
                print("Skipping {0}".format(day))
                continue
            if 'levelID' in properties:
                level = properties['levelID']
            elif 'level' in properties:
                # Turn a display name such as "Dungeons of Kithgard" into a slug.
                level = properties['level'].lower().replace(' ', '-')
            else:
                print("Unkonwn level for {0}".format(eventName))
                print(properties)
                break
            if level not in levels:
                continue
            # Debug aid: dump every event that makes it into the counts.
            pprint(eventData)

            usersForEvent = levelEventUserDayMap.setdefault(level, {}).setdefault(eventName, {})
            if user not in usersForEvent or usersForEvent[user] > day:
                usersForEvent[user] = day
            eventUsers.add(user)
            eventsForUser = levelUserEventDayMap.setdefault(level, {}).setdefault(user, {})
            if eventName not in eventsForUser or eventsForUser[eventName] > day:
                eventsForUser[eventName] = day
        except Exception:
            # Stop at the first malformed line so it can be inspected, but let
            # KeyboardInterrupt / SystemExit propagate normally.
            print("Unexpected error: {0}".format(sys.exc_info()[0]))
            print(line)
            break
    return levelEventUserDayMap, levelUserEventDayMap, eventUsers


def buildLevelFunnel(levelUserEventDayMap, funnel):
    """Aggregate per-user first-event days into per-day funnel counts.

    levelUserEventDayMap -- levelUserEventDayMap[level][user][event] -> day
    funnel               -- ordered event names; funnel[0] is the start event.

    Every event a user has is attributed to the day that user first fired the
    start event. Users without a start event are not counted.

    Returns (levelFunnelData, noStartDayUsers):
      levelFunnelData[level][day][event] -> number of users
      noStartDayUsers -> users that have no start event for some level

    Counters are only created for days on which some user started. The start
    event is looked up directly, so the result does not depend on dict
    iteration order and contains no zero-only placeholder days.
    """
    startEvent = funnel[0]
    levelFunnelData = {}
    noStartDayUsers = []
    for level in levelUserEventDayMap:
        for user in levelUserEventDayMap[level]:
            userEvents = levelUserEventDayMap[level][user]
            funnelStartDay = userEvents.get(startEvent)
            if not funnelStartDay:
                noStartDayUsers.append(user)
                continue
            dayCounts = levelFunnelData.setdefault(level, {}).setdefault(funnelStartDay, {})
            # Make sure every funnel step shows up for this day, even at zero.
            for event in funnel:
                dayCounts.setdefault(event, 0)
            for event in userEvents:
                dayCounts[event] = dayCounts.get(event, 0) + 1
    return levelFunnelData, noStartDayUsers


def main(argv):
    """Fetch funnel events from the Mixpanel export API and print statistics.

    argv -- [script, api_key, api_secret]; prints a usage line and returns if
            the argument count is wrong.

    The date range is hard-coded below; the event names and levels come from
    the module-level eventFunnel and targetLevels.
    """
    if len(argv) != 3:
        print("Script format: <script> <api_key> <api_secret>")
        return

    scriptStart = datetime.now()
    api = Mixpanel(
        api_key=argv[1],
        api_secret=argv[2]
    )
    startDate = '2015-01-01'
    endDate = '2015-01-26'

    print("Requesting data for {0} to {1}".format(startDate, endDate))
    data = api.request(['export'], {
        # Optional filters for chasing a single user:
        # 'where': '"539c630f30a67c3b05d98d95" == properties["id"]',
        # 'where': "('539c630f30a67c3b05d98d95' == properties['id'] or '539c630f30a67c3b05d98d95' == properties['distinct_id'])",
        'event': eventFunnel,
        'from_date': startDate,
        'to_date': endDate
    })

    lines = data.split('\n')
    print("Received %d entries" % len(lines))

    levelEventUserDayMap, levelUserEventDayMap, eventUsers = collectFirstEventDays(
        lines, eventFunnel, targetLevels, startDate, endDate)

    # The filter that used to populate this list (user IDs containing '-') is
    # currently disabled, so the count is always zero.
    weirdUserIDs = []
    print("Weird user IDs: {0}".format(len(weirdUserIDs)))
    for level in levelEventUserDayMap:
        for event in levelEventUserDayMap[level]:
            print("{0} {1} {2}".format(level, event, len(levelEventUserDayMap[level][event])))
    print("Users: {0}".format(len(eventUsers)))

    levelFunnelData, noStartDayUsers = buildLevelFunnel(levelUserEventDayMap, eventFunnel)
    pprint(levelFunnelData)
    print("No start day count: {0}".format(len(noStartDayUsers)))
    noStartDayUsers.sort()
    # Only a sample is printed (indices 0..50, same cutoff as before).
    for user in noStartDayUsers[:51]:
        print(user)

    print("Script runtime: {0}".format(datetime.now() - scriptStart))


if __name__ == '__main__':
    main(sys.argv)