From b33e3a93bad89a89c8f59bbe0694417dbae4378d Mon Sep 17 00:00:00 2001 From: Matt Lott <mattlott@live.com> Date: Wed, 24 Dec 2014 11:09:54 -0800 Subject: [PATCH] Level completion rates script --- scripts/analytics/mixpanelABSubscribeCopy.py | 2 +- .../mongodb/queries/CodeLanguageUsage.js | 25 ++- .../mongodb/queries/SignupsPerDay.js | 22 ++- .../mongodb/queries/helpVideoStylesABTest.js | 6 +- .../analytics/mongodb/queries/levelRates.js | 175 ++++++++++++++++++ 5 files changed, 208 insertions(+), 22 deletions(-) create mode 100644 scripts/analytics/mongodb/queries/levelRates.js diff --git a/scripts/analytics/mixpanelABSubscribeCopy.py b/scripts/analytics/mixpanelABSubscribeCopy.py index d6ca6801c..16f406ed9 100644 --- a/scripts/analytics/mixpanelABSubscribeCopy.py +++ b/scripts/analytics/mixpanelABSubscribeCopy.py @@ -23,7 +23,7 @@ if __name__ == '__main__': ) startDate = '2014-12-14' - endDate = '2014-12-15' + endDate = '2014-12-21' print("Requesting data for {0} to {1}".format(startDate, endDate)) data = api.request(['export'], { 'event' : ['Show subscription modal', 'Started subscription purchase', 'Finished subscription purchase'], diff --git a/scripts/analytics/mongodb/queries/CodeLanguageUsage.js b/scripts/analytics/mongodb/queries/CodeLanguageUsage.js index e7ec2fc12..c44e59d4c 100644 --- a/scripts/analytics/mongodb/queries/CodeLanguageUsage.js +++ b/scripts/analytics/mongodb/queries/CodeLanguageUsage.js @@ -1,8 +1,12 @@ // Print out code language usage based on level session data + +// Usage: +// mongo <address>:<port>/<database> <script file> -u <username> -p <password> + var total = 0; var languages = {}; var startDate = new ISODate("2014-12-01T00:00:00.000Z") -db['level.sessions'].aggregate( +var cursor = db['level.sessions'].aggregate( [ { $match : { //$and: [{codeLanguage: {$exists: true}}, {created : { $gte: startDate}}] @@ -16,15 +20,16 @@ db['level.sessions'].aggregate( } }, { $sort : { total : -1} } -] -).result.forEach( function (myDoc) { - 
//print(myDoc) - total += myDoc.total; - if (!languages[myDoc._id]) - languages[myDoc._id] = 0 - languages[myDoc._id] += myDoc.total -}) +]); + +while (cursor.hasNext()) { + var myDoc = cursor.next(); + total += myDoc.total; + if (!languages[myDoc._id]) + languages[myDoc._id] = 0 + languages[myDoc._id] += myDoc.total +} print("Total sessions with code languages", total); for (key in languages) { - print(key, languages[key], languages[key] / total * 100); + print(key + "\t" + languages[key] + "\t" + (languages[key] / total * 100).toFixed(2) + "%"); } \ No newline at end of file diff --git a/scripts/analytics/mongodb/queries/SignupsPerDay.js b/scripts/analytics/mongodb/queries/SignupsPerDay.js index 1af5aca15..5f65b1c9a 100644 --- a/scripts/analytics/mongodb/queries/SignupsPerDay.js +++ b/scripts/analytics/mongodb/queries/SignupsPerDay.js @@ -1,5 +1,10 @@ // Print out signup conversions per day +// Usage: +// mongo <address>:<port>/<database> <script file> -u <username> -p <password> + +// TODO: Dec 18th has more signups finished than started. Too good to be true. 
+ var match={ "$match" : { $or: [ {"event" : 'Started Signup'}, {"event" : 'Finished Signup'}] @@ -73,13 +78,14 @@ var group={"$group" : { }; var conversionsPerDay = {}; var sort = {$sort: { "_id.d" : -1}}; -db['analytics.log.events'].aggregate(match, proj1, proj2, group, sort).result.forEach( function (myDoc) { -// print({day: myDoc._id.d, amount: myDoc._id.m, count: myDoc.count}) - if (!conversionsPerDay[myDoc._id.d]) - conversionsPerDay[myDoc._id.d] = {} - conversionsPerDay[myDoc._id.d][myDoc._id.m] = myDoc.count; -}) +var cursor = db['analytics.log.events'].aggregate(match, proj1, proj2, group, sort); + +while (cursor.hasNext()) { + var myDoc = cursor.next(); + var key = myDoc._id.d.toDateString() + if (!conversionsPerDay[key]) conversionsPerDay[key] = {} + conversionsPerDay[key][myDoc._id.m] = myDoc.count; +} for (key in conversionsPerDay) { - print(key, conversionsPerDay[key]['Started Signup'], conversionsPerDay[key]['Finished Signup'], conversionsPerDay[key]['Finished Signup'] / conversionsPerDay[key]['Started Signup']); -// print("Signup Conversion:", (finished / started * 100), "%"); + print(key + "\t" + conversionsPerDay[key]['Started Signup'] + "\t" + conversionsPerDay[key]['Finished Signup'] + "\t" + (conversionsPerDay[key]['Finished Signup'] / conversionsPerDay[key]['Started Signup'] * 100).toFixed(2) + "%"); } \ No newline at end of file diff --git a/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js b/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js index b8425a765..203a722f2 100644 --- a/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js +++ b/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js @@ -652,7 +652,7 @@ function printHelpClicksPostHaunted() { initVideoEventCounts(); printVideoCompletionRates(); -// printWatchedAnotherVideoRates(); -// printLevelCompletionRates(); -// printSubConversionTotals(); +printWatchedAnotherVideoRates(); +printLevelCompletionRates(); +printSubConversionTotals(); 
printHelpClicksPostHaunted();
diff --git a/scripts/analytics/mongodb/queries/levelRates.js b/scripts/analytics/mongodb/queries/levelRates.js
new file mode 100644
index 000000000..e9ff387d2
--- /dev/null
+++ b/scripts/analytics/mongodb/queries/levelRates.js
@@ -0,0 +1,121 @@
+// Print out level completion rates
+
+// Usage:
+// mongo <address>:<port>/<database> <script file> -u <username> -p <password>
+
+// Counts 'Started Level' and 'Saw Victory' events per level and per day for the
+// last DAYS days (today included), prints every level/day row, then prints a
+// grid of completion percentages with one column per day.
+
+// TODO: Why do a small number of 'Started level' not have properties.levelID set?
+// TODO: spot check the data: only some 0.0 dates, etc.
+// TODO: exclude levels with no interesting data?
+// TODO: average playtime?
+// TODO: project level to level slug in the pipeline, and sort there too
+
+var DAYS = 7;
+var NOT_AVAILABLE = 'n/a';
+
+// Pads str with spaces so that the next column starts at the same offset on every row.
+function padRight(str, width) {
+  return str + new Array(Math.max(width - str.length, 0)).join(' ');
+}
+
+// Completion percentage as text. Returns NOT_AVAILABLE when nothing was started,
+// so the report never shows NaN or Infinity.
+function formatRate(started, finished, suffix) {
+  if (!started) return NOT_AVAILABLE;
+  return (finished / started * 100).toFixed(2) + suffix;
+}
+
+var today = new Date().toISOString().substring(0, 10);
+print("Today is " + today);
+
+// Pair the UTC getter with the UTC setter: setDate() works in local time, so
+// mixing it with getUTCDate() moves the window whenever the two dates differ.
+var windowStart = new Date();
+windowStart.setUTCDate(windowStart.getUTCDate() - (DAYS - 1));
+var startDate = windowStart.toISOString().substring(0, 10) + "T00:00:00.000Z";
+print("Start date is " + startDate);
+
+var match = {
+  "$match": {
+    $and: [
+      {"created": {$gte: ISODate(startDate)}},
+      {$or: [{"properties.level": {$exists: true}}, {"properties.levelID": {$exists: true}}]},
+      {$or: [{"event": 'Started Level'}, {"event": 'Saw Victory'}]}
+    ]
+  }
+};
+
+// Keep only the event name, the level and the YYYY-MM-DD part of the timestamp
+var proj0 = {"$project": {
+  "_id": 0,
+  "event": 1,
+  "level": {$ifNull: ["$properties.level", "$properties.levelID"]},
+  "created": {"$concat": [{"$substr": ["$created", 0, 4]}, "-", {"$substr": ["$created", 5, 2]}, "-", {"$substr": ["$created", 8, 2]}]}
+}};
+
+var group = {"$group": {
+  "_id": {
+    "event": "$event",
+    "created": "$created",
+    "level": "$level"
+  },
+  "count": {"$sum": 1}
+}};
+
+var cursor = db['analytics.log.events'].aggregate([match, proj0, group]);
+
+// levelData[<level>][<date>] = {started: <count>, finished: <count>}
+var levelData = {};
+var dateSet = {};
+var longestLevelName = -1;
+while (cursor.hasNext()) {
+  var doc = cursor.next();
+  var created = doc._id.created;
+  var event = doc._id.event;
+  var level = doc._id.level;
+
+  // $exists also matches fields explicitly set to null, so level can still be missing here
+  if (typeof level !== 'string') continue;
+  if (event === 'Saw Victory') level = level.toLowerCase().replace(/ /g, '-');
+  if (level.length > longestLevelName) longestLevelName = level.length;
+  if (!levelData[level]) levelData[level] = {};
+  if (!levelData[level][created]) levelData[level][created] = {started: 0, finished: 0};
+
+  // Add rather than assign: several groups can collapse into the same slug
+  var counts = levelData[level][created];
+  if (event === 'Started Level') counts.started += doc.count;
+  else counts.finished += doc.count;
+  dateSet[created] = true;
+}
+longestLevelName += 2;
+
+// Newest day first, levels sorted by name
+var dates = Object.keys(dateSet).sort().reverse();
+var levels = Object.keys(levelData).sort();
+
+// Print out all data
+levels.forEach(function (levelName) {
+  var byDate = levelData[levelName];
+  Object.keys(byDate).sort().reverse().forEach(function (day) {
+    var started = byDate[day].started;
+    var finished = byDate[day].finished;
+    print(padRight(levelName, longestLevelName) + day + "\t" + started + "\t" + finished + "\t" + formatRate(started, finished, "%"));
+  });
+});
+
+// Print out a grid of completion rates: one row per level, one column per day.
+// The header comes from the same sorted dates as the cells, so columns always line up.
+print(padRight('', longestLevelName) + dates.map(function (day) { return day.substring(5); }).join('\t'));
+levels.forEach(function (levelName) {
+  var byDate = levelData[levelName];
+  // Only show levels that have data for every day and were started at least once
+  var isComplete = dates.every(function (day) { return !!byDate[day]; });
+  var hasStarted = dates.some(function (day) { return byDate[day] && byDate[day].started > 0; });
+  if (!isComplete || !hasStarted) return;
+
+  var cells = dates.map(function (day) { return formatRate(byDate[day].started, byDate[day].finished, ''); });
+  print(padRight(levelName, longestLevelName) + cells.join('\t'));
+});