From b33e3a93bad89a89c8f59bbe0694417dbae4378d Mon Sep 17 00:00:00 2001
From: Matt Lott <mattlott@live.com>
Date: Wed, 24 Dec 2014 11:09:54 -0800
Subject: [PATCH] Level completion rates script

---
 scripts/analytics/mixpanelABSubscribeCopy.py  |   2 +-
 .../mongodb/queries/CodeLanguageUsage.js      |  25 ++-
 .../mongodb/queries/SignupsPerDay.js          |  22 ++-
 .../mongodb/queries/helpVideoStylesABTest.js  |   6 +-
 .../analytics/mongodb/queries/levelRates.js   | 175 ++++++++++++++++++
 5 files changed, 208 insertions(+), 22 deletions(-)
 create mode 100644 scripts/analytics/mongodb/queries/levelRates.js

diff --git a/scripts/analytics/mixpanelABSubscribeCopy.py b/scripts/analytics/mixpanelABSubscribeCopy.py
index d6ca6801c..16f406ed9 100644
--- a/scripts/analytics/mixpanelABSubscribeCopy.py
+++ b/scripts/analytics/mixpanelABSubscribeCopy.py
@@ -23,7 +23,7 @@ if __name__ == '__main__':
         )
         
         startDate = '2014-12-14'
-        endDate = '2014-12-15'
+        endDate = '2014-12-21'
         print("Requesting data for {0} to {1}".format(startDate, endDate))
         data = api.request(['export'], {
             'event' : ['Show subscription modal', 'Started subscription purchase', 'Finished subscription purchase'],
diff --git a/scripts/analytics/mongodb/queries/CodeLanguageUsage.js b/scripts/analytics/mongodb/queries/CodeLanguageUsage.js
index e7ec2fc12..c44e59d4c 100644
--- a/scripts/analytics/mongodb/queries/CodeLanguageUsage.js
+++ b/scripts/analytics/mongodb/queries/CodeLanguageUsage.js
@@ -1,8 +1,12 @@
 // Print out code language usage based on level session data
+
+// Usage:
+// mongo <address>:<port>/<database> <script file> -u <username> -p <password>
+
 var total = 0;
 var languages = {};
 var startDate = new ISODate("2014-12-01T00:00:00.000Z")
-db['level.sessions'].aggregate(
+var cursor = db['level.sessions'].aggregate(
 [ 
     { $match : {
         //$and: [{codeLanguage: {$exists: true}}, {created : { $gte: startDate}}]
@@ -16,15 +20,16 @@ db['level.sessions'].aggregate(
         }
     },
     {  $sort : { total : -1} }
-]
-).result.forEach( function (myDoc) { 
-    //print(myDoc)
-    total += myDoc.total;
-    if (!languages[myDoc._id])
-        languages[myDoc._id] = 0
-    languages[myDoc._id] += myDoc.total
-})
+]);
+
+while (cursor.hasNext()) {
+  var myDoc = cursor.next();
+  total += myDoc.total;
+  if (!languages[myDoc._id])
+      languages[myDoc._id] = 0
+  languages[myDoc._id] += myDoc.total
+}
 print("Total sessions with code languages", total);
 for (key in languages) {
-    print(key, languages[key], languages[key] / total * 100);
+    print(key + "\t" + languages[key] + "\t" + (languages[key] / total * 100).toFixed(2) + "%");
 }
\ No newline at end of file
diff --git a/scripts/analytics/mongodb/queries/SignupsPerDay.js b/scripts/analytics/mongodb/queries/SignupsPerDay.js
index 1af5aca15..5f65b1c9a 100644
--- a/scripts/analytics/mongodb/queries/SignupsPerDay.js
+++ b/scripts/analytics/mongodb/queries/SignupsPerDay.js
@@ -1,5 +1,10 @@
 // Print out signup conversions per day
 
+// Usage:
+// mongo <address>:<port>/<database> <script file> -u <username> -p <password>
+
+// TODO: Dec 18th has more signups finished than started. Too good to be true.
+
 var match={
     "$match" : {
         $or: [ {"event" : 'Started Signup'}, {"event" : 'Finished Signup'}]
@@ -73,13 +78,14 @@ var group={"$group" : {
 };
 var conversionsPerDay = {};
 var sort = {$sort: { "_id.d" : -1}};
-db['analytics.log.events'].aggregate(match, proj1, proj2, group, sort).result.forEach( function (myDoc) { 
-//    print({day: myDoc._id.d, amount: myDoc._id.m, count: myDoc.count}) 
-    if (!conversionsPerDay[myDoc._id.d])
-        conversionsPerDay[myDoc._id.d] = {}
-    conversionsPerDay[myDoc._id.d][myDoc._id.m] = myDoc.count;
-})
+var cursor = db['analytics.log.events'].aggregate(match, proj1, proj2, group, sort);
+
+while (cursor.hasNext()) {
+  var myDoc = cursor.next();
+  var key = myDoc._id.d.toDateString()
+  if (!conversionsPerDay[key]) conversionsPerDay[key] = {}
+  conversionsPerDay[key][myDoc._id.m] = myDoc.count;
+}
 for (key in conversionsPerDay) {
-    print(key, conversionsPerDay[key]['Started Signup'], conversionsPerDay[key]['Finished Signup'], conversionsPerDay[key]['Finished Signup'] / conversionsPerDay[key]['Started Signup']);
-//    print("Signup Conversion:", (finished / started * 100), "%");
+    print(key + "\t" + conversionsPerDay[key]['Started Signup'] + "\t" + conversionsPerDay[key]['Finished Signup'] + "\t" + (conversionsPerDay[key]['Finished Signup'] / conversionsPerDay[key]['Started Signup'] * 100).toFixed(2) + "%");
 }
\ No newline at end of file
diff --git a/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js b/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js
index b8425a765..203a722f2 100644
--- a/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js
+++ b/scripts/analytics/mongodb/queries/helpVideoStylesABTest.js
@@ -652,7 +652,7 @@ function printHelpClicksPostHaunted() {
 initVideoEventCounts();
 printVideoCompletionRates();
 
-// printWatchedAnotherVideoRates();
-// printLevelCompletionRates();
-// printSubConversionTotals();
+printWatchedAnotherVideoRates();
+printLevelCompletionRates();
+printSubConversionTotals();
 printHelpClicksPostHaunted();
diff --git a/scripts/analytics/mongodb/queries/levelRates.js b/scripts/analytics/mongodb/queries/levelRates.js
new file mode 100644
index 000000000..e9ff387d2
--- /dev/null
+++ b/scripts/analytics/mongodb/queries/levelRates.js
@@ -0,0 +1,175 @@
+// Print out level completion rates
+
+// Usage:
+// mongo <address>:<port>/<database> <script file> -u <username> -p <password>
+
+// Bucketize start/finish events into days, then bucketize into levels?
+
+// TODO: Why do a small number of 'Started Level' events not have properties.levelID set?
+
+// TODO: spot check the data: NaN, only some 0.0 dates, etc.
+// TODO: exclude levels with no interesting data?
+
+// TODO: average playtime?
+
+// Current calendar day as YYYY-MM-DD; toISOString() always reports UTC.
+var today = new Date().toISOString().substr(0, 10);
+print("Today is " + today);
+// Window start: midnight UTC six days back, i.e. 7 calendar days including today.
+var todayMinus6 = new Date();
+todayMinus6.setUTCDate(todayMinus6.getUTCDate() - 6);  // UTC setter to pair with the UTC getter; setDate() works in local time
+var startDate = todayMinus6.toISOString().substr(0, 10) + "T00:00:00.000Z";
+print("Start date is " + startDate);
+// $match stage: 'Started Level' / 'Saw Victory' events created on or after startDate that have a level field.
+var match={
+  "$match" : {
+    $and: [
+    {"created": { $gte: ISODate(startDate)}},
+    {$or: [ {"properties.level": {$exists: true}}, {"properties.levelID": {$exists: true}}]},  // either field may carry the level
+    {$or: [ {"event" : 'Started Level'}, {"event" : 'Saw Victory'}]}
+    ]
+  }
+};
+
+// TODO: project level to level slug
+// $project stage: keep event, pick one level field, and reduce created to a day string.
+var proj0 = {"$project": {
+  "_id" : 0,
+  "event" : 1,
+  "level" : { $ifNull : ["$properties.level", "$properties.levelID"]},  // properties.level when present, otherwise properties.levelID
+  "created": { "$concat": [{"$substr" :  ["$created", 0, 4]}, "-", {"$substr" :  ["$created", 5, 2]}, "-", {"$substr" :  ["$created", 8, 2]}]}  // YYYY-MM-DD; assumes $created stringifies in ISO form -- TODO confirm
+}};
+
+// var proj0={"$project" : {
+//   "_id" : 0,
+//   "event" : 1,
+//   "created" : 1,
+//   "level" : "$properties.level",
+//   "h" : {"$hour" : "$created"},
+//   "m" : {"$minute" : "$created"},
+//   "s" : {"$second" : "$created"},
+//   "ml" : {"$millisecond" : "$created"}
+// }};
+
+// var proj1={"$project" : {
+//   "_id" : 0,
+//   "created" : 1,
+//   "event" : 1,
+//   "level" : 1,
+//   "h" : {"$hour" : "$created"},
+//   "m" : {"$minute" : "$created"},
+//   "s" : {"$second" : "$created"},
+//   "ml" : {"$millisecond" : "$created"}
+// }};
+// 
+// var proj2={"$project" : {
+//   "_id" : 0,
+//   "event" : 1,
+//   "level" : 1,
+//   "created" : {
+//     "$subtract" : [
+//     "$created",
+//     {"$add" : ["$ml",{"$multiply" : ["$s", 1000]}, {"$multiply" : ["$m",60,1000]}, {"$multiply" : ["$h",60,60,1000]}]}
+//     ]}
+// }};
+// $group stage: count events per distinct (event, created day, level) combination.
+var group={"$group" : {
+  "_id" : {
+    "event" : "$event",
+    "created" : "$created",
+    "level": "$level"
+  },
+  "count" : {
+    "$sum" : 1
+  }
+}};
+
+// TODO: sort by level, date
+// var sort = {$sort: { "_id.level" : 1, "_id.created" : -1}};
+//var cursor = db['analytics.log.events'].aggregate(match, proj0, proj1, proj2, group, sort);
+// var cursor = db['analytics.log.events'].aggregate(match, proj0, group, sort);
+var cursor = db['analytics.log.events'].aggregate(match, proj0, group);  // stages passed as separate arguments; no $sort stage is applied here
+
+// levelData[level][YYYY-MM-DD] = {started: <count>, finished: <count>}
+var levelData = {};
+var longestLevelName = -1;  // length of the longest level key; used to pad the name column when printing
+while (cursor.hasNext()) {
+  var doc = cursor.next();
+  var created = doc._id.created;
+  var event = doc._id.event;
+  var level = doc._id.level;
+  if (typeof level !== 'string') continue;  // {$exists: true} also matches null; skip rather than throw below
+  if (event === 'Saw Victory') level = level.toLowerCase().replace(/ /g, '-');  // lower-case, hyphenated key
+  if (level.length > longestLevelName) longestLevelName = level.length;
+  if (!levelData[level]) levelData[level] = {};
+  if (!levelData[level][created]) levelData[level][created] = {started: 0, finished: 0};
+  // Add instead of assign: after the normalization above, several aggregation groups can share one key.
+  levelData[level][created][event === 'Started Level' ? 'started' : 'finished'] += doc.count;
+}
+longestLevelName += 2;  // extra room so the name column never touches the next column
+
+// Flatten levelData into levelRates: one array per level, one {level, created, started, finished} row per day.
+var levelRates = [];
+var seenDates = {};  // set of every YYYY-MM-DD that appears for any level
+for (level in levelData) {
+  var dateData = [];
+  for (created in levelData[level]) {
+    dateData.push({
+      level: level,
+      created: created,
+      started: levelData[level][created].started || 0,
+      finished: levelData[level][created].finished || 0
+    });
+    seenDates[created] = true;
+  }
+  levelRates.push(dateData);
+}
+// Grid header labels (MM-DD): distinct days, newest first, the same order each level's rows are sorted into below.
+// Sort on the full YYYY-MM-DD before trimming the year so a window spanning New Year still orders correctly.
+var dates = Object.keys(seenDates).sort().reverse().map(function(d) {return d.substring(5)});
+levelRates.sort(function(a,b) {return a[0].level < b[0].level ? -1 : 1});
+levelRates.forEach(function(rows) {rows.sort(function(a,b) {return a.created < b.created ? 1 : -1})});
+
+// Print out all data: one line per (level, day) with started, finished and completion rate.
+for (var i = 0; i < levelRates.length; i++) {
+  // Indexed loops: for...in on an array yields string keys and any enumerable extras.
+  for (var j = 0; j < levelRates[i].length; j++) {
+    var row = levelRates[i][j];
+    var started = row.started;
+    var finished = row.finished;
+    // A day with victories but no starts would divide by zero ("Infinity%"); report it as n/a.
+    var rate = started > 0 ? (finished / started * 100).toFixed(2) + "%" : "n/a";
+    var levelSpacer = new Array(longestLevelName - row.level.length).join(' ');
+    print(row.level + levelSpacer + row.created + "\t" + started + "\t" + finished + "\t" + rate);
+  }
+}
+
+// Print out a nice grid of levels with 7 days of data
+// Header row: blank padding as wide as the level-name column, then the tab-separated day labels.
+print(new Array(longestLevelName).join(' ') + dates.join('\t'));
+for (var i = 0; i < levelRates.length; i++) {
+  var rows = levelRates[i];
+
+  // Only levels with an entry for all 7 days have one cell per header column.
+  if (rows.length < 7) continue;
+
+  // Skip levels that have no 'started' count on any day.
+  var hasStarted = false;
+  for (var j = 0; j < rows.length; j++) {
+    if (rows[j].started > 0) {
+      hasStarted = true;
+      break;
+    }
+  }
+  if (!hasStarted) continue;
+
+  var level = rows[0].level;
+  var levelSpacer = new Array(longestLevelName - level.length).join(' ');
+  var cells = [];
+  for (var k = 0; k < rows.length; k++) {
+    // Zero starts would divide by zero ("Infinity"/"NaN"); show n/a for that day instead.
+    cells.push(rows[k].started > 0 ? (rows[k].finished / rows[k].started * 100).toFixed(2) : "n/a");
+  }
+  // join() puts tabs between cells only, so rows carry no trailing tab.
+  print(level + levelSpacer + cells.join("\t"));
+}