mirror of
https://github.com/codeninjasllc/codecombat.git
synced 2025-03-14 07:00:01 -04:00
Analytics aggregation script perf
Speeding up db finds. Reducing memory usage via string caches.
This commit is contained in:
parent
352e8283a7
commit
c721d863f8
2 changed files with 152 additions and 76 deletions
|
@ -6,75 +6,112 @@
|
|||
try {
|
||||
logDB = new Mongo("localhost").getDB("analytics")
|
||||
var scriptStartTime = new Date();
|
||||
|
||||
/**
 * Interns strings as small integer indexes so that large in-memory maps can
 * be keyed by short numbers instead of repeating long strings.
 *
 * `lookup` maps string -> index, `strings` maps index -> string.
 */
var StringCache = function() {
  // Null-prototype object: cached strings come from database documents, so a
  // value such as "hasOwnProperty" or "__proto__" must be stored as a plain
  // key instead of colliding with something inherited from Object.prototype.
  this.lookup = Object.create(null);
  this.strings = [];
};

/**
 * Returns the string stored under an index previously returned by set().
 *
 * @param {number|string} index - Cache index; may be a numeric string, which
 *   is what a for...in loop yields when indexes are used as object keys.
 * @returns {string|undefined} The cached string, or undefined if the index is unknown.
 */
StringCache.prototype.get = function(index) {
  // Explicit radix so the index is always read as decimal.
  return this.strings[parseInt(index, 10)];
};

/**
 * Interns a string, adding it to the cache on first sight.
 *
 * @param {string} str - String to intern.
 * @returns {number} The stable index assigned to `str`.
 */
StringCache.prototype.set = function(str) {
  // Borrow hasOwnProperty from Object.prototype: `lookup` has no prototype,
  // and a cached key must never be able to shadow the membership test.
  if (!Object.prototype.hasOwnProperty.call(this.lookup, str)) {
    this.lookup[str] = this.strings.length;
    this.strings.push(str);
  }
  return this.lookup[str];
};
|
||||
|
||||
var dayCache = new StringCache();
|
||||
var eventCache = new StringCache();
|
||||
var levelCache = new StringCache();
|
||||
var userCache = new StringCache();
|
||||
|
||||
// TODO: convert to StringCache?
|
||||
var analyticsStringCache = {};
|
||||
|
||||
// This needs to be enough days to encompass the start and finish events for most levels
|
||||
var numDays = 20;
|
||||
|
||||
var startDay = new Date();
|
||||
today = startDay.toISOString().substr(0, 10);
|
||||
startDay.setUTCDate(startDay.getUTCDate() - numDays);
|
||||
startDay = startDay.toISOString().substr(0, 10);
|
||||
|
||||
var levelCompletionFunnel = ['Started Level', 'Saw Victory'];
|
||||
|
||||
var today = new Date().toISOString().substr(0, 10);
|
||||
log("Today is " + today);
|
||||
log("Start day is " + startDay);
|
||||
log("numDays " + numDays);
|
||||
|
||||
var campaignLevelSlugs = getCampaignLevelSlugs();
|
||||
|
||||
log("Getting level drop counts...");
|
||||
var levelDropCounts = getLevelDropCounts(startDay, levelCompletionFunnel);
|
||||
var levelDropCounts = getLevelDropCounts(numDays, levelCompletionFunnel, campaignLevelSlugs);
|
||||
log("Inserting level drop counts...");
|
||||
for (level in levelDropCounts) {
|
||||
for (day in levelDropCounts[level]) {
|
||||
if (today === day) continue; // Never save data for today because it's incomplete
|
||||
insertLevelEventCount('User Dropped', level, day, levelDropCounts[level][day]);
|
||||
for (var level in levelDropCounts) {
|
||||
for (var day in levelDropCounts[level]) {
|
||||
if (today === dayCache.get(day)) continue; // Never save data for today because it's incomplete
|
||||
// print('User Dropped', levelCache.get(level), dayCache.get(day), levelDropCounts[level][day]);
|
||||
insertLevelEventCount(numDays, 'User Dropped', levelCache.get(level), dayCache.get(day), levelDropCounts[level][day]);
|
||||
}
|
||||
}
|
||||
|
||||
log("Script runtime: " + (new Date() - scriptStartTime));
|
||||
log("Script runtime: " + (new Date().getTime() - scriptStartTime.getTime()));
|
||||
}
|
||||
catch(err) {
|
||||
log("ERROR: " + err);
|
||||
printjson(err);
|
||||
}
|
||||
|
||||
function getLevelDropCounts(startDay, events) {
|
||||
function getLevelDropCounts(startDay, eventFunnel) {
|
||||
// For how many unique users was one of these events the last one recorded?
|
||||
// Return level/day breakdown
|
||||
|
||||
if (!startDay || !events || events.length === 0) return {};
|
||||
// Faster to request analytics db data in batches of days
|
||||
var dayIncrement = 3;
|
||||
var startDate = new Date();
|
||||
startDate.setUTCDate(startDate.getUTCDate() - numDays);
|
||||
var startDay = startDate.toISOString().substr(0, 10);
|
||||
var endDate = new Date();
|
||||
endDate.setUTCDate(endDate.getUTCDate() - numDays + dayIncrement);
|
||||
var endDay = endDate.toISOString().substr(0, 10);
|
||||
|
||||
var startObj = objectIdWithTimestamp(ISODate(startDay + "T00:00:00.000Z"));
|
||||
var queryParams = {$and: [{_id: {$gte: startObj}},{"event": {$in: events}}]};
|
||||
var cursor = logDB['log'].find(queryParams);
|
||||
log("Start day is " + startDay);
|
||||
|
||||
var userProgression = {};
|
||||
while (cursor.hasNext()) {
|
||||
var doc = cursor.next();
|
||||
var created = doc._id.getTimestamp().toISOString();
|
||||
var event = doc.event;
|
||||
var properties = doc.properties;
|
||||
var user = doc.user;
|
||||
var level;
|
||||
while (startDay < today) {
|
||||
// log(startDay + " " + endDay);
|
||||
var startObj = objectIdWithTimestamp(ISODate(startDay + "T00:00:00.000Z"));
|
||||
var endObj = objectIdWithTimestamp(ISODate(endDay + "T00:00:00.000Z"));
|
||||
|
||||
// TODO: Switch to properties.levelID for 'Saw Victory'
|
||||
if (event === 'Saw Victory' && properties.level) level = slugify(properties.level);
|
||||
else if (properties.levelID) level = properties.levelID
|
||||
else continue
|
||||
for (var i = 0; i < eventFunnel.length; i++) {
|
||||
var queryParams = {$and: [{_id: {$gte: startObj}}, {_id: {$lt: endObj}}, {"event": eventFunnel[i]}, {'properties.levelID': {$in: campaignLevelSlugs}}]};
|
||||
var selectParams = {event: 1, 'properties.levelID': 1, user: 1};
|
||||
var cursor = logDB['log'].find(queryParams);
|
||||
while (cursor.hasNext()) {
|
||||
var doc = cursor.next();
|
||||
if (!doc.properties || !doc.properties.levelID) continue;
|
||||
|
||||
if (!userProgression[user]) userProgression[user] = [];
|
||||
userProgression[user].push({
|
||||
created: created,
|
||||
event: event,
|
||||
level: level
|
||||
});
|
||||
var created = doc._id.getTimestamp().toISOString();
|
||||
var event = eventCache.set(doc.event);
|
||||
var level = levelCache.set(doc.properties.levelID);
|
||||
var user = userCache.set(doc.user);
|
||||
|
||||
if (!userProgression[user]) userProgression[user] = [];
|
||||
userProgression[user].push({
|
||||
created: created,
|
||||
event: event,
|
||||
level: level
|
||||
});
|
||||
}
|
||||
}
|
||||
startDate.setUTCDate(startDate.getUTCDate() + dayIncrement);
|
||||
startDay = startDate.toISOString().substr(0, 10);
|
||||
endDate.setUTCDate(endDate.getUTCDate() + dayIncrement);
|
||||
endDay = endDate.toISOString().substr(0, 10);
|
||||
}
|
||||
|
||||
var levelDropCounts = {};
|
||||
for (user in userProgression) {
|
||||
for (var user in userProgression) {
|
||||
userProgression[user].sort(function (a,b) {return a.created < b.created ? -1 : 1});
|
||||
var lastEvent = userProgression[user][userProgression[user].length - 1];
|
||||
var level = lastEvent.level;
|
||||
var day = lastEvent.created.substring(0, 10);
|
||||
var day = dayCache.set(lastEvent.created.substring(0, 10));
|
||||
if (!levelDropCounts[level]) levelDropCounts[level] = {};
|
||||
if (!levelDropCounts[level][day]) levelDropCounts[level][day] = 0
|
||||
levelDropCounts[level][day]++;
|
||||
|
@ -152,7 +189,11 @@ function insertLevelEventCount(event, level, day, count) {
|
|||
var levelID = getAnalyticsString(level);
|
||||
var filterID = getAnalyticsString('all');
|
||||
|
||||
var startDate = new Date();
|
||||
startDate.setUTCDate(startDate.getUTCDate() - numDays);
|
||||
var startDay = startDate.toISOString().substr(0, 10);
|
||||
var startObj = objectIdWithTimestamp(new Date(startDay + "T00:00:00.000Z"));
|
||||
|
||||
var queryParams = {$and: [{d: day}, {e: eventID}, {l: levelID}, {f: filterID}]};
|
||||
var doc = db['analytics.perdays'].findOne(queryParams);
|
||||
if (doc && doc.c === count) return;
|
||||
|
@ -179,3 +220,15 @@ function insertLevelEventCount(event, level, day, count) {
|
|||
// }
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collects the distinct level slugs referenced by the documents in
 * `db.campaigns`.
 *
 * Reads only the `levels` field of each campaign and takes the `slug` of
 * every entry in it.
 *
 * @returns {string[]} Unique level slugs, in first-seen order.
 */
function getCampaignLevelSlugs() {
  // Used as a set; null prototype so a slug can never collide with an
  // inherited Object.prototype member.
  var campaignLevelSlugMap = Object.create(null);
  var cursor = db.campaigns.find({}, {levels: 1});
  while (cursor.hasNext()) {
    var doc = cursor.next();
    for (var levelID in doc.levels) {
      var level = doc.levels[levelID];
      // Skip entries without a slug: otherwise the literal string "undefined"
      // would be reported as a level slug (and a null entry would throw).
      if (!level || !level.slug) continue;
      campaignLevelSlugMap[level.slug] = true;
    }
  }
  return Object.keys(campaignLevelSlugMap);
}
|
||||
|
|
|
@ -11,15 +11,35 @@
|
|||
try {
|
||||
logDB = new Mongo("localhost").getDB("analytics")
|
||||
var scriptStartTime = new Date();
|
||||
|
||||
/**
 * Interns strings as small integer indexes so that large in-memory maps can
 * be keyed by short numbers instead of repeating long strings.
 *
 * `lookup` maps string -> index, `strings` maps index -> string.
 */
var StringCache = function() {
  // Null-prototype object: cached strings come from database documents, so a
  // value such as "hasOwnProperty" or "__proto__" must be stored as a plain
  // key instead of colliding with something inherited from Object.prototype.
  this.lookup = Object.create(null);
  this.strings = [];
};

/**
 * Returns the string stored under an index previously returned by set().
 *
 * @param {number|string} index - Cache index; may be a numeric string, which
 *   is what a for...in loop yields when indexes are used as object keys.
 * @returns {string|undefined} The cached string, or undefined if the index is unknown.
 */
StringCache.prototype.get = function(index) {
  // Explicit radix so the index is always read as decimal.
  return this.strings[parseInt(index, 10)];
};

/**
 * Interns a string, adding it to the cache on first sight.
 *
 * @param {string} str - String to intern.
 * @returns {number} The stable index assigned to `str`.
 */
StringCache.prototype.set = function(str) {
  // The membership test must look at `this.lookup`, not at the cache object
  // itself; testing `this` never matches a cached string, so every call would
  // append a duplicate and hand out a new index for the same string.
  if (!Object.prototype.hasOwnProperty.call(this.lookup, str)) {
    this.lookup[str] = this.strings.length;
    this.strings.push(str);
  }
  return this.lookup[str];
};
|
||||
|
||||
var dayCache = new StringCache();
|
||||
var eventCache = new StringCache();
|
||||
var levelCache = new StringCache();
|
||||
var userCache = new StringCache();
|
||||
|
||||
// TODO: convert to StringCache?
|
||||
var analyticsStringCache = {};
|
||||
|
||||
// This needs to be enough days to encompass the start and finish events for most levels
|
||||
var numDays = 20;
|
||||
|
||||
var today = new Date().toISOString().substr(0, 10);
|
||||
|
||||
var levelCompletionFunnel = ['Started Level', 'Saw Victory'];
|
||||
|
||||
var today = new Date().toISOString().substr(0, 10);
|
||||
log("Today is " + today);
|
||||
log("numDays " + numDays);
|
||||
|
||||
|
@ -28,36 +48,24 @@ try {
|
|||
log("Getting level completion data...");
|
||||
var levelCompletionData = getLevelFunnelData(numDays, levelCompletionFunnel, campaignLevelSlugs);
|
||||
log("Inserting aggregated level completion data...");
|
||||
for (level in levelCompletionData) {
|
||||
for (day in levelCompletionData[level]) {
|
||||
if (today === day) continue; // Never save data for today because it's incomplete
|
||||
for (event in levelCompletionData[level][day]) {
|
||||
insertLevelEventCount(event, level, day, levelCompletionData[level][day][event]);
|
||||
for (var level in levelCompletionData) {
|
||||
for (var day in levelCompletionData[level]) {
|
||||
if (today === dayCache.get(day)) continue; // Never save data for today because it's incomplete
|
||||
for (var event in levelCompletionData[level][day]) {
|
||||
// print(numDays, eventCache.get(event), levelCache.get(level), dayCache.get(day), levelCompletionData[level][day][event]);
|
||||
insertLevelEventCount(numDays, eventCache.get(event), levelCache.get(level), dayCache.get(day), levelCompletionData[level][day][event]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log("Script runtime: " + (new Date() - scriptStartTime));
|
||||
log("Script runtime: " + (new Date().getTime() - scriptStartTime.getTime()));
|
||||
}
|
||||
catch(err) {
|
||||
log("ERROR: " + err);
|
||||
printjson(err);
|
||||
}
|
||||
|
||||
/**
 * Collects the distinct level slugs referenced by the documents in
 * `db.campaigns`.
 *
 * Reads only the `levels` field of each campaign and takes the `slug` of
 * every entry in it.
 *
 * @returns {string[]} Unique level slugs, in first-seen order.
 */
function getCampaignLevelSlugs() {
  // Used as a set; null prototype so a slug can never collide with an
  // inherited Object.prototype member.
  var campaignLevelSlugMap = Object.create(null);
  var cursor = db.campaigns.find({}, {levels: 1});
  while (cursor.hasNext()) {
    var doc = cursor.next();
    for (var levelID in doc.levels) {
      var level = doc.levels[levelID];
      // Skip entries without a slug: otherwise the literal string "undefined"
      // would be reported as a level slug (and a null entry would throw).
      if (!level || !level.slug) continue;
      campaignLevelSlugMap[level.slug] = true;
    }
  }
  return Object.keys(campaignLevelSlugMap);
}
|
||||
|
||||
function getLevelFunnelData(numDays, eventFunnel, campaignLevelSlugs) {
|
||||
|
||||
// Faster to request analytics db data in batches of days
|
||||
var dayIncrement = 3;
|
||||
var startDate = new Date();
|
||||
|
@ -82,17 +90,18 @@ function getLevelFunnelData(numDays, eventFunnel, campaignLevelSlugs) {
|
|||
var cursor = logDB.log.find(queryParams, selectParams);
|
||||
while (cursor.hasNext()) {
|
||||
var doc = cursor.next();
|
||||
var created = doc._id.getTimestamp().toISOString();
|
||||
var day = created.substring(0, 10);
|
||||
var event = doc.event;
|
||||
var level = doc.properties ? doc.properties.levelID : null;
|
||||
var user = doc.user;
|
||||
if (!doc.properties || !doc.properties.levelID) continue;
|
||||
|
||||
if (!level) continue;
|
||||
var created = doc._id.getTimestamp().toISOString();
|
||||
var dayStr = created.substring(0, 10);
|
||||
var day = dayCache.set(dayStr);
|
||||
var event = eventCache.set(doc.event);
|
||||
var level = levelCache.set(doc.properties.levelID);
|
||||
var user = userCache.set(doc.user);
|
||||
|
||||
if (!userDataMap[level]) userDataMap[level] = {};
|
||||
if (!userDataMap[level][user]) userDataMap[level][user] = {};
|
||||
if (!userDataMap[level][user][event] || userDataMap[level][user][event].localeCompare(day) > 0) {
|
||||
if (!userDataMap[level][user][event] || dayCache.get(userDataMap[level][user][event]).localeCompare(dayStr) > 0) {
|
||||
userDataMap[level][user][event] = day;
|
||||
}
|
||||
}
|
||||
|
@ -103,21 +112,19 @@ function getLevelFunnelData(numDays, eventFunnel, campaignLevelSlugs) {
|
|||
endDay = endDate.toISOString().substr(0, 10);
|
||||
}
|
||||
|
||||
log("Finished getting events");
|
||||
|
||||
// Data: level, day, event
|
||||
var levelFunnelData = {};
|
||||
for (level in userDataMap) {
|
||||
for (user in userDataMap[level]) {
|
||||
for (var level in userDataMap) {
|
||||
for (var user in userDataMap[level]) {
|
||||
|
||||
// Find first event date
|
||||
var funnelStartDay = null;
|
||||
for (event in userDataMap[level][user]) {
|
||||
for (var event in userDataMap[level][user]) {
|
||||
var day = userDataMap[level][user][event];
|
||||
if (!levelFunnelData[level]) levelFunnelData[level] = {};
|
||||
if (!levelFunnelData[level][day]) levelFunnelData[level][day] = {};
|
||||
if (!levelFunnelData[level][day][event]) levelFunnelData[level][day][event] = 0;
|
||||
if (eventFunnel[0] === event) {
|
||||
if (eventFunnel[0] === eventCache.get(event)) {
|
||||
// First event gets attributed to current date
|
||||
levelFunnelData[level][day][event]++;
|
||||
funnelStartDay = day;
|
||||
|
@ -127,13 +134,13 @@ function getLevelFunnelData(numDays, eventFunnel, campaignLevelSlugs) {
|
|||
|
||||
if (funnelStartDay) {
|
||||
// Add remaining funnel steps/events to first step's date
|
||||
for (event in userDataMap[level][user]) {
|
||||
for (var event in userDataMap[level][user]) {
|
||||
if (!levelFunnelData[level][funnelStartDay][event]) levelFunnelData[level][funnelStartDay][event] = 0;
|
||||
if (eventFunnel[0] != event) levelFunnelData[level][funnelStartDay][event]++;
|
||||
if (eventFunnel[0] !== eventCache.get(event)) levelFunnelData[level][funnelStartDay][event]++;
|
||||
}
|
||||
// Zero remaining funnel events
|
||||
for (var i = 1; i < eventFunnel.length; i++) {
|
||||
var event = eventFunnel[i];
|
||||
var event = eventCache.set(eventFunnel[i]);
|
||||
if (!levelFunnelData[level][funnelStartDay][event]) levelFunnelData[level][funnelStartDay][event] = 0;
|
||||
}
|
||||
}
|
||||
|
@ -205,7 +212,7 @@ function getAnalyticsString(str) {
|
|||
throw new Error("ERROR: Did not find analytics.strings insert for: " + str);
|
||||
}
|
||||
|
||||
function insertLevelEventCount(event, level, day, count) {
|
||||
function insertLevelEventCount(numDays, event, level, day, count) {
|
||||
// analytics.perdays schema in server/analytics/AnalyticsPerDay.coffee
|
||||
day = day.replace(/-/g, '');
|
||||
|
||||
|
@ -213,7 +220,11 @@ function insertLevelEventCount(event, level, day, count) {
|
|||
var levelID = getAnalyticsString(level);
|
||||
var filterID = getAnalyticsString('all');
|
||||
|
||||
var startDate = new Date();
|
||||
startDate.setUTCDate(startDate.getUTCDate() - numDays);
|
||||
var startDay = startDate.toISOString().substr(0, 10);
|
||||
var startObj = objectIdWithTimestamp(new Date(startDay + "T00:00:00.000Z"));
|
||||
|
||||
var queryParams = {$and: [{d: day}, {e: eventID}, {l: levelID}, {f: filterID}]};
|
||||
var doc = db['analytics.perdays'].findOne(queryParams);
|
||||
if (doc && doc.c === count) return;
|
||||
|
@ -240,3 +251,15 @@ function insertLevelEventCount(event, level, day, count) {
|
|||
// }
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collects the distinct level slugs referenced by the documents in
 * `db.campaigns`.
 *
 * Reads only the `levels` field of each campaign and takes the `slug` of
 * every entry in it.
 *
 * @returns {string[]} Unique level slugs, in first-seen order.
 */
function getCampaignLevelSlugs() {
  // Used as a set; null prototype so a slug can never collide with an
  // inherited Object.prototype member.
  var campaignLevelSlugMap = Object.create(null);
  var cursor = db.campaigns.find({}, {levels: 1});
  while (cursor.hasNext()) {
    var doc = cursor.next();
    for (var levelID in doc.levels) {
      var level = doc.levels[levelID];
      // Skip entries without a slug: otherwise the literal string "undefined"
      // would be reported as a level slug (and a null entry would throw).
      if (!level || !level.slug) continue;
      campaignLevelSlugMap[level.slug] = true;
    }
  }
  return Object.keys(campaignLevelSlugMap);
}
|
||||
|
|
Loading…
Reference in a new issue