Merge branch 'master' into production

This commit is contained in:
Matt Lott 2015-12-08 14:24:56 -08:00
commit 238cccd00a
5 changed files with 312 additions and 12 deletions

Binary file not shown.

View file

@ -264,6 +264,13 @@ module.exports = class SpellView extends CocoView
if me.level() < 20 or aceConfig.indentGuides
# Add visual indent guides
language = @spell.language
# Decide whether `line` opens a block: true only when the language is "python" and the
# text before any '#' ends with a colon (optionally followed by whitespace).
# Returns false for every other language.
ensureLineStartsBlock = (line) ->
return false unless language is "python"
# Capture the run of non-'#' characters after optional leading whitespace;
# a line that starts with '#' (or is empty) yields no match.
match = /^\s*([^#]+)/.exec(line)
return false if not match?
# The captured code part must end in ':' to count as a block opener.
return /:\s*$/.test(match[1])
@aceSession.addDynamicMarker
update: (html, markerLayer, session, config) =>
Range = ace.require('ace/range').Range
@ -287,6 +294,8 @@ module.exports = class SpellView extends CocoView
guess = startOfRow(row)
docRange = new Range(row,guess,row,guess+4)
continue unless ensureLineStartsBlock(lines[row])
if /^\s+$/.test lines[docRange.end.row+1]
docRange.end.row += 1

View file

@ -0,0 +1,91 @@
// Latest class owners (teachers)
// Course instance owners assumed to be teachers unless hourOfCode=1
// Usage:
// mongo <address>:<port>/<database> <script file> -u <username> -p <password>
// Reporting window, given as YYYY-MM-DD days; both values are passed to getClassOwners.
var startDay = '2015-10-01';
var endDay = '2016-10-01';
print('Date range:', startDay, endDay);

// Step 1: ids of everyone who owns a classroom or course instance in the window.
var userIDs = getClassOwners(startDay, endDay);
print('Class owners found:', userIDs.length);

// Step 2: resolve those ids to (sorted) email addresses.
var userEmails = getUserEmails(userIDs);
print('User emails found:', userEmails.length);

// Step 3: dump one address per line.
userEmails.forEach(function (email) {
  print(email);
});
/**
 * Build an ObjectId whose leading timestamp bytes encode the given moment and
 * whose remaining 16 hex digits are all zero, suitable as a range boundary for
 * `_id` queries.
 *
 * @param {string|Date|number} timestamp - Date string, Date object, or
 *     milliseconds since the Unix epoch.
 * @returns {ObjectId} ObjectId built from 8 hex digits of seconds + 16 zeros.
 * @throws {Error} If the timestamp is not a valid date or its seconds value
 *     does not fit in the 4-byte (8 hex digit) timestamp field.
 */
function objectIdWithTimestamp(timestamp) {
  // Convert string date to Date object (otherwise assume timestamp is a date)
  var moment = (typeof timestamp === 'string') ? new Date(timestamp) : timestamp;

  // Whole seconds since the Unix epoch
  var seconds = Math.floor(moment / 1000);
  if (isNaN(seconds) || seconds < 0 || seconds > 0xffffffff) {
    throw new Error('objectIdWithTimestamp: cannot encode timestamp "' + timestamp + '"');
  }

  // Zero-pad to exactly 8 hex digits: without padding, small values produce a
  // string shorter than the 24 hex characters an ObjectId requires.
  var hexSeconds = ('00000000' + seconds.toString(16)).slice(-8);

  // Create an ObjectId with that hex timestamp
  return ObjectId(hexSeconds + "0000000000000000");
}
/**
 * Collect the owners of classrooms and of non-Hour-of-Code course instances
 * whose `_id` timestamps fall in [startDay, endDay) (midnight UTC boundaries).
 *
 * Documents without an owner are reported via print/printjson and skipped;
 * scanning continues so one bad document cannot truncate the result.
 * Each owner id appears at most once in the returned list.
 *
 * @param {string} startDay - Inclusive first day, formatted YYYY-MM-DD.
 * @param {string} endDay - Exclusive last day, formatted YYYY-MM-DD.
 * @returns {Array} Distinct owner ids, in first-seen order.
 */
function getClassOwners(startDay, endDay) {
  var startObj = objectIdWithTimestamp(ISODate(startDay + "T00:00:00.000Z"));
  var endObj = objectIdWithTimestamp(ISODate(endDay + "T00:00:00.000Z"));
  // Both bounds on the same field; equivalent to the explicit $and form.
  var createdInRange = {_id: {$gte: startObj, $lt: endObj}};

  var userIDs = [];
  var seenOwners = {};

  // Drain one cursor, appending each not-yet-seen owner id to userIDs.
  function collectOwners(cursor, skipHourOfCode, missingOwnerMessage) {
    while (cursor.hasNext()) {
      var doc = cursor.next();
      if (skipHourOfCode && doc.hourOfCode) continue;
      if (!doc.ownerID) {
        print(missingOwnerMessage);
        printjson(doc);
        continue;
      }
      // Ids are compared by their string form since distinct id objects are never ===.
      var key = String(doc.ownerID);
      if (!Object.prototype.hasOwnProperty.call(seenOwners, key)) {
        seenOwners[key] = true;
        userIDs.push(doc.ownerID);
      }
    }
  }

  collectOwners(
    db.classrooms.find(createdInRange, {ownerID: 1}),
    false,
    'No classroom owner!'
  );
  collectOwners(
    db.course.instances.find(createdInRange, {hourOfCode: 1, ownerID: 1}),
    true,
    'No course.instance owner!'
  );
  return userIDs;
}
/**
 * Look up the `emailLower` value of every user whose `_id` is in `userIDs`.
 *
 * @param {Array} userIDs - User ids to look up.
 * @returns {Array} The non-empty emailLower values found, sorted ascending.
 */
function getUserEmails(userIDs) {
  var emails = [];
  var query = {_id: {$in: userIDs}};
  var projection = {emailLower: 1};
  for (var users = db['users'].find(query, projection); users.hasNext();) {
    var email = users.next().emailLower;
    // Users without an email on record are left out.
    if (email) emails.push(email);
  }
  return emails.sort();
}

View file

@ -0,0 +1,83 @@
// Latest teacher trial requests
// Usage:
// mongo <address>:<port>/<database> <script file> -u <username> -p <password>
// Window of trial requests to report on (YYYY-MM-DD days).
var startDay = '2015-10-01';
var endDay = '2016-10-01';
print('Date range:', startDay, endDay);

// Who applied for a trial in that window?
var userIDs = getTrialRequestApplicants(startDay, endDay);
print('Trial request applicants found:', userIDs.length);

// Translate applicant ids into email addresses and list them one per line.
var userEmails = getUserEmails(userIDs);
print('User emails found:', userEmails.length);
for (var i = 0, total = userEmails.length; i < total; i += 1) {
  print(userEmails[i]);
}
/**
 * Create an ObjectId usable as an `_id` range boundary: its first 8 hex digits
 * are the given moment in seconds since the Unix epoch, the other 16 are zero.
 *
 * @param {string|Date|number} timestamp - Date string, Date object, or
 *     milliseconds since the Unix epoch.
 * @returns {ObjectId} The constructed ObjectId.
 * @throws {Error} If the timestamp cannot be parsed or its seconds value does
 *     not fit in 8 hex digits.
 */
function objectIdWithTimestamp(timestamp) {
  // Convert string date to Date object (otherwise assume timestamp is a date)
  var when = timestamp;
  if (typeof when === 'string') when = new Date(when);

  // Convert to whole seconds since Unix epoch and reject anything unencodable
  var epochSeconds = Math.floor(when / 1000);
  var encodable = !isNaN(epochSeconds) && epochSeconds >= 0 && epochSeconds <= 0xffffffff;
  if (!encodable) {
    throw new Error('objectIdWithTimestamp: cannot encode timestamp "' + timestamp + '"');
  }

  // Left-pad with zeros so the timestamp part is always 8 hex digits; otherwise
  // the final string would be shorter than a 24-character ObjectId.
  var hexSeconds = epochSeconds.toString(16);
  while (hexSeconds.length < 8) hexSeconds = '0' + hexSeconds;

  // Create an ObjectId with that hex timestamp
  return ObjectId(hexSeconds + "0000000000000000");
}
/**
 * Collect the users behind trial requests whose `_id` timestamps fall in
 * [startDay, endDay) (midnight UTC boundaries).
 *
 * A request with an `applicant` contributes that id directly. Requests without
 * one ("orphaned") are resolved through prepaids whose
 * `properties.trialRequestID` points at them, using the prepaid's `creator`.
 * Prepaids without a creator are reported via print/printjson and skipped;
 * scanning continues so one bad document cannot truncate the result.
 * Each user id appears at most once in the returned list.
 *
 * @param {string} startDay - Inclusive first day, formatted YYYY-MM-DD.
 * @param {string} endDay - Exclusive last day, formatted YYYY-MM-DD.
 * @returns {Array} Distinct applicant/creator ids, in first-seen order.
 */
function getTrialRequestApplicants(startDay, endDay) {
  var startObj = objectIdWithTimestamp(ISODate(startDay + "T00:00:00.000Z"));
  var endObj = objectIdWithTimestamp(ISODate(endDay + "T00:00:00.000Z"));

  var applicantIDs = [];
  var seenApplicants = {};

  // Append an id unless an id with the same string form was already added.
  function addApplicant(id) {
    var key = String(id);
    if (Object.prototype.hasOwnProperty.call(seenApplicants, key)) return;
    seenApplicants[key] = true;
    applicantIDs.push(id);
  }

  // Both bounds on the same field; equivalent to the explicit $and form.
  var cursor = db['trial.requests'].find(
    {_id: {$gte: startObj, $lt: endObj}},
    {applicant: 1}
  );
  var orphanedTrialRequests = [];
  while (cursor.hasNext()) {
    var request = cursor.next();
    if (request.applicant) {
      addApplicant(request.applicant);
    }
    else {
      orphanedTrialRequests.push(request._id);
    }
  }

  // May have orphaned trial requests due to previous external import of requests from Google form
  if (orphanedTrialRequests.length > 0) {
    cursor = db.prepaids.find({'properties.trialRequestID': {$in: orphanedTrialRequests}}, {creator: 1});
    while (cursor.hasNext()) {
      var prepaid = cursor.next();
      if (!prepaid.creator) {
        print('No creator!');
        printjson(prepaid);
        continue;
      }
      addApplicant(prepaid.creator);
    }
  }
  return applicantIDs;
}
/**
 * Fetch the `emailLower` of each user whose `_id` appears in `userIDs`.
 *
 * @param {Array} userIDs - Ids of the users to look up.
 * @returns {Array} Sorted list of the emailLower values that were present.
 */
function getUserEmails(userIDs) {
  var matches = db['users'].find({_id: {$in: userIDs}}, {emailLower: 1});
  var collected = [];
  while (matches.hasNext()) {
    var user = matches.next();
    // Skip accounts that have no email stored.
    if (!user.emailLower) continue;
    collected.push(user.emailLower);
  }
  collected.sort();
  return collected;
}

View file

@ -17,14 +17,15 @@ database.connect()
UserHandler = require '../server/users/user_handler'
User = require '../server/users/User'
startDate = new Date 2015, 11, 1
#startDate = new Date 2015, 11, 1
startDate = new Date 2015, 11, 8 # Testing
query = dateCreated: {$gt: startDate}, emailLower: {$exists: true}
selection = 'name emailLower schoolName courseInstances clans ageRange dateCreated referrer points'
selection = 'name emailLower schoolName courseInstances clans ageRange dateCreated referrer points lastIP hourOfCode preferredLanguage lastLevel'
User.find(query).select(selection).lean().exec (err, users) ->
usersWithSchools = _.filter users, 'schoolName'
schoolNames = _.uniq (u.schoolName for u in usersWithSchools)
log.info "Found #{usersWithSchools.length} users of #{users.length} users registered after #{startDate} with schools like:\n#{schoolNames.slice(0, 10).join('\n')}"
log.info "Found #{usersWithSchools.length} users of #{users.length} users registered after #{startDate} with schools like:\n\t#{schoolNames.slice(0, 10).join('\n\t')}"
# For each user, come up with a confidence that their school is correct.
# For users with low confidence, look for similarities to other users with high confidence.
@ -42,22 +43,138 @@ User.find(query).select(selection).lean().exec (err, users) ->
# ... with the same lastIP that doesn't cover the lastIP of students from multiple schools.
# If we find a school-district-formatted email domain, we could try to match to other schoolNames in that domain, but I doubt that will be helpful until we have a lot of data and a lot of time to manually look things up.
# TODO: do all this work when we actually have a bunch of schoolNames in the system, or these heuristics won't be well-calibrated.
nextPrompt users
# Interactive loop: pick the next user to assign a school to, ask the operator, and act on the answer.
# NOTE(review): this view has lost its indentation and appears to show old and new diff lines
# together (two `nextPrompt =` headers, two `prompt` calls) -- confirm which lines are live
# before relying on the notes below.
nextPrompt = (users) ->
nextPrompt = (users, question) ->
sortUsers users
# Exit when findUserToSchool yields nothing to destructure.
return console.log('Done.') or process.exit() unless [userToSchool, suggestions] = findUserToSchool users
prompt "What should the school for #{JSON.stringify(userToSchool)} be?\nSuggestions: #{suggestions}\n", (answer) ->
# Only build the full suggestion prompt when the caller did not supply a question.
question ?= formatSuggestions userToSchool, suggestions
prompt question, (answer) ->
return console.log('Bye.') or process.exit() if answer in ['q', 'quit']
console.log "You said #{answer}, so we should do something about that."
answer = answer.trim()
# Blank answer: drop this user from the working list.
if answer is ''
users = _.without users, userToSchool
# Numeric answer: 0 keeps the user's own schoolName, n picks suggestion n (1-based).
else unless _.isNaN(num = parseInt(answer, 10))
schoolName = if num then suggestions[num - 1]?.schoolName else userToSchool.schoolName
return finalizePrompt userToSchool, suggestions, schoolName, users
# Typed names shorter than 10 characters are rejected and the operator is re-prompted with "> ".
else if answer.length < 10
console.log "#{answer}? That's kind of short--I don't think school names and locations can be this short. What should it really be?"
return nextPrompt users, "> "
# Anything else is taken verbatim as the school name.
else
return finalizePrompt userToSchool, suggestions, answer, users
nextPrompt users
# Confirm a chosen schoolName and ask which users it should be applied to.
#   userToSchool - the user currently being resolved (selectable as number 0)
#   suggestions  - list of {schoolName, reasons, user}; number n refers to suggestions[n - 1]
#   schoolName   - the name selected in the previous prompt
#   users        - remaining users; the targets are removed before continuing with nextPrompt
finalizePrompt = (userToSchool, suggestions, schoolName, users) ->
console.log "Selected schoolName: \"#{schoolName}\""
question = "Also apply this to other users? Ex.: 'all', '0 1 2 5', 'all -3 -4 -5', '0' to just do this one, or blank to retype school name.\n> "
prompt question, (answer) ->
answer = answer.trim()
# NOTE(review): the question text says blank means "retype school name", but this branch
# selects only the current user -- confirm which behaviour is intended.
if answer is ''
console.log "Should just do", userToSchool._id, userToSchool.emailLower, userToSchool.schoolName
targets = [userToSchool]
# 'all': the current user plus every suggested user.
else if answer is 'all'
targets = [userToSchool].concat (s.user for s in suggestions)
console.log "Doing all #{targets.length} users..."
# 'all -3 -4': everyone except the listed numbers (pieces that do not parse as integers are dropped).
else if /^all/.test answer
targets = [userToSchool].concat (s.user for s in suggestions)
numbers = _.filter (parseInt(d, 10) for d in answer.split(/ *-/)), (n) -> not _.isNaN n
for number in numbers
# NOTE(review): suggestions[number - 1] is not null-checked, so an out-of-range number would throw here.
skip = if number then suggestions[number - 1].user else userToSchool
targets = _.without targets, skip
console.log "Doing all #{targets.length} users without #{numbers}..."
# Otherwise: a space-separated list of numbers naming exactly the users to update.
else
numbers = _.filter (parseInt(d, 10) for d in answer.split(/ +/)), (n) -> not _.isNaN n
targets = ((if number then suggestions[number - 1].user else userToSchool) for number in numbers)
console.log "Doing #{targets.length} users for #{numbers}..."
# NOTE(review): the update below filters on an empty $in list, while the version that uses the
# targets' _ids is commented out -- presumably a dry run; confirm before expecting writes.
#User.update {_id: {$in: (_.map targets, '_id')}}, {schoolName: schoolName}, {multi: true}, (err, result) ->
User.update {_id: {$in: []}}, {schoolName: schoolName}, {multi: true}, (err, result) ->
if err
console.error "Ran into error doing the save:", err
# Ask again with the same selection after a failed save.
return finalizePrompt userToSchool, suggestions, schoolName, users
console.log "Updated users' schoolNames. Result:", result
# Drop the handled users and move on to the next one.
remainingUsers = _.without users, targets...
nextPrompt remainingUsers
# Render a user as one line: the values of the picked fields that exist on the user,
# joined by single spaces.
formatUser = (user) ->
# TODO: replace date string with relative time since signup compared to target user
_.values(_.pick(user, ['name', 'emailLower', 'ageRange', 'dateCreated', 'lastLevel', 'points', 'referrer', 'hourOfCode'])).join(' ')
# Build the prompt text for one user: the user is listed as choice 0 with their current
# schoolName, followed by one numbered line per suggestion.
formatSuggestions = (userToSchool, suggestions) ->
# Each suggestion line: 1-based number (padded to 3), schoolName (padded to 50),
# the reasons joined by ' + ', then a tab and the suggesting user's summary.
suggestionPrompts = ("#{_.str.rpad(i + 1, 3)} #{_.str.rpad(s.schoolName, 50)} #{s.reasons.join(' + ')}\tfrom user: #{formatUser(s.user)}" for s, i in suggestions).join('\n')
"""
What should the school for this user be?
0 #{_.str.rpad(userToSchool.schoolName, 50)} #{formatUser(userToSchool)}
Suggestions:
#{suggestionPrompts}
Choose a number, type a name, enter to skip, or q to quit.
> """
# Choose the next user to resolve and the suggestions to show for them.
# Returns a two-element array: [target user, suggestions].
# NOTE(review): this view has lost its indentation and may show old and new diff lines together;
# as written, the first `return` would make everything after it unreachable -- confirm which
# lines are live.
findUserToSchool = (users) ->
users.sort (a, b) -> b.points - a.points
usersWithSchools = _.filter users, 'schoolName'
schoolNames = _.uniq (u.schoolName for u in usersWithSchools)
return [users[0], schoolNames]
# TODO: don't show users where everyone in the suggestion already has the same school (because we have already done this group)
[bestTarget, bestTargetSuggestions, mostReasons] = [null, [], 0]
# For each categorized field, take the first user of its largest group as a candidate.
for field, groups of topGroups
largestGroup = groups[0]
target = userCategories[field][largestGroup][0]
suggestions = findSuggestions target
# Score the candidate by the total number of reasons across suggestions that carry a schoolName.
reasons = _.reduce suggestions, ((sum, suggestion) -> sum + (if suggestion.schoolName then suggestion.reasons.length else 0)), 0
if reasons > mostReasons
bestTarget = target
bestTargetSuggestions = suggestions
mostReasons = reasons
return [bestTarget, bestTargetSuggestions]
# Collect other users related to `target`, each as {schoolName, reasons, user}.
# Relations considered: same lastIP, a shared course instance or clan, and a similar schoolName.
# Reads the module-level userCategories and topGroups tables.
findSuggestions = (target) ->
suggestions = []
# Same last IP address.
if target.lastIP
for otherUser in userCategories.lastIP[target.lastIP] when otherUser isnt target
suggestions.push schoolName: otherUser.schoolName, reasons: ["IP match"], user: otherUser
# Shared course instances / clans; a user who is already suggested just gains another reason.
for leagueType in ['courseInstances', 'clans']
if target[leagueType]?.length
for league in target[leagueType]
for otherUser in userCategories[leagueType][league] when otherUser isnt target
reason = "#{_.str.humanize(leagueType)} match"
if existingSuggestion = _.find(suggestions, user: otherUser)
existingSuggestion.reasons.push reason
else
suggestions.push schoolName: otherUser.schoolName, reasons: [reason], user: otherUser
# Fuzzy school-name matching, only attempted when the target's schoolName is longer than 5 characters.
if target.schoolName?.length > 5
nameMatches = []
for otherSchoolName in topGroups.schoolName
# stringScore is defined outside this block (presumably the string_score implementation
# referenced at the end of the file -- confirm); matches scoring below 0.25 are discarded.
score = stringScore otherSchoolName, target.schoolName, 0.8
continue if score < 0.25
nameMatches.push schoolName: otherSchoolName, score: score
# Keep just the names, ordered by descending score, and use the first ten.
nameMatches = (match.schoolName for match in (_.sortBy nameMatches, (match) -> -match.score))
for match in nameMatches.slice(0, 10)
reason = "Name match"
for otherUser in userCategories.schoolName[match] when otherUser isnt target
if existingSuggestion = _.find(suggestions, user: otherUser)
existingSuggestion.reasons.push reason
else
suggestions.push schoolName: match, reasons: [reason], user: otherUser
# De-duplicate suggestions by their user.
return _.uniq suggestions, 'user'
# Module-level lookup tables, rebuilt by sortUsers:
#   userCategories[field][value] -> list of users having that value for the field
#   topGroups[field]             -> values of that field, largest group first, size-filtered
userCategories = {}
topGroups = {}
# NOTE(review): usersCategorized is not referenced anywhere else in the visible code.
usersCategorized = {}
# Rebuild userCategories and topGroups from the given users.
sortUsers = (users) ->
# NOTE(review): these two lines rebind the local `users` only; presumably _.sortBy returns a
# new array, so the caller's array would not be reordered -- confirm.
users = _.sortBy users, (u) -> -u.points
users = _.sortBy users, ['schoolName', 'lastIP']
# TODO: also match users by shared school email domains when we can identify those
for field in ['courseInstances', 'lastIP', 'schoolName', 'clans']
userCategories[field] = categorizeUsers users, field
# Order the group keys by descending member count.
topGroups[field] = _.sortBy _.keys(userCategories[field]), (key) -> -userCategories[field][key].length
# Keep only groups with more than 2 members and fewer than 30 (clans) or 5000 (other fields).
topGroups[field] = (group for group in topGroups[field] when 2 < userCategories[field][group].length < (if field is 'clans' then 30 else 5000))
# Group users by the value(s) they hold for `field`.
# Returns an object mapping each value to the list of users that have it.
categorizeUsers = (users, field) ->
categories = {}
# Users whose field is falsy are skipped entirely.
for user in users when value = user[field]
# Array-valued fields place the user in one group per element.
values = if _.isArray(value) then value else [value]
for value in values when value
# Skip values that have a trim method and are empty after trimming (whitespace-only strings).
continue if value.trim and not value.trim()
categories[value] ?= []
categories[value].push user
categories
# https://github.com/joshaven/string_score