Improve school suggestion rankings

This commit is contained in:
Nick Winter 2015-12-16 06:36:17 -08:00 committed by Rob
parent 895e9194e4
commit 68f2a169dd

View file

@ -86,6 +86,7 @@ finalizePrompt = (userToSchool, suggestions, schoolName, users) ->
nextPrompt remainingUsers
findNumbers = (answer, max) ->
answer = answer.replace /,/g, ' '
numbers = (parseInt(d, 10) for d in (' ' + answer + ' ').replace(/ /g, ' ').match(/ (\d+) /g) ? [])
ranges = answer.match(/(\d+-\d+)/g) or []
for range in ranges
@ -116,7 +117,7 @@ formatSuggestions = (userToSchool, suggestions) ->
checkedTopGroups = {}
findUserToSchool = (users) ->
# We find the top user from the top group that we can make the most reasoned suggestions about what the school name would be.
[bestTarget, bestTargetSuggestions, mostReasons, bestGroup] = [null, [], 0, null]
[bestTarget, bestTargetSuggestions, bestSuggestionsScore, bestGroup] = [null, [], 0, null]
for field, groups of topGroups
for nextLargestGroup in groups when not checkedTopGroups[nextLargestGroup]
possibleTargets = userCategories[field][nextLargestGroup]
@ -125,7 +126,7 @@ findUserToSchool = (users) ->
alreadyDone = false
for schoolName in schoolNames when schoolName?.length > 10 and /,.+,/.test schoolName # Long enough school name with location info (two commas)
sharedCount = _.filter(possibleTargets, schoolName: schoolName).length
if sharedCount > 0.5 * possibleTargets.length
if sharedCount > 20 and sharedCount > 0.25 * possibleTargets.length
console.log 'Already done', schoolName, sharedCount, possibleTargets.length, 'for', field, nextLargestGroup
alreadyDone = true
continue if alreadyDone
@ -134,22 +135,11 @@ findUserToSchool = (users) ->
for i in [0 ... nSamples]
target = possibleTargets[Math.floor i * possibleTargets.length / (nSamples + 1)]
suggestions = findSuggestions target
reasons = _.reduce suggestions, ((sum, suggestion) ->
for suggestion in suggestions
for reason in suggestion.reasons
sum += switch reason
when 'Course instances' then 150
when 'IP' then 40
when 'Referrer' then 20
when 'Name' then 15
when 'Domain' then (if getDomain(target) in ['cps.edu', 'mynewcaneyisd.org', 'fsusd.org'] then 1 else 10)
when 'Clans' then 0.01
sum
), 0
if reasons > mostReasons
suggestionsScore = scoreSuggestions suggestions, target
if suggestionsScore > bestSuggestionsScore
bestTarget = target
bestTargetSuggestions = suggestions
mostReasons = reasons
bestSuggestionsScore = suggestionsScore
bestGroup = nextLargestGroup
break
checkedTopGroups[bestGroup] = true
@ -206,9 +196,23 @@ findSuggestions = (target) ->
suggestions.push schoolName: otherUser.schoolName, reasons: [reason], user: otherUser
if debugging then console.log ' Done checking referrer', (new Date()) - t0
suggestions = _.sortBy suggestions, (s) -> (s.schoolName or '').toLowerCase()
suggestions = _.sortBy suggestions, (s) -> -s.reasons.length
suggestions = _.sortBy suggestions, (s) -> -scoreSuggestions [s], target
return suggestions
scoreSuggestions = (suggestions, target) ->
_.reduce suggestions, ((sum, suggestion) ->
for suggestion in suggestions
for reason in suggestion.reasons
sum += switch reason
when 'Course instances' then 150
when 'IP' then 40
when 'Referrer' then 20
when 'Name' then 15
when 'Domain' then (if getDomain(target) in ['cps.edu', 'mynewcaneyisd.org', 'fsusd.org'] then 1 else 10)
when 'Clans' then 0.01
sum
), 0
userCategories = {}
topGroups = {}
usersCategorized = {}