Analytics payment conversion scripts

These are ugly.
This commit is contained in:
Matt Lott 2014-12-15 13:19:20 -08:00
parent c76662c967
commit 30c1b7b31a
3 changed files with 570 additions and 0 deletions

View file

@ -0,0 +1,135 @@
#! /usr/bin/env python
#
# Mixpanel, Inc. -- http://mixpanel.com/
#
# Python API client library to consume mixpanel.com analytics data.
#
# Copyright 2010-2013 Mixpanel, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import urllib
import urllib2
import time
try:
import json
except ImportError:
import simplejson as json
class Mixpanel(object):
    """Small client for Mixpanel's signed HTTP data APIs.

    Every request is a GET whose query string carries the API key, an
    expiry timestamp and an MD5 signature computed from the parameters and
    the API secret (see ``hash_args``).
    """

    # Use TLS so the API key, signature and exported data are not sent in
    # clear text.
    ENDPOINT = 'https://mixpanel.com/api'
    DATA_ENDPOINT = 'https://data.mixpanel.com/api'
    VERSION = '2.0'

    # The text (non-byte) string type; this is ``unicode`` on Python 2.
    _TEXT_TYPE = type(u'')

    def __init__(self, api_key, api_secret):
        """Store the credentials used to sign every request."""
        self.api_key = api_key
        self.api_secret = api_secret

    def request(self, methods, params, format='json'):
        """Perform a signed API call and return the raw response body.

        methods - List of methods to be joined, e.g.
                  ['events', 'properties', 'values'] will give us
                  <ENDPOINT>/2.0/events/properties/values/
                  Calls containing 'export' go to DATA_ENDPOINT instead.
        params  - Extra parameters associated with method. The dict is
                  copied, so the caller's object is left untouched.
        format  - Value sent as the 'format' query parameter.

        The body is returned unparsed; callers decode it themselves.
        """
        query = dict(params)
        query['api_key'] = self.api_key
        query['expire'] = int(time.time()) + 600  # Grant this request 10 minutes.
        query['format'] = format
        # A stale signature must never be part of the data being signed.
        query.pop('sig', None)
        query['sig'] = self.hash_args(query)

        base = self.DATA_ENDPOINT if 'export' in methods else self.ENDPOINT
        path = '/'.join([base, str(self.VERSION)] + list(methods))
        request_url = path + '/?' + self.unicode_urlencode(query)

        response = urllib2.urlopen(request_url, timeout=120)
        try:
            return response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()

    def unicode_urlencode(self, params):
        """
        Convert lists to JSON encoded strings, and correctly handle any
        unicode URL parameters.

        params - dict, or sequence of (key, value) pairs. It is not modified.
        """
        pairs = params.items() if isinstance(params, dict) else params
        encoded = []
        for key, value in pairs:
            if isinstance(value, list):
                value = json.dumps(value)
            if isinstance(value, self._TEXT_TYPE):
                value = value.encode('utf-8')
            encoded.append((key, value))
        return urllib.urlencode(encoded)

    @staticmethod
    def _utf8(value):
        """Return ``value`` as a UTF-8 encoded byte string.

        Byte strings pass through unchanged, text is encoded, and anything
        else is converted with ``str()`` first.
        """
        if isinstance(value, bytes):
            return value
        if not isinstance(value, type(u'')):
            value = str(value)
            if isinstance(value, bytes):  # str() yields bytes on Python 2
                return value
        return value.encode('utf-8')

    def hash_args(self, args, secret=None):
        """
        Hashes arguments by joining key=value pairs, appending a secret, and
        then taking the MD5 hex digest.

        args   - dict of request parameters; list values are JSON encoded
                 for hashing. The dict itself is not modified.
        secret - Overrides the instance's api_secret when given.
        """
        pieces = []
        for key in sorted(args.keys()):
            value = args[key]
            if isinstance(value, list):
                value = json.dumps(value)
            pieces.append(self._utf8(key) + b'=' + self._utf8(value))

        digest = hashlib.md5(b''.join(pieces))
        secret = secret or self.api_secret
        if secret:
            digest.update(self._utf8(secret))
        return digest.hexdigest()
if __name__ == '__main__':
    # Example usage: export raw purchase events and print who triggered them.
    api = Mixpanel(
        api_key='YOUR_API_KEY',
        api_secret='YOUR_API_SECRET'
    )

    # Other example queries:
    # data = api.request(['events'], {
    #     'event' : ['Finished subscription purchase',],
    #     'unit' : 'hour',
    #     'interval' : 24,
    #     'type': 'general'
    # })
    # data = api.request(['funnels', 'list'], {})

    # Dates are 'YYYY-MM-DD'. The previous values used month 14, which is
    # not a valid date.
    # NOTE(review): December is assumed here -- confirm the intended range.
    data = api.request(['export'], {
        'event': ['Finished subscription purchase'],
        'from_date': '2014-12-05',
        'to_date': '2014-12-06'
    })

    # The script parses the export response as one JSON document per line.
    for line in data.split('\n'):
        if not line:
            continue
        try:
            event = json.loads(line)
            print('%s %s' % (event['event'], event['properties']['distinct_id']))
        except (ValueError, KeyError, TypeError):
            # Malformed JSON or an event without the expected fields: report
            # the offending line and carry on with the rest.
            print('error')
            print(line)

View file

@ -0,0 +1,188 @@
# Parse subscription conversion rates via Mixpanel raw export API
import sys
from datetime import tzinfo, timedelta, datetime
from mixpanel import Mixpanel
try:
import json
except ImportError:
import simplejson as json
# NOTE: Mixpanel from_date/to_date are whole days and are inclusive.
# E.g. '2014-12-08' covers any time that day, up to 2014-12-09 12am.
def printPriceConversionRates(api_key, api_secret, startDate, endDate):
    """Print per-price subscription conversion rates from Mixpanel events.

    Exports the 'Show subscription modal' and 'Finished subscription
    purchase' events for the given date range, assigns each event to a price
    bucket based on when it happened, and prints for every price the number
    of distinct users who purchased, the number who were shown the modal,
    the conversion rate and the value per user in cents.

    api_key, api_secret -- Mixpanel credentials.
    startDate, endDate  -- 'YYYY-MM-DD' strings sent as from_date / to_date.

    Processing stops at the first unexpected event or malformed line; the
    totals gathered up to that point are still printed.
    """
    shownEvent = 'Show subscription modal'
    purchasedEvent = 'Finished subscription purchase'
    expectedEvents = (shownEvent, purchasedEvent)

    # dateCreated is in UTC
    # Dec 8th subscribe copy A/B test added
    # 599 - 1st HoC 599 sale started: Dec 9 6:23am PST
    # 999 - 1st HoC 599 sale ended: Dec 10 4:34pm PST
    # 1499 - sub price test starts: Dec 10 5:00pm PST
    #        Only for dateCreated >= 5pm PST
    # 399 - 2nd HoC 399 sale started: Dec 11 7:21pm PST
    # 999 - 2nd HoC sale ended: Dec 13 9:30am PST
    # UTC is +8 hrs
    prices = {
        '399': {
            'start': datetime(2014, 12, 12, 3, 21),
            'end': datetime(2014, 12, 13, 17, 30)
        },
        '599': {
            'start': datetime(2014, 12, 9, 14, 23),
            'end': datetime(2014, 12, 11, 0, 34)
        },
        '999': {
            'start': datetime(2014, 9, 1),
            'end': datetime(2014, 12, 9, 14, 23),
            'start2': datetime(2014, 12, 11, 0, 34),
            'end2': datetime(2014, 12, 12, 3, 21),
            'start3': datetime(2014, 12, 13, 17, 30)
        },
        '1499': {
            'start': datetime(2014, 12, 11, 1),
            'end': datetime(2014, 12, 12, 3, 21)
        }
    }

    # Distinct user ids seen per price and per event, kept apart from the
    # time windows above.
    uniques = {}
    for price in prices:
        uniques[price] = {shownEvent: set(), purchasedEvent: set()}

    def getPriceStr(eventDateStr, userDateStr):
        """Return the price bucket for an event.

        eventDateStr -- event time as a Unix timestamp.
        userDateStr  -- user creation time, e.g. '2014-12-11T12:37:59'.
        """
        eventCreated = datetime.utcfromtimestamp(int(eventDateStr))
        if prices['599']['start'] <= eventCreated < prices['599']['end']:
            return '599'
        if prices['999']['start2'] <= eventCreated < prices['999']['end2']:
            # In 999/1499 zone: only users created after the price test
            # started saw the 1499 price.
            userCreated = datetime.strptime(userDateStr[:19], '%Y-%m-%dT%H:%M:%S')
            if userCreated >= prices['1499']['start']:
                return '1499'
            return '999'
        if prices['399']['start'] <= eventCreated < prices['399']['end']:
            return '399'
        return '999'

    api = Mixpanel(
        api_key=api_key,
        api_secret=api_secret
    )
    print('Requesting Mixpanel data')
    data = api.request(['export'], {
        'event': list(expectedEvents),
        'from_date': startDate,
        'to_date': endDate
    })

    lines = data.split('\n')
    print("Received %d entries" % len(lines))
    for line in lines:
        if not line:
            continue
        try:
            event = json.loads(line)
            properties = event['properties']
            if event['event'] not in expectedEvents:
                print('Unexpected event ' + event['event'])
                break
            if not ('dateCreated' in properties and 'time' in properties
                    and 'distinct_id' in properties):
                continue
            distinctId = properties['distinct_id']
            # NOTE: mixpanel conversions don't account for refunds, which
            # leaves an extra 1499 hit for this user
            # (ch_155tz8KaReE7xLUdQpsa9aqe, cus_5GQqAosNHuRQCQ); skip it.
            if distinctId == '5488ee8a600bc8b206771ba3':
                continue
            priceStr = getPriceStr(properties['time'], properties['dateCreated'])
            uniques[priceStr][event['event']].add(distinctId)
        except Exception:
            # Report the offending line and stop. Unlike a bare except this
            # lets KeyboardInterrupt / SystemExit propagate.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            print(line)
            break

    print('Price, converted, shown, conversion rate, value per user')
    # Sorted by numeric price so the report order is stable between runs.
    for key in sorted(prices, key=int):
        converted = len(uniques[key][purchasedEvent])
        shown = len(uniques[key][shownEvent])
        if shown > 0:
            rate = float(converted) / shown
            print('%s %s %s %.4f%% %.4f cents'
                  % (key, converted, shown, rate * 100, rate * int(key)))
        else:
            print('%s %s %s' % (key, converted, shown))
def getShownSubModal(api_key, api_secret, startDate, endDate):
    """Return how many distinct users were shown the subscription modal.

    Exports the 'Show subscription modal' events for the given date range
    and counts the unique distinct_id values among them.

    api_key, api_secret -- Mixpanel credentials.
    startDate, endDate  -- 'YYYY-MM-DD' strings sent as from_date / to_date.

    Processing stops at the first unexpected event or malformed line; the
    count gathered up to that point is returned.
    """
    shownEvent = 'Show subscription modal'
    api = Mixpanel(
        api_key=api_key,
        api_secret=api_secret
    )
    data = api.request(['export'], {
        'event': [shownEvent],
        'from_date': startDate,
        'to_date': endDate
    })

    uniques = set()
    for line in data.split('\n'):
        if not line:
            continue
        try:
            event = json.loads(line)
            properties = event['properties']
            if event['event'] != shownEvent:
                print('Unexpected event ' + event['event'])
                break
            if 'distinct_id' in properties:
                uniques.add(properties['distinct_id'])
        except Exception:
            # Report the offending line and stop. Unlike a bare except this
            # lets KeyboardInterrupt / SystemExit propagate.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            print(line)
            break
    return len(uniques)
if __name__ == '__main__':
    # Compare by value: `is` on integers only works by accident of
    # CPython's small-int caching.
    if len(sys.argv) != 3:
        print("Script format: <script> <api_key> <api_secret>")
    else:
        api_key = sys.argv[1]
        api_secret = sys.argv[2]
        # HoC
        printPriceConversionRates(api_key, api_secret, '2014-12-08', '2014-12-13')
        # Use these to feed numbers into Stripe parsing script, since Stripe
        # knows better about conversions than Mixpanel
        print('Pre-HoC shown %s'
              % getShownSubModal(api_key, api_secret, '2014-12-06', '2014-12-07'))
        print('Post-HoC shown %s'
              % getShownSubModal(api_key, api_secret, '2014-12-14', '2014-12-14'))

View file

@ -0,0 +1,247 @@
# Parse Stripe payment info via exported payments.csv files
import sys
from datetime import tzinfo, timedelta, datetime
# TODO: use stripe_customers.csv to match payments to our db data
# Stripe file format
# id,Description,Created (UTC),Amount,Amount Refunded,Currency,Converted Amount,Converted Amount Refunded,Fee,Tax,Converted Currency,Mode,Status,Statement Description,Customer ID,Customer Description,Customer Email,Captured,Card Last4,Card Brand,Card Funding,Card Exp Month,Card Exp Year,Card Name,Card Address Line1,Card Address Line2,Card Address City,Card Address State,Card Address Country,Card Address Zip,Card Issue Country,Card Fingerprint,Card CVC Status,Card AVS Zip Status,Card AVS Line1 Status,Disputed Amount,Dispute Status,Dispute Reason,Dispute Date (UTC),Dispute Evidence Due (UTC),Invoice ID,productID (metadata),userID (metadata),gems (metadata),timestamp (metadata)
def getGemCounts(paymentsFile):
    """Count paid non-subscription payments per amount.

    paymentsFile -- path to a Stripe payments.csv export (header row first).

    Returns a dict mapping the amount in cents to the number of rows whose
    Status is 'Paid' and whose Statement Description is not 'Sub'.
    """
    # Local import so this function does not depend on the module header.
    import csv

    gems = {}
    with open(paymentsFile) as f:
        # csv.reader honours quoting, so a comma inside a quoted field (for
        # example the description) no longer shifts the columns.
        rows = csv.reader(f)
        next(rows, None)  # skip the header row
        for data in rows:
            if len(data) < 14:
                continue  # blank or truncated line
            status = data[12]
            statementDescription = data[13]
            if status != 'Paid' or statementDescription == 'Sub':
                continue
            # round() before int(): 19.99 * 100 is 1998.9999999999998 in
            # floating point and would otherwise truncate to 1998.
            amount = int(round(float(data[3]) * 100))
            gems[amount] = gems.get(amount, 0) + 1
    return gems
def getSubCounts(paymentsFile):
    """Count paid subscription payments per amount.

    paymentsFile -- path to a Stripe payments.csv export (header row first).

    Returns a dict mapping the amount in cents to the number of rows whose
    Status is 'Paid' and whose Statement Description is 'Sub'.
    """
    # Local import so this function does not depend on the module header.
    import csv

    subs = {}
    with open(paymentsFile) as f:
        # csv.reader honours quoting, so a comma inside a quoted field (for
        # example the description) no longer shifts the columns.
        rows = csv.reader(f)
        next(rows, None)  # skip the header row
        for data in rows:
            if len(data) < 14:
                continue  # blank or truncated line
            status = data[12]
            statementDescription = data[13]
            if status != 'Paid' or statementDescription != 'Sub':
                continue
            # round() before int(): 19.99 * 100 is 1998.9999999999998 in
            # floating point and would otherwise truncate to 1998.
            amount = int(round(float(data[3]) * 100))
            subs[amount] = subs.get(amount, 0) + 1
    return subs
def getHoCPriceConversionRates(paymentsFile):
    """Compute subscription conversion rates per price for the HoC window.

    The 'Show subscription modal' counts are hard-coded figures taken from
    Mixpanel; the 'Finished subscription purchase' counts are the paid 'Sub'
    rows in the Stripe export created from 2014-12-08 up to, but not
    including, 2014-12-14 (UTC).

    paymentsFile -- path to a Stripe payments.csv export (header row first).

    Returns a dict keyed by price in cents (as a string); each value holds
    both counts plus 'Conversion Rate' and 'Value Per User' (in cents).
    Paid subscriptions at a price with no bucket are reported on stderr and
    left out.
    """
    # Local import so this function does not depend on the module header.
    import csv

    shownKey = 'Show subscription modal'
    purchasedKey = 'Finished subscription purchase'

    # Show counts from Mixpanel. Price windows (UTC):
    #   399:  2014-12-12 03:21 to 2014-12-13 17:30
    #   599:  2014-12-09 14:23 to 2014-12-11 00:34
    #   999:  every other time since 2014-09-01
    #   1499: 2014-12-11 01:00 to 2014-12-12 03:21 (price test users only)
    prices = {
        '399': {shownKey: 31157, purchasedKey: 0},
        '599': {shownKey: 31044, purchasedKey: 0},
        '999': {shownKey: 45343, purchasedKey: 0},
        '1499': {shownKey: 19519, purchasedKey: 0}
    }

    # Charges counted in the 1499 bucket whatever amount the CSV records.
    # TODO: may be one 1499 sale
    priceTest = frozenset([
        'ch_158LyeKaReE7xLUdnt0m9pjb',
        'ch_158OPLKaReE7xLUdcqYQ5qst',
        'ch_158jkBKaReE7xLUd305I3WBy'
    ])

    # Find 'Finished subscription purchase' event from Stripe data
    startDate = datetime(2014, 12, 8)
    endDate = datetime(2014, 12, 14)
    print('%s to %s' % (startDate, endDate))

    with open(paymentsFile) as f:
        # csv.reader honours quoting, so a comma inside a quoted field no
        # longer shifts the columns.
        rows = csv.reader(f)
        next(rows, None)  # skip the header row
        for data in rows:
            if len(data) < 14:
                continue  # blank or truncated line
            # Created (UTC) looks like '2014-12-14 06:01'
            createdDate = datetime.strptime(data[2][:16], '%Y-%m-%d %H:%M')
            if createdDate < startDate or createdDate >= endDate:
                continue
            if data[12] != 'Paid' or data[13] != 'Sub':
                continue
            paymentID = data[0]
            if paymentID in priceTest:
                amountStr = '1499'
            else:
                # round() before int() so that e.g. 19.99 does not
                # truncate to 1998.
                amountStr = str(int(round(float(data[3]) * 100)))
            if amountStr not in prices:
                sys.stderr.write('Skipping %s: no price bucket for %s\n'
                                 % (paymentID, amountStr))
                continue
            prices[amountStr][purchasedKey] += 1

    # Calculate conversion rates
    for key, item in prices.items():
        rate = float(item[purchasedKey]) / item[shownKey]
        item['Conversion Rate'] = rate
        item['Value Per User'] = rate * int(key)
    return prices
def getPreHoCPriceConversionRates(paymentsFile):
    """Compute the subscription conversion rate for the pre-HoC window.

    Pre-HoC but after full stop paywall in forest. The 'Show subscription
    modal' count is a hard-coded figure taken from Mixpanel; the 'Finished
    subscription purchase' count is the number of paid 'Sub' rows in the
    Stripe export created from 2014-12-06 up to, but not including,
    2014-12-08 (UTC).

    paymentsFile -- path to a Stripe payments.csv export (header row first).

    Returns a dict keyed by price in cents (as a string); each value holds
    both counts plus 'Conversion Rate' and 'Value Per User' (in cents).
    Paid subscriptions at a price with no bucket are reported on stderr and
    left out.
    """
    # Local import so this function does not depend on the module header.
    import csv

    shownKey = 'Show subscription modal'
    purchasedKey = 'Finished subscription purchase'

    # Show count from Mixpanel
    prices = {
        '999': {shownKey: 3447, purchasedKey: 0}
    }

    # Find 'Finished subscription purchase' event from Stripe data
    startDate = datetime(2014, 12, 6)
    endDate = datetime(2014, 12, 8)
    print('%s to %s' % (startDate, endDate))

    with open(paymentsFile) as f:
        # csv.reader honours quoting, so a comma inside a quoted field no
        # longer shifts the columns.
        rows = csv.reader(f)
        next(rows, None)  # skip the header row
        for data in rows:
            if len(data) < 14:
                continue  # blank or truncated line
            # Created (UTC) looks like '2014-12-14 06:01'
            createdDate = datetime.strptime(data[2][:16], '%Y-%m-%d %H:%M')
            if createdDate < startDate or createdDate >= endDate:
                continue
            if data[12] != 'Paid' or data[13] != 'Sub':
                continue
            # round() before int() so that e.g. 19.99 does not truncate
            # to 1998.
            amountStr = str(int(round(float(data[3]) * 100)))
            if amountStr not in prices:
                sys.stderr.write('Skipping %s: no price bucket for %s\n'
                                 % (data[0], amountStr))
                continue
            prices[amountStr][purchasedKey] += 1

    # Calculate conversion rates
    for key, item in prices.items():
        rate = float(item[purchasedKey]) / item[shownKey]
        item['Conversion Rate'] = rate
        item['Value Per User'] = rate * int(key)
    return prices
def getPostHoCPriceConversionRates(paymentsFile):
    """Compute the subscription conversion rate for the post-HoC window.

    The 'Show subscription modal' count is a hard-coded figure taken from
    Mixpanel; the 'Finished subscription purchase' count is the number of
    paid 'Sub' rows in the Stripe export created from 2014-12-14 up to, but
    not including, 2014-12-15 (UTC).

    paymentsFile -- path to a Stripe payments.csv export (header row first).

    Returns a dict keyed by price in cents (as a string); each value holds
    both counts plus 'Conversion Rate' and 'Value Per User' (in cents).
    Paid subscriptions at a price with no bucket are reported on stderr and
    left out.
    """
    # Local import so this function does not depend on the module header.
    import csv

    shownKey = 'Show subscription modal'
    purchasedKey = 'Finished subscription purchase'

    # Show count from Mixpanel
    prices = {
        '999': {shownKey: 2935, purchasedKey: 0}
    }

    # Find 'Finished subscription purchase' event from Stripe data
    startDate = datetime(2014, 12, 14)
    endDate = datetime(2014, 12, 15)
    print('%s to %s' % (startDate, endDate))

    with open(paymentsFile) as f:
        # csv.reader honours quoting, so a comma inside a quoted field no
        # longer shifts the columns.
        rows = csv.reader(f)
        next(rows, None)  # skip the header row
        for data in rows:
            if len(data) < 14:
                continue  # blank or truncated line
            # Created (UTC) looks like '2014-12-14 06:01'
            createdDate = datetime.strptime(data[2][:16], '%Y-%m-%d %H:%M')
            if createdDate < startDate or createdDate >= endDate:
                continue
            if data[12] != 'Paid' or data[13] != 'Sub':
                continue
            # round() before int() so that e.g. 19.99 does not truncate
            # to 1998.
            amountStr = str(int(round(float(data[3]) * 100)))
            if amountStr not in prices:
                sys.stderr.write('Skipping %s: no price bucket for %s\n'
                                 % (data[0], amountStr))
                continue
            prices[amountStr][purchasedKey] += 1

    # Calculate conversion rates
    for key, item in prices.items():
        rate = float(item[purchasedKey]) / item[shownKey]
        item['Conversion Rate'] = rate
        item['Value Per User'] = rate * int(key)
    return prices
if __name__ == '__main__':
    # Usage: <script> [payments.csv]
    paymentsFile = 'stripe_payments.csv'
    # Compare by value: `is` on integers only works by accident of
    # CPython's small-int caching.
    if len(sys.argv) == 2:
        paymentsFile = sys.argv[1]
    print('Processing %s' % (paymentsFile,))

    print('Subs')
    print(getSubCounts(paymentsFile))
    print('Gems')
    print(getGemCounts(paymentsFile))

    # The three conversion reports share one layout, so drive them from a
    # table instead of repeating the printing code.
    reports = (
        ('Pre-HoC Conversion Rates', getPreHoCPriceConversionRates),
        ('HoC Conversion Rates', getHoCPriceConversionRates),
        ('Post-HoC Conversion Rates', getPostHoCPriceConversionRates),
    )
    for title, getRates in reports:
        print(title)
        priceConversionRates = getRates(paymentsFile)
        print('Price, converted, shown, conversion rate, value per user')
        # Sorted by numeric price so the report order is stable between runs.
        for key in sorted(priceConversionRates, key=int):
            item = priceConversionRates[key]
            print('%s %s %s %.4f%% %.4f cents' % (
                key,
                item['Finished subscription purchase'],
                item['Show subscription modal'],
                item['Conversion Rate'] * 100,
                item['Conversion Rate'] * int(key)))