This repository has been archived by the owner on Jul 15, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
explore.py
100 lines (74 loc) · 4.09 KB
/
explore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from __future__ import print_function
import dataset
import explore_utils
import argparse
def main(args):
    """Print an exploratory summary of the dataset to stdout.

    Loads the data through ``dataset.Dataset`` and then prints, in a fixed
    order, the figures computed by the ``explore_utils`` helpers.

    Args:
        args: Parsed command-line namespace. Only ``args.debug`` is read; it
            is passed straight to ``dataset.Dataset``.

    Returns:
        None. All results are written with ``print``.
    """
    print("Loading the dataset...")
    data = dataset.Dataset(args.debug)

    # Size of the train and test sets.
    train_row_count = len(data.train)
    print("Number of rows in the training set: ", train_row_count)
    train_column_count = len(data.train.columns)
    print("Number of columns in the training set: ", train_column_count)
    test_row_count = len(data.test)
    print("Number of rows in the test set: ", test_row_count)
    test_column_count = len(data.test.columns)
    print("Number of columns in the test set: ", test_column_count)

    # Highest number of visits by a single customer.
    most_visits = explore_utils.find_most_visit(data)
    print("The most visit times for a customer in train set is: ", most_visits)

    # Customer spending at a handful of upper percentiles.
    cutoffs = [95, 97.5, 99, 99.9, 99.99]
    spend_at_cutoffs = explore_utils.find_customer_revenue_percentiles(
        data, cutoffs)
    for cutoff, spend in zip(cutoffs, spend_at_cutoffs):
        print("%2.2f%% of customers spend less than: $%.2f" % (cutoff, spend))

    # Revenue broken down by trafficSource.
    print("The revenue per trafficSource is: ")
    revenue_by_source = explore_utils.revenue_per_trafficsource(data)
    print(revenue_by_source)

    # Share of transactions with non-zero revenue.
    non_zero_fraction = (
        explore_utils.find_fraction_of_transactions_with_non_zero_revenue(data))
    print("Fraction of transactions that have non-zero revenue: ",
          non_zero_fraction)

    # Most common traffic sources and their counts.
    top_n = 6
    top_sources = explore_utils.find_most_common_traffic_sources(data, top_n)
    print("\nThe {} most common sources of traffic are :".format(top_n))
    # The result is walked via ``.index`` and looked up by label
    # (presumably a pandas Series -- confirm in explore_utils).
    for label in top_sources.index:
        print(' {}: {}'.format(label, top_sources[label]))

    # channelGrouping counts and mean customer revenue (#31).
    grouping_counts, grouping_means = (
        explore_utils.find_channel_grouping_revenue(data))
    print('Channel Grouping Counts:')
    print(grouping_counts)
    print('Mean Total Revenue by Channel Grouping')
    print(grouping_means)

    # transactionRevenue by region.
    revenue_by_region = explore_utils.find_transaction_by_region(data)
    print("The transaction revenues by region are: ", revenue_by_region)

    # Percentage of visitors who visit exactly once.
    one_visit_percent = explore_utils.find_one_visit_percent(data)
    print("%2.2f%% of all visitors to the site visit exactly once."
          % one_visit_percent)

    # Sales statistics for first-time versus returning visitors, passed
    # through ``.round(2)`` before printing.
    visit_stats = explore_utils.find_return_visit_stats(data).round(2)
    print("\nStatistics of total transactions for unique visitors: \n{}\n"
          .format(visit_stats))

    # Device-related reports. Each entry pairs the printed header with a
    # deferred helper call, so every helper still runs immediately before its
    # own line is printed:
    #   1. revenue summary including sessions that produced no revenue
    #   2. revenue summary restricted to sessions that generated revenue
    #   3. percent of revenue-generating sessions accessed via each device
    #   4. percent of total revenue attributed to each device
    #   5. percent of each device's sessions that generated revenue
    device_reports = (
        ("Revenue summary statistics by device, zeroes included: \n",
         lambda: explore_utils.find_revenue_summary_statistics_for_devices(
             data, True)),
        ("Revenue summary statistics by device, zeroes excluded: \n",
         lambda: explore_utils.find_revenue_summary_statistics_for_devices(
             data, False)),
        ("Percent of revenue generating sessions that used a particular device: \n",
         lambda: explore_utils.find_percent_sessionIds_using_certain_device(
             data)),
        ("Percent of total revenue attributed to sessions using a particular device: \n",
         lambda: explore_utils.find_percent_of_total_revenue_by_device(data)),
        ("Percent of total sessions using a particular device that generated revenue: \n",
         lambda: explore_utils.find_percent_device_uses_generating_revenue(
             data)),
    )
    for header, compute in device_reports:
        print(header, compute())
if __name__ == '__main__':
    # Script entry point: define the command line, parse it, and hand the
    # resulting namespace to main().
    parser = argparse.ArgumentParser(
        description='Explore the Google Analytics dataset.')
    # --debug is a boolean switch stored on the namespace as ``debug``.
    parser.add_argument(
        '--debug',
        action='store_true',
        dest='debug',
        help='run in debug mode',
    )
    args = parser.parse_args()
    main(args)