forked from brendanahart/MLBDailyProjections
-
Notifications
You must be signed in to change notification settings - Fork 1
/
batterDataScience.py
133 lines (101 loc) · 4.28 KB
/
batterDataScience.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import numpy as np
import pandas as pd
from scipy import stats
import mysql.connector
import os
import datetime as dt
from itertools import chain
import matplotlib.pyplot as plt
import constants
def getDates(day, month, year, numdays, cursor):
    """Look up the database ids of ``numdays`` consecutive dates.

    Starting at ``year-month-day`` (inclusive) and stepping back one day at a
    time, each calendar date is looked up in the ``dates`` table through
    ``cursor``.

    Args:
        day: day of month of the most recent date to include.
        month: month of the most recent date to include.
        year: year of the most recent date to include.
        numdays: number of consecutive days to look up, counting backwards.
        cursor: database cursor; it is executed once per date and then
            iterated to read the matching rows.

    Returns:
        A list holding the first column (``iddates``) of every matching row,
        ordered from the most recent date backwards. Dates without a row in
        the table contribute nothing.
    """
    lookupQuery = "SELECT iddates FROM dates WHERE date = %s"
    startDate = dt.date(year, month, day)
    wantedDates = [startDate - dt.timedelta(days=offset) for offset in range(numdays)]

    # collect the date ids stored in the database
    dateIDs = []
    for wanted in wantedDates:
        cursor.execute(lookupQuery, (wanted,))
        dateIDs.extend(row[0] for row in cursor)
    return dateIDs
def buildConstraintsString(constraints, operators, values):
    """Return the parenthesised, AND-joined SQL filter for ``constraints``.

    Args:
        constraints: ordered column names to filter on.
        operators: dict mapping each column name to its comparison operator
            text (e.g. ``">"``).
        values: dict mapping each column name to its comparison value text.

    Returns:
        A string such as ``"(paL > 10 AND paR > 10)"``; ``"()"`` when
        ``constraints`` is empty.
    """
    # NOTE(review): operators and values are spliced into the SQL text
    # verbatim. That is only acceptable because they are typed in by the
    # person running this script; never feed this untrusted input.
    clauses = [name + " " + operators[name] + " " + values[name] for name in constraints]
    return "(" + " AND ".join(clauses) + ")"


def buildTestDataQuery(features, targets, constraintsString):
    """Build the per-game SELECT statement used to load the test data.

    The selected columns are ``batterID``, then every feature, then every
    target, in that order. The statement keeps a single ``%s`` placeholder
    for the game id.

    Args:
        features: feature column names.
        targets: target column names.
        constraintsString: extra SQL filter appended after ``AND``.

    Returns:
        The SQL text.
    """
    # join() keeps the separators right for any number of targets (the old
    # hand-rolled loop wrote the target separator into the feature string).
    columns = ", ".join(["batterID"] + list(features) + list(targets))
    return ("SELECT " + columns +
            " FROM battersdaily LEFT JOIN batters ON battersdaily.batterID = batters.idbatters"
            " WHERE battersdaily.bgameID = %s AND oppPitcher != 0 AND " + constraintsString)


def rowsToArray(rows, numColumns):
    """Convert fetched result rows into a 2-D float array.

    Args:
        rows: sequence of equal-length row tuples (may be empty).
        numColumns: number of columns in every row.

    Returns:
        A float array of shape ``(len(rows), numColumns)``; an empty result
        gives shape ``(0, numColumns)`` instead of raising.
    """
    flat = np.fromiter(chain.from_iterable(rows), dtype=float, count=-1)
    # Fixing the column count (rather than the row count) keeps the reshape
    # well defined when there are zero rows.
    return flat.reshape(-1, numColumns)


def loadTestData(cursor, query, gameIDs, numColumns):
    """Run ``query`` once per game id and stack all rows into one array.

    Args:
        cursor: database cursor used to execute ``query``.
        query: SQL text with one ``%s`` placeholder for the game id.
        gameIDs: game ids to load.
        numColumns: number of columns ``query`` selects.

    Returns:
        A float array of shape ``(totalRows, numColumns)``; zero rows when
        there are no game ids or nothing matches.
    """
    perGame = []
    for game in gameIDs:
        cursor.execute(query, (game,))
        perGame.append(rowsToArray(cursor.fetchall(), numColumns))
    if not perGame:
        return np.empty((0, numColumns))
    return np.vstack(perGame)


def main():
    """Prompt for batter constraints, load matching rows and plot correlations.

    For every feature column the Pearson correlation with the first target
    column is printed and a scatter plot of feature versus target is shown.
    """
    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    batterConstraints = ['paL', 'paR', 'dkPointsPred', 'dkPointsPredSKLin', 'dkPointsPredRidge',
                         'dkPointsPredRidgeP', 'rotogrindersPoints']
    features = ['dkPointsPred', 'dkPointsPredSKLin', 'dkPointsPredRidge', 'dkPointsPredRidgeP',
                'rotogrindersPoints']
    targets = ['battersdaily.dkpoints']

    cnx = mysql.connector.connect(user=constants.databaseUser,
                                  host=constants.databaseHost,
                                  database=constants.databaseName,
                                  password=constants.databasePassword)
    try:
        cursor = cnx.cursor()

        # dates to retrieve data for batter test data, counted back from the start date
        gameIDs = getDates(constants.bdsStartDay, constants.bdsStartMonth,
                           constants.bdsStartYear, constants.bdsNumDays, cursor)

        # select data with corresponding game id and other constraints
        batterConstraintsTypes = {}
        batterConstraintsValues = {}
        for con in batterConstraints:
            batterConstraintsTypes[con] = readLine("Enter operand for constraint " + con + ": ")
            batterConstraintsValues[con] = readLine("Enter value for constraint " + con + ": ")
        constraintsString = buildConstraintsString(batterConstraints, batterConstraintsTypes,
                                                   batterConstraintsValues)

        print("Loading data...")
        getTestData = buildTestDataQuery(features, targets, constraintsString)
        # one batterID column, then the features, then the targets
        numColumns = 1 + len(features) + len(targets)
        testData = loadTestData(cursor, getTestData, gameIDs, numColumns)
    finally:
        # always release the connection, even if a query or prompt fails
        cnx.close()

    numRows = testData.shape[0]
    if numRows < 2:
        # a correlation needs at least two samples
        print("Not enough matching rows to compute correlations (found " + str(numRows) + ")")
        return

    # Test correlations: column 0 is the batter id, features follow, targets come last
    firstTarget = 1 + len(features)
    testX = testData[:, 1:firstTarget]
    targetData = testData[:, firstTarget]
    for i, feature in enumerate(features):
        featureData = testX[:, i]
        coorVariable = stats.pearsonr(featureData, targetData)
        print("Linear Coorelation of " + feature + " is: " + str(coorVariable[0]))
        print("Plotting " + feature + " versus " + targets[0])
        plt.plot(featureData, targetData, 'ro')
        plt.xlabel(feature)
        plt.ylabel(targets[0])
        plt.show()


if __name__ == "__main__":
    main()