-
Notifications
You must be signed in to change notification settings - Fork 1
/
score_a_ranking_algorithm.py
158 lines (117 loc) · 5.75 KB
/
score_a_ranking_algorithm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import sqlite3
import scipy
from scipy import stats
from ranks import *
def main():
    """Score the ranking algorithm against the quote history in data/data.db.

    For every symbol in the `symbols` table, the recent close prices are
    loaded from `quotes` in chronological order, a window is slid over
    them, and the prediction made by `expectedRatioGain` (from `ranks`)
    on each window is compared with the relative price changes actually
    seen over the following `max_own_window` samples.  The per-symbol mean
    error and the mean over all scored symbols are printed.

    Symbols with too little history to fit a single window are reported
    and left out of the overall mean.
    """
    # Local import: scipy.array is a deprecated alias of numpy.array, and
    # numpy is always installed alongside scipy.
    import numpy

    # Window parameters are counted in samples (rows, i.e. trading days).
    history_days = 90   # calendar days of quotes pulled from the database
    slide_size = 5      # shift between consecutive windows
    window_size = 30    # span handed to the ranking algorithm
    max_own_window = 5  # samples a purchase has to reach the prediction

    symb_sql = '''
        SELECT `symbol`
        FROM `symbols`
    '''
    # Rows must be oldest-first: the scoring treats the row index as the
    # time axis and reads the rows *after* a window as its future.
    comp_sql = '''
        SELECT `date`, `open`,
               `high`, `low`, `close`,
               `volume`, `adj_close`
        FROM `quotes`
        WHERE `symbol` = ? AND
              `date` > DATE('now', ?)
        ORDER BY `date` ASC
    '''
    history_modifier = '-%d days' % history_days
    # Column scored against; the query also returns open/high/low/adj_close.
    column_of_interest = 'close'

    all_symbol_errors = []
    conn = sqlite3.connect('data/data.db')
    try:
        conn.row_factory = sqlite3.Row
        # Materialise the symbol list so the outer result set is not held
        # open while the per-symbol queries run.
        symbols = [row['symbol'] for row in conn.execute(symb_sql)]

        for symbol in symbols:
            quote_rows = conn.execute(
                comp_sql, (symbol, history_modifier)).fetchall()

            # dtype=float keeps the relative-gain division a true division
            # even if prices are stored as integers.
            values = numpy.array(
                [row[column_of_interest] for row in quote_rows], dtype=float)
            # TODO: base the time axis on the `date` column, not row index.
            time_vector = numpy.arange(len(values))

            symbol_error = _mean_window_error(
                time_vector, values, window_size, slide_size, max_own_window)
            if symbol_error is None:
                print("%s :  skipped (not enough history)" % (symbol,))
                continue

            print("%s :  %s" % (symbol, symbol_error))
            all_symbol_errors.append(symbol_error)
    finally:
        conn.close()

    if all_symbol_errors:
        overall_error = sum(all_symbol_errors) / len(all_symbol_errors)
    else:
        # Nothing could be scored; report NaN instead of dividing by zero.
        overall_error = float('nan')

    print("allSymbolErrors:  %s" % (all_symbol_errors,))
    print("overallError:  %s" % (overall_error,))


def _mean_window_error(time_vector, values, window_size, slide_size,
                       max_own_window):
    """Return the mean best-case prediction error over all sliding windows.

    Each window covers indices [left, left + window_size] inclusive, so it
    holds window_size + 1 samples.  The prediction for a window is compared
    with the relative change from the window's last value to each of the
    next `max_own_window` values; the smallest absolute difference is that
    window's error.

    Returns None when no window could be scored (history too short, or
    every window ended on a zero value).
    """
    usable = min(len(time_vector), len(values))
    total_error = 0.0
    slides = 0

    # Stopping here guarantees a full `max_own_window` look-ahead per window.
    for left in range(0, usable - window_size - max_own_window, slide_size):
        right = left + window_size  # inclusive
        window_times = time_vector[left:right + 1]
        window = values[left:right + 1]

        current = window[-1]
        if current == 0:
            # Relative gain is undefined from a zero price; skip the window.
            continue

        # NOTE(review): expectedRatioGain comes from `ranks`; presumably it
        # returns a scalar fractional gain -- confirm against that module.
        predicted = expectedRatioGain(window_times, window)

        future = values[right + 1:right + 1 + max_own_window]
        actual_gain = (future - current) / current
        # How close did the prediction get on any of the following days.
        total_error += abs(predicted - actual_gain).min()
        slides += 1

    if not slides:
        return None
    return float(total_error / slides)
# Run the scoring pass only when executed as a script, not when imported.
if __name__ == "__main__":
    main()