-
Notifications
You must be signed in to change notification settings - Fork 1
/
part1_user_input_visualization.py
317 lines (259 loc) · 15.8 KB
/
part1_user_input_visualization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
"""CSC111 Winter 2023 Phase 2: Decoding the Secrets of Successful Stocks (Part 1)
Description
==============================================================
This module takes user input from the web page, calls parts 2, 3 and 4,
and uses the visualization function to plot the resulting graph.
The function run_program is the most important function in this module,
so please read it carefully.
Copyright and Usage Information
==============================================================
This file is provided solely for the personal and private use of our group
members at the University of Toronto St. George campus. All forms of
distribution of this code, whether as given or with any changes, are
expressly prohibited. For more information on copyright for this project,
please consult Yehyun Lee at [email protected].
This file is Copyright (c) 2023 Yehyun Lee, Aung Zwe Maw and Wonjae Lee.
"""
from __future__ import annotations

import ast
import datetime
from datetime import timedelta

import plotly.graph_objs as go
import streamlit as st
# from python_ta.contracts import check_contracts
from PIL import Image  # Import image from pillow to open images

import part2_factor_data_processing
import part3_recommendation_tree
import part4_investment_simulation
# @check_contracts
def _parse_manual_stocks(raw_text: str) -> list[str]:
    """Parse the text typed into the 'Manual' box into a list of ticker strings.

    The text must be a Python literal list (or tuple) of strings, for example
    "['MSFT', 'AAPL']". Surrounding whitespace is stripped from every ticker.

    SECURITY: this text comes straight from a browser text box. It used to be
    passed to eval(), which would execute arbitrary Python typed by a visitor.
    ast.literal_eval only accepts literals, so the documented list[str] input
    still works while code execution is no longer possible.

    Raises:
        ValueError: if raw_text is not a literal list/tuple containing only strings.
    """
    try:
        parsed = ast.literal_eval(raw_text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
        raise ValueError("input is not a valid Python list literal such as ['MSFT', 'AAPL']") from err

    if not isinstance(parsed, (list, tuple)) or not all(isinstance(ticker, str) for ticker in parsed):
        raise ValueError("input must be a list of strings such as ['MSFT', 'AAPL']")

    # Remove spaces, re-format incorrect input.
    return [ticker.strip() for ticker in parsed]


def user_input() -> None:
    """Render the Streamlit page, collect the user's options and run the simulation.

    The page shows the project description, then asks for:
      - the stocks to consider (all stocks from part 2's CSV, a multiselect, or a manually typed list),
      - the date to start investing (training ends 365 days before that date),
      - the risk percentage (1 to 100, default 50),
      - the factors to use ('average-price' is always shown as included).

    When the 'Run Program' button is pressed, run_program is called with those options and the
    returned figure and statistics are displayed. If the manually typed stock list cannot be
    parsed, an error is shown and the script run is stopped.
    """
    # Set Title of Web Page
    st.set_page_config(page_title="Decoding the Secrets of Successful Stocks")

    # Title
    st.title("Decoding the Secrets of Successful Stocks")
    st.text("Project by Yehyun Lee, Aung Zwe Maw and Wonjae Lee")
    st.text("Web page written and hosted by Yehyun Lee")
    # This un-indent is needed. Due to st.text reading function tab as indent of texts.
    st.text("""Copyright and Usage Information
===============================
This page is provided solely for the personal and private use of our group
memebers at the University of Toronto St. George campus. All forms of
distribution of this code, whether as given or with any changes, are
expressly prohibited. For more information on copyright for this project,
please consult Yehyun Lee at [email protected].
This page is Copyright (c) 2023 Yehyun Lee.""")

    # Display Image
    img = Image.open("theme.png")
    st.image(img, width=400)
    url = "https://unsplash.com/@chrisliverani"
    st.markdown("*Image credit [Chris Liverani](%s)*" % url)

    # Header
    st.header("About")

    # Subheader
    st.subheader("Project Goal")
    # Text
    # Dev Note: "⠀" is a blank symbol to fix streamlit error not having tab in first sentence.
    st.write("""⠀\tAs an investor, the question is, which stocks are most profitable in the long-term, and what factors
contribute to their success? Our team will use a tree data structure to identify the promising factors
and the most likely companies to invest in and utilize the backtesting approach to determine the
profitability of the investments.""")
    st.write("""⠀\tWe aim to determine the most promising stocks for investment by
analyzing the correlation between their performance metrics. Based on the analysis, we will ranks the factors
and create a binary tree from the factors. Then, we will filter stocks accordingly and invest in what the
binary tree suggests. Lastly, using backtesting data we will draw conclusions on its performance.""")

    # Subheader
    st.subheader("Motivation")
    # Text
    # NOTE(review): the copy below contains typos ("confused to perform", "informed ed decisions");
    # it is kept byte-identical here and should be proofread separately.
    st.write("""⠀\tOne of the reasons for this project is the increase in interest in stocks during and after the
pandemic. Many people have turned to invest in stocks to make passive income or grow their savings during a time of
economic uncertainty. However, with so many companies to choose from, it can be challenging to know which
ones are most likely to provide a good return on investment. By using past datasets, we can analyze
historical data, current market trends, and other relevant factors to identify the companies that are
confused to perform well in the future. This can help investors make more informed ed decisions about where
to put their money and maximize their returns. Another motivation for developing a stock market investment
program is the passion of one of our partners, Yehyun Lee for stock market trends. His passion positively
affected us and we were also willing to explore the trends along with him.""")
    url = "https://github.com/YehyunLee/Decoding_Secrets_of_Successful_Stocks"
    st.write("For more information about our project, please visit [Yehyun's Github](%s) and check the LaTeX file"
             "." % url)

    # Header
    st.header("User Input")
    # Text
    st.write("Choose stocks that you might want to invest.")

    # Subheader
    st.subheader("Choosing Stocks")
    st.warning("Please select more than 2 stocks by precondition.")
    # Radio Button
    status1 = st.radio("Select Methods 👉", ('Select All (Recommended)', 'Options', 'Manual (Expert Only)'))
    # Default selection: every stock returned by part 2's CSV reader.
    stocks = part2_factor_data_processing.read_csv()
    if status1 == 'Select All (Recommended)':
        st.info("Selecting all takes around 5 minutes to run program.")
    elif status1 == 'Options':
        # Multi select box
        stocks = st.multiselect('Select Stocks', stocks)
    else:
        st.warning("We highly recommend you avoid using this option unless you are well aware of stocks that are "
                   "supported by APIs the program use. Some stocks may cause error. However, we've put internal work "
                   "to handle these issues. Give a shot! 🧪")
        raw_stocks = st.text_input("Write in list[str] form", "['MSFT', 'META', 'AAPL', 'GOOGL', 'SQQQ']")
        try:
            # Parsed as a literal only; never evaluated as code.
            stocks = _parse_manual_stocks(raw_stocks)
        except ValueError as err:
            # Show a readable message and halt this script run instead of crashing with a traceback.
            st.error(f"Could not read the stock list: {err}")
            st.stop()
    # Write the selected options
    st.write("You selected", len(stocks), 'stocks')
    st.success(stocks)

    # Subheader
    st.subheader("Choosing Date")
    st.info("Default value is recommended.")
    # The user picks the investing start date; training ends 365 days before it.
    end_datetime = st.date_input(
        "When would you like to start investing?",
        datetime.date(2016, 3, 25)) - timedelta(days=365)
    st.write('Program will train from 2009 to', end_datetime, 'and start investing', end_datetime + timedelta(days=365))
    end_date = str(end_datetime)

    # Subheader
    st.subheader("Choosing Risk Percentage")
    # Slider
    st.info("The program allocates its investments in a number of top-ranked stocks that corresponds "
            "to the given risk percentage. "
            "If risk percentage is 100%, program invest all stocks including bad performing stocks. "
            "If 10%, program invest top 10% of ranked stocks. "
            "A lower risk percentage leads to investment of fewer stocks but only top performing stocks. "
            "Default value is set to 50%. Try 1% or 10%!")
    risk_percent = st.slider("Select Risk Percentage (%)", 1, 100, value=50)
    # Print the risk_percent
    # format() is used to print value of a variable at a specific position
    st.text('Selected: {}%'.format(risk_percent))

    # Subheader
    st.subheader("Choosing Factors")
    st.warning("Choosing many factors cause significant increase of running time due to nature of Recommendation Tree!")
    # Radio Button
    status2 = st.radio("Select Factors 👉", ('Select All', 'Options (Recommended)'))
    st.info("The 'average-price' factor will be included as a minimum requirement.")
    factors_to_use = ['pe-ratio', 'price-sales', 'price-book', 'roe', 'roa', 'return-on-tangible-equity',
                      'number-of-employees', 'current-ratio', 'quick-ratio', 'total-liabilities',
                      'debt-equity-ratio', 'roi', 'cash-on-hand', 'total-share-holder-equity', 'revenue',
                      'gross-profit', 'net-income', 'shares-outstanding']
    if status2 == 'Options (Recommended)':
        # Multi select box
        factors_to_use = st.multiselect('Select Factors', factors_to_use)
    # The "+ 1" and the appended 'average-price' are for display only; run_program receives factors_to_use as is.
    st.write("You selected", len(factors_to_use) + 1, 'factors')
    st.success(f"Factors selected: {factors_to_use + ['average-price']}")

    # Subheader
    st.subheader("Confirm User Input")
    st.warning("Please review your inputs and click 'Run Program' to confirm.")
    st.success(f"Stocks: {stocks}")
    st.success(f"Start Investing: {end_datetime + timedelta(days=365)}")
    st.success(f"Risk Percentage: {risk_percent}%")
    st.success(f"Factors: {factors_to_use + ['average-price']}")

    # Run Program
    if st.button('Run Program'):
        st.warning("Program is running...")
        st.warning("If error is caused please change your options (e.g. Changing date closer to recent & "
                   "Risk Percentage to 100%) or make sure your "
                   "options did not violate preconditions. "
                   "Highly recommend using default setting as it satisfy all preconditions.")
        st.info("Please wait while program is running. "
                "Running usually takes 5 ~ 10 minutes, but this depends on user inputs. "
                "If you click 'Run Program' again, it will re-run the program with updated options.")
        # figure is the 4-tuple returned by run_program: (plot, stocks used, ranked factors, buy stocks).
        figure = run_program(stocks, end_date, risk_percent, factors_to_use)
        st.plotly_chart(figure[0])
        st.write("Statistics: ")
        st.success(f"Some stocks are not supported by APIs, thus, the program had to filter out the stocks. "
                   f"Here are stocks that were used to train the model: "
                   f"{figure[1]}")
        st.success("Here are ranked factors and their correlation values that were used to determine "
                   f"list of buy stocks: {figure[2]}")
        st.success("Here are list of stocks that program decided to invest: "
                   f"{figure[3]}")
# @check_contracts
def run_program(list_of_stocks: list[str], training_end_date: str, risk_percentage: int, factors: list[str]) -> \
        tuple[go.Figure, list[str], list[tuple[str, float]], list[str]]:
    """Run parts 2, 3 and 4 in sequence and return the result plot together with its statistics.

    This is the central function of the program.

    Returns a 4-tuple:
        [0] the figure built by visualization from the four simulations,
        [1] the stocks that remained after part 2's filter_stocks,
        [2] the best factors reported by part 2's determining_best_factor,
        [3] the buy stocks chosen by part 3's determining_buy_stocks.
    """
    processing = part2_factor_data_processing
    tree_tools = part3_recommendation_tree
    simulator = part4_investment_simulation

    # ---- Part 2: choosing the main influential factors ----
    # Keep only the stocks the API supports.
    usable_stocks = processing.filter_stocks(list_of_stocks, training_end_date)
    # Rank the usable stocks by growth, then keep the top half as the "best" stocks.
    growth_ranking = processing.get_percentage_growth_of_stocks(usable_stocks, training_end_date)
    best_stocks = processing.top_half(growth_ranking)
    # Factors of the best stocks, sorted based on their values.
    ranked_factors = processing.determining_best_factor(best_stocks, training_end_date, factors)

    # ---- Part 3: recommendation tree ----
    # Build the tree from the sorted factors.
    tree = tree_tools.create_recommendation_tree(ranked_factors, len(ranked_factors) - 1)
    # Insert every usable stock with its factor values; each stock's correlation value is compared
    # with the best stocks' average to decide between the left and right subtree (see the LaTeX report).
    tree.insert_stocks(usable_stocks, training_end_date, factors)
    # Pick the stocks to buy according to the user's risk percentage (see the LaTeX report).
    chosen_stocks = tree_tools.determining_buy_stocks(tree, risk_percentage)

    # ---- Part 4: investment simulation ----
    # Benchmarks: NASDAQ, S&P500, and all usable user-input stocks.
    nasdaq_result = simulator.benchmark_simulation('^IXIC', training_end_date)
    sp500_result = simulator.benchmark_simulation('^GSPC', training_end_date)
    all_stocks_result = simulator.benchmark_simulation(usable_stocks, training_end_date)
    # Strategy under test: only the stocks selected through the recommendation tree.
    tree_result = simulator.recommendation_tree_simulation(chosen_stocks, training_end_date)

    # One figure with all four curves; the other tuple items are shown under the graph as statistics.
    chart = visualization(nasdaq_result, sp500_result, all_stocks_result, tree_result)
    return chart, usable_stocks, ranked_factors, chosen_stocks
# @check_contracts
def visualization(benchmark_nasdaq_simulation: dict[int, float], benchmark_s_and_p500_simulation: dict[int, float],
                  benchmark_all_stocks_simulation: dict[int, float], recommendation_tree_simulation: dict[int, float]) \
        -> go.Figure:
    """
    Build one plotly figure containing a line trace for each of the four simulation results.

    For every dict, the keys are plotted on the x-axis (labelled 'Year') and the values on the
    y-axis (labelled 'Return on Investment (%)'). The figure is returned, not shown.
    """
    # Split every simulation into x/y coordinate lists first, in the order the traces are drawn.
    labelled_points = [
        (label, list(simulation.keys()), list(simulation.values()))
        for label, simulation in (
            ('NASDAQ', benchmark_nasdaq_simulation),
            ('S&P500', benchmark_s_and_p500_simulation),
            ('All User Input Stocks', benchmark_all_stocks_simulation),
            ('Recommendation Tree Filtered Stocks', recommendation_tree_simulation),
        )
    ]

    chart = go.Figure()
    for label, x_points, y_points in labelled_points:
        chart.add_trace(go.Scatter(x=x_points, y=y_points, name=label))

    # Title and axis names.
    chart.update_layout(title='Simulation Results', xaxis_title='Year', yaxis_title='Return on Investment (%)')
    # Call chart.show() here to preview the figure outside Streamlit.
    return chart
if __name__ == '__main__':
    # Optional development checks, intentionally left disabled: a doctest run and a python_ta lint pass.
    # import doctest
    # doctest.testmod(verbose=True)
    #
    # import python_ta
    # python_ta.check_all(config={
    #     'extra-imports': ['part2_factor_data_processing', 'part3_recommendation_tree', 'part3_recommendation_tree',
    #                       'part4_investment_simulation', 'plotly.graph_objs', 'datetime', 'PIL', 'streamlit'],
    #     # the names (strs) of imported modules
    #     'allowed-io': ['user_input'],  # the names (strs) of functions that call print/open/input
    #     'max-line-length': 120,
    #     'disable': ['trailing-whitespace', 'consider-using-f-string', 'too-many-statements', 'eval-used']
    #     # These checks are disabled because of Streamlit limitations:
    #     # 'trailing-whitespace': the first sentence cannot start with a tab, so a blank symbol is placed
    #     #     before the tab.
    #     # 'consider-using-f-string': the markdown strings are built with % formatting instead of f-strings.
    #     # 'too-many-statements' and 'eval-used': accepted for the sake of small details of the web page.
    # })

    # Entry point: render the page and handle the user's input.
    user_input()