-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
50 lines (41 loc) · 1.48 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
df_relative_path = "data_files/"
files = ["coinedition.csv","thedailyhodl.csv", "finbold.csv", "newsbtc.csv", "utoday.csv"]
clear_files = []
for file in files:
clear_files.append(f"clean_{file}")
all_files = files + clear_files
# Delete previous files
import os
from os.path import exists
for file in all_files:
if exists(f"{df_relative_path}{file}"):
os.remove(f"{df_relative_path}{file}")
# Run Spiders
from news_scrapper.spiders.all_news_spider import run_all_spiders
run_all_spiders()
# Data Cleaning
from data_processing import process_date
import pandas as pd
#-----------------------------------------
# Clean the data before saving it to the final file
for file in files:
# print(f"Now processing file: {file}") # When error is encounted, uncomment this to check in which website the issue has originated from
data = pd.read_csv(f"{df_relative_path}{file}", skip_blank_lines = True)
# print(f"{file} is read")
new_data = process_date(data)
# print("***/nfollowing is the data/n***")
print(new_data)
# print(new_data)
if new_data is None:
pass
else:
new_data.to_csv(f"{df_relative_path}clean_{file}")
# print(f"saving clean_{file}.csv at {df_relative_path}")
#-----------------------------------------
# Source file: news_display.py
# Run Streamlit App
import sys
import streamlit.web.cli as stdcli
if __name__ == '__main__':
sys.argv = ["streamlit", "run", "News_Crunch_GUI.py"]
sys.exit(stdcli.main())