-
Notifications
You must be signed in to change notification settings - Fork 13
/
twitter_scraper.py
47 lines (37 loc) · 1.5 KB
/
twitter_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import datetime
import os
import pathlib
import subprocess

# import twint
# Scrape-run configuration: a one-day lookback window plus output locations.
_today = datetime.datetime.today().date()

# Start of the scrape window: yesterday, ISO-formatted (YYYY-MM-DD).
DATE_START = str(_today - datetime.timedelta(days=1))

# Directory collecting the scraped JSON dumps; created on import if missing.
DATA_PATH = pathlib.Path("data/")
DATA_PATH.mkdir(parents=True, exist_ok=True)

# MAX_RESULT = 100
# DATE_END = '2020-05-08'

# Hashtag whose tweets are collected.
HASHTAG = 'depression'

# Output file named after today's date; the ".json" suffix is appended at write time.
JSON_FILENAME = DATA_PATH / str(_today)
def sns_scrape():
    """Scrape tweets tagged with ``HASHTAG`` since ``DATE_START`` via the
    snscrape CLI, writing JSON-lines output to ``<JSON_FILENAME>.json``.

    Raises:
        subprocess.CalledProcessError: if snscrape exits non-zero
            (``os.system`` previously discarded the exit status, so
            failed runs produced an empty/partial file silently).
        FileNotFoundError: if the ``snscrape`` executable is not on PATH.
    """
    out_path = f"{JSON_FILENAME}.json"
    # Build argv explicitly and run with shell=False: avoids shell-quoting
    # bugs / injection if HASHTAG or the path ever contain special
    # characters, unlike the previous interpolated `os.system` string.
    cmd = [
        "snscrape", "--jsonl", "--progress",
        "--since", DATE_START,
        "twitter-hashtag", HASHTAG,
    ]
    with open(out_path, "w", encoding="utf-8") as out:
        subprocess.run(cmd, stdout=out, check=True)
    # With an end date, snscrape takes an "until:" operator inside the query:
    # cmd[-1] = f"{HASHTAG} until:{DATE_END}"
def scrape_twint():
    """Scrape "#depression" tweets posted since midnight yesterday with the
    third-party ``twint`` library, storing results in ``test3.csv``.

    Currently disabled in ``__main__`` in favor of :func:`sns_scrape`. The
    module-level ``import twint`` is commented out, so calling this function
    previously raised ``NameError``; importing lazily here fixes that while
    keeping the module importable when twint is not installed.

    Raises:
        ImportError: if the optional ``twint`` package is not installed.
    """
    import twint  # deferred: optional dependency, only needed by this path

    c = twint.Config()
    # c.Until = str(datetime.datetime.today().date() + datetime.timedelta(days=1))
    # Window start: 00:00:00 at the beginning of yesterday.
    c.Since = str(
        datetime.datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
        - datetime.timedelta(days=1)
    )
    # c.Username = "test"
    c.Search = "#depression"
    c.Location = True
    c.Images = True
    # c.Limit = 50
    # c.Custom["user"] = ["id", "tweet", "user_id", "username", "hashtags", "mentions"]
    c.User_full = True
    c.Store_csv = True
    c.Output = "test3.csv"
    c.Debug = True
    twint.run.Search(c)
if __name__ == "__main__":
    # snscrape is the active scraper; the twint path is kept as a disabled
    # alternative (its module-level import is also commented out above).
    # scrape_twint()
    sns_scrape()
# reference
# https://betterprogramming.pub/how-to-scrape-tweets-with-snscrape-90124ed006af
# https://github.com/hansheng0512/tweets-scrapping-using-python
# https://github.community/t/can-github-actions-directly-edit-files-in-a-repository/17884/7