Skip to content

Commit

Permalink
add remove x days variable
Browse files Browse the repository at this point in the history
  • Loading branch information
JoseRZapata committed May 1, 2021
1 parent b024f42 commit 2e512e6
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/pycoornet/crowdtangle.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, api_key):
self.api_key = api_key

def get_shares(self, urls, url_column='url', date_column='date', platforms=('facebook', 'instagram'),
nmax=500, sleep_time=20, clean_urls=False, save_ctapi_output=False, id_column=None):
nmax=500, sleep_time=20, clean_urls=False, save_ctapi_output=False, id_column=None, remove_days=None):
""" Get the URLs shares from CrowdTangle from a list of URLs with publish datetime
Args:
Expand All @@ -40,6 +40,7 @@ def get_shares(self, urls, url_column='url', date_column='date', platforms=('fac
clean_urls (bool, optional): clean the URLs from tracking parameters. Defaults to False.
save_ctapi_output (bool, optional): saves the original CT API output in rawdata/ folder. Defaults to False.
id_column (str, optional): name of the column where the id of each URL is stored.
remove_days (int, optional): remove shares performed more than this many days after the first share. Defaults to None (no shares are removed).
Raises:
Exception: [description]
Exception: [description]
Expand Down Expand Up @@ -153,7 +154,10 @@ def get_shares(self, urls, url_column='url', date_column='date', platforms=('fac


# remove shares performed more than `remove_days` days after the first share
df_full = df_full.loc[(df_full.index <= df_full.index.min()+ pd.Timedelta('7 day'))]
if remove_days:
# ex: '7 day'
days = f"{remove_days} day"
df_full = df_full.loc[(df_full.index <= df_full.index.min()+ pd.Timedelta(days))]

# concat data results in dataframe
ct_shares_df = ct_shares_df.append(df_full, ignore_index=True)
Expand Down

0 comments on commit 2e512e6

Please sign in to comment.