Skip to content

Commit

Permalink
Merge pull request #13 from UPB-SS1/crowdtangle_optional_parameters
Browse files Browse the repository at this point in the history
Crowdtangle optional parameters
  • Loading branch information
JoseRZapata authored May 1, 2021
2 parents dd407c8 + 9bdb52a commit 646be79
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 7 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ CLSB refers to a specific coordinated activity performed by a network of Faceboo
To identify such networks, we designed, implemented and tested an algorithm that detects sets of Facebook public entities which performed CLSB by (1) estimating a time threshold that identifies URL shares performed by multiple distinct entities within an unusually short period of time (as compared to the entire dataset), and (2) grouping the entities that repeatedly shared the same news story within this coordination interval. The rationale is that, while it may be common for several entities to share the same URLs, it is unlikely, unless consistent coordination exists, that this occurs repeatedly and within the time threshold.

## Installation

[https://pypi.org/project/pycoornet/](https://pypi.org/project/pycoornet/)

```sh
pip install pycoornet
```
Expand All @@ -36,7 +39,7 @@ def main():
links_df = pd.read_csv('samples/sample_source_links.csv')
# Init CrowdTangle with api key
crowd_tangle = CrowdTangle("abc123def345")
ct_df = crowd_tangle.get_shares(urls=links_df, url_column='clean_url', date_column='date',clean_urls=True, platforms='facebook', sleep_time=1)
ct_df = crowd_tangle.get_shares(urls=links_df, url_column='clean_url', date_column='date',clean_urls=True, platforms='facebook', sleep_time=30)
shared = Shared()
crowtangle_shares_df, shares_graph, q = shared.coord_shares(ct_df, clean_urls=True)

Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@

setup(
name="pycoornet",
version="0.2.2",
version="0.2.3",
description="Using Python Given a set of URLs, this packages detects coordinated link sharing behavior on social media and outputs the network of entities that performed such behaviour.",
long_description=long_description,
long_description_content_type="text/markdown",
author = 'Camilo Andres Soto Montoya, Jose R. Zapata',
author_email = '[email protected], ',
author_email = '[email protected], [email protected]',

url="https://github.com/UPB-SS1/PyCooRnet",
packages=find_packages(where='src' ,exclude=["tests", "*.test", "*.tes.*"]),
package_dir={
Expand Down
16 changes: 13 additions & 3 deletions src/pycoornet/crowdtangle.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def __init__(self, api_key):
raise Exception('Crowdtangle Api Token is missing')
self.api_key = api_key

def get_shares(self, urls, url_column='url', date_column='date', platforms=('facebook', 'instagram'), nmax=500, sleep_time=20, clean_urls=False, save_ctapi_output=False):
def get_shares(self, urls, url_column='url', date_column='date', platforms=('facebook', 'instagram'),
nmax=500, sleep_time=20, clean_urls=False, save_ctapi_output=False, id_column=None, remove_days=None):
""" Get the URLs shares from CrowdTangle from a list of URLs with publish datetime
Args:
Expand All @@ -38,7 +39,8 @@ def get_shares(self, urls, url_column='url', date_column='date', platforms=('fac
depending on the assigned API rate limit. Defaults to 20.
clean_urls (bool, optional): clean the URLs from tracking parameters. Defaults to False.
save_ctapi_output (bool, optional): saves the original CT API output in rawdata/ folder. Defaults to False.
id_column (str, optional): name of the column where the id of each URL is stored. Defaults to None.
remove_days (int, optional): remove shares performed more than this many days after the first share. Defaults to None (no shares are removed).
Raises:
Exception: [description]
Exception: [description]
Expand Down Expand Up @@ -146,8 +148,16 @@ def get_shares(self, urls, url_column='url', date_column='date', platforms=('fac
df_full['date'] = pd.to_datetime(df_full['date'])
df_full = df_full.set_index('date', drop=False)

# if id column is specified
if id_column:
df_full["id_column"] = urls.iloc[i, :].loc[id_column]


# remove shares performed more than `remove_days` days after the first share
df_full = df_full.loc[(df_full.index <= df_full.index.min()+ pd.Timedelta('7 day'))]
if remove_days:
# ex: '7 day'
days = f"{remove_days} day"
df_full = df_full.loc[(df_full.index <= df_full.index.min()+ pd.Timedelta(days))]

# concat data results in dataframe
ct_shares_df = ct_shares_df.append(df_full, ignore_index=True)
Expand Down
4 changes: 3 additions & 1 deletion tests/test_pycoornet.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ def sample_source_df():
def test_crowdtangle(crowd_token, sample_source_df):
crowd_tangle = CrowdTangle(crowd_token)
shares_df = crowd_tangle.get_shares(urls=sample_source_df, url_column='clean_url', date_column='date',
clean_urls=True, platforms='facebook', sleep_time=1)
clean_urls=True, platforms='facebook', sleep_time=1,
id_column = 'url_rid')
if shares_df.shape[0] > 0:

assert True
else:
assert False
Expand Down

0 comments on commit 646be79

Please sign in to comment.