Skip to content

Commit

Permalink
refactor: add scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
kopardev committed Jul 19, 2024
1 parent 9209f8b commit 342e04d
Show file tree
Hide file tree
Showing 18 changed files with 546 additions and 0 deletions.
13 changes: 13 additions & 0 deletions assets/make_readme/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Set the working directory in the container
WORKDIR /app

# Update the package list and install bash.
# --no-install-recommends and removing the apt lists keep the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends bash && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install the required Python packages.
# tabulate is needed by pandas.DataFrame.to_markdown(), which the
# table-generating scripts call to print their Markdown output.
RUN pip install --no-cache-dir pandas requests python-dateutil tabulate
9 changes: 9 additions & 0 deletions assets/make_readme/about_us.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

## About Us

- 👋 Hi, we're the [**@CCBR**](https://bioinformatics.ccr.cancer.gov/ccbr/), a group of bioinformatics analysts and engineers
- 📖 We build flexible, reproducible workflows for next-generation sequencing data
- :bulb: We [collaborate](https://abcs-amp.nih.gov/project/request/CCBR/) with [CCR](https://ccr.cancer.gov/) PIs
- 📫 You can reach us at [[email protected]](mailto:[email protected])
- 🏁 Check out our [release history](#release-history)
- :link: Our [Zenodo](https://zenodo.org/communities/ccbr) community
57 changes: 57 additions & 0 deletions assets/make_readme/add_toc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import re
import argparse

def extract_headers(markdown_content):
    """
    Extract ATX-style headers from the markdown content.

    A header is a line starting with one to six '#' characters followed by
    at least one space or tab. Lines inside fenced code blocks (``` or ~~~)
    are ignored, so shell comments in code samples are not mistaken for
    headers.

    Returns a list of (hashes, title) tuples in document order, where
    ``hashes`` is the run of '#' characters and ``title`` is the rest of
    the line (not stripped).
    """
    header_re = re.compile(r'^(#{1,6})[ \t]+(.*)$')
    fence_re = re.compile(r'^ {0,3}(```|~~~)')

    headers = []
    open_fence = None  # marker of the fence we are currently inside, if any
    for line in markdown_content.splitlines():
        fence = fence_re.match(line)
        if fence:
            marker = fence.group(1)
            if open_fence is None:
                open_fence = marker
            elif marker == open_fence:
                open_fence = None
            continue
        if open_fence is not None:
            continue
        # Matching one line at a time guarantees a bare "##" line can never
        # swallow the following line as its title.
        match = header_re.match(line)
        if match:
            headers.append((match.group(1), match.group(2)))
    return headers

def generate_toc(headers):
    """
    Generate the Table of Contents (TOC) from the headers.

    ``headers`` is a sequence of (hashes, title) pairs; the number of '#'
    characters in ``hashes`` gives the nesting level of the entry.

    Anchors follow GitHub's heading-slug convention: the title is
    lower-cased, every character that is not a word character, hyphen or
    space is dropped, and spaces become hyphens. Repeated titles get a
    ``-1``, ``-2``, ... suffix so that each entry links to its own heading.

    Returns the TOC as a single newline-joined string starting with a
    "## Table of Contents" heading.
    """
    toc_lines = ["## Table of Contents"]
    seen_slugs = {}
    for header in headers:
        level = len(header[0])
        title = header[1].strip()
        slug = re.sub(r'[^\w\- ]', '', title.lower()).replace(' ', '-')
        repeats = seen_slugs.get(slug, 0)
        seen_slugs[slug] = repeats + 1
        anchor = slug if repeats == 0 else f"{slug}-{repeats}"
        # Two spaces per level: a single space is not enough for Markdown
        # to treat the item as nested under its parent.
        toc_lines.append(f"{'  ' * (level - 1)}- [{title}](#{anchor})")
    return '\n'.join(toc_lines)

def insert_toc(markdown_content, toc):
    """
    Insert TOC into the markdown content.

    If the content contains the placeholder ``<!-- TOC -->``, every
    occurrence of it is replaced by ``toc``. Otherwise ``toc`` (followed by
    a blank line) is inserted on its own line immediately before the first
    header that starts on a line other than the first; if there is no such
    header it is placed at the very beginning.

    Returns the updated markdown text.
    """
    toc_placeholder = "<!-- TOC -->"
    if toc_placeholder in markdown_content:
        return markdown_content.replace(toc_placeholder, toc)

    newline_pos = markdown_content.find('\n#')
    # find() points at the newline *before* the header; insert after it so
    # the TOC heading is not glued to the end of the preceding line.
    insert_at = 0 if newline_pos == -1 else newline_pos + 1
    return markdown_content[:insert_at] + toc + '\n\n' + markdown_content[insert_at:]

def main():
    """
    Command-line entry point: read a Markdown file, build a TOC from its
    headers, insert it, and write the result to the output file.

    Arguments (both required): ``--input/-i`` and ``--output/-o``.
    """
    parser = argparse.ArgumentParser(description='Add a Table of Contents (TOC) to a Markdown file.')
    parser.add_argument('--input', '-i', required=True, help='Input Markdown file')
    parser.add_argument('--output', '-o', required=True, help='Output Markdown file')

    args = parser.parse_args()

    # Markdown may contain non-ASCII characters (e.g. emoji); an explicit
    # UTF-8 encoding avoids depending on the platform's default encoding.
    with open(args.input, 'r', encoding='utf-8') as f:
        markdown_content = f.read()

    headers = extract_headers(markdown_content)
    toc = generate_toc(headers)
    updated_content = insert_toc(markdown_content, toc)

    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(updated_content)

    print(f"TOC added to {args.output}")


if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions assets/make_readme/back_to_top.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

<hr>
<p align="center">
<a href="#table-of-contents">Back to Top</a>
</p>
2 changes: 2 additions & 0 deletions assets/make_readme/banner.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

[<img src="https://raw.githubusercontent.com/CCBR/.github/main/img/ccbrbanner.png">](https://bioinformatics.ccr.cancer.gov/ccbr/)
20 changes: 20 additions & 0 deletions assets/make_readme/ccbrpipeliner_release_history.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

## Release History

`module load ccbrpipeliner` loads the default release of ccbrpipeliner. Each release comprises a unique combination of the version numbers of the different pipelines offered as part of the ccbrpipeliner suite.

| Release | Tool versions | Released on | Decommissioned on |
| --- | --- | --- | --- |
| 1 | RENEE v2.1 <sup>@#</sup> | July, 10th 2023 | July, 14th 2023 |
| 2 | RENEE v2.2 <sup>@#</sup> | July, 14th 2023 | September, 5th 2023 |
| 3 | RENEE v2.2 <sup>@#</sup>, XAVIER v2.0 <sup>@</sup>| July, 21st 2023 | - |
| 4 | RENEE v2.5 <sup>@#</sup>, XAVIER v3.0 <sup>@#</sup>| September, 5th 2023 | - |
| 5 | RENEE v2.5 <sup>@#</sup>, XAVIER v3.0 <sup>@#</sup>, CARLISLE v2.4 <sup>@</sup>, CHAMPAGNE v0.2 <sup>@</sup>, CRUISE v0.1 <sup>@</sup>, spacesavers2 v0.10 <sup>@</sup>, permfix v0.6 <sup>@</sup> | October, 27th 2023 | - |
| 6<sup>*</sup> | RENEE v2.5 <sup>@#</sup>, XAVIER v3.0 <sup>@#</sup>, CARLISLE v2.4 <sup>@</sup>, CHAMPAGNE v0.3 <sup>@</sup>, CRUISE v0.1 <sup>@</sup>, ASPEN v1.0 <sup>@</sup>, spacesavers2 v0.12 <sup>@</sup>, permfix v0.6 <sup>@</sup> | February, 29th 2024 | - |

>
> <sup>*</sup> = Current DEFAULT version on BIOWULF
>
> <sup>@</sup> = CLI available
>
> <sup>#</sup> = GUI available
4 changes: 4 additions & 0 deletions assets/make_readme/citation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

## Citation

Most of our end-to-end pipelines which have been used in published research work have been made available to the entire bioinformatics community via a Zenodo DOI. Please feel free to visit our [Zenodo community page](https://zenodo.org/communities/ccbr). And if you use our pipelines, don't forget to cite us!
136 changes: 136 additions & 0 deletions assets/make_readme/get_per_user_commits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import requests
import os
import pandas as pd
from collections import defaultdict
from datetime import datetime, timedelta

# The GitHub API token is read from the GITHUB_TOKEN environment variable;
# os.getenv returns None when the variable is not set.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
# Organization whose repositories, members and commits are queried.
# Alternative organizations are kept below for quick switching.
ORG_NAME = 'CCBR'
# ORG_NAME = 'CCRGeneticsBranch'
# ORG_NAME = 'NIDAP-Community'
# ORG_NAME = 'NCI-VB'

# Request headers passed to every GitHub API call made in this script.
headers = {
    'Accept': 'application/vnd.github.v3+json',
    'Authorization': f'token {GITHUB_TOKEN}'
}

def get_repos(org_name):
    """
    Return all repositories of a GitHub organization.

    Pages through ``/orgs/{org_name}/repos`` 100 entries at a time and
    stops after the first page that holds fewer than 100 entries.

    Args:
        org_name: Login name of the GitHub organization.

    Returns:
        A list of repository objects as decoded from the API's JSON. If a
        request does not return HTTP 200, a warning is written to stderr
        and the repositories collected so far are returned.
    """
    import sys  # local import: only needed for the warning below

    repos = []
    page = 1
    while True:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(
            f'https://api.github.com/orgs/{org_name}/repos?per_page=100&page={page}',
            headers=headers,
            timeout=30,
        )
        if response.status_code != 200:
            # stdout carries the Markdown table, so diagnostics go to stderr.
            print(
                f"Warning: listing repos for {org_name} failed with HTTP "
                f"{response.status_code}; results may be incomplete",
                file=sys.stderr,
            )
            break
        page_repos = response.json()  # decode the body once per page
        repos.extend(page_repos)
        if len(page_repos) < 100:
            break
        page += 1
    return repos

def get_members(org_name):
    """
    Collect the login names of an organization's members.

    Requests ``/orgs/{org_name}/members`` page by page (100 per page) and
    stops at the first non-200 response or the first empty page.

    Returns:
        A set of login strings gathered up to that point.
    """
    logins = set()
    page_number = 1
    while True:
        url = f'https://api.github.com/orgs/{org_name}/members?per_page=100&page={page_number}'
        reply = requests.get(url, headers=headers)
        if reply.status_code != 200:
            return logins
        batch = reply.json()
        if not batch:
            return logins
        logins.update(entry['login'] for entry in batch)
        page_number += 1

def get_outside_collaborators(repo_full_name):
    """
    Collect the login names of a repository's outside collaborators.

    Requests ``/repos/{repo_full_name}/collaborators`` with
    ``affiliation=outside`` page by page (100 per page) and stops at the
    first non-200 response or the first empty page.

    Returns:
        A set of login strings gathered up to that point.
    """
    logins = set()
    page_number = 1
    while True:
        url = (
            f'https://api.github.com/repos/{repo_full_name}/collaborators'
            f'?affiliation=outside&per_page=100&page={page_number}'
        )
        reply = requests.get(url, headers=headers)
        # Treat a failed request like an empty page: both end the paging.
        batch = reply.json() if reply.status_code == 200 else []
        if not batch:
            break
        for person in batch:
            logins.add(person['login'])
        page_number += 1
    return logins

def get_commits_count(repo_full_name, members_and_collaborators):
    """
    Tally commits per author for one repository.

    Walks ``/repos/{repo_full_name}/commits`` page by page (100 per page)
    until a non-200 response or an empty page. Only commits whose GitHub
    author login is in ``members_and_collaborators`` are counted; commits
    without a linked GitHub author are skipped.

    Returns:
        A defaultdict mapping login -> dict with the keys ``'total'``,
        ``'last_month'`` (author date within the past 30 days) and
        ``'last_6_months'`` (within the past 180 days).
    """
    now = datetime.utcnow()
    month_cutoff = now - timedelta(days=30)
    half_year_cutoff = now - timedelta(days=180)
    tallies = defaultdict(lambda: {'total': 0, 'last_month': 0, 'last_6_months': 0})

    page_number = 1
    while True:
        url = f'https://api.github.com/repos/{repo_full_name}/commits?per_page=100&page={page_number}'
        reply = requests.get(url, headers=headers)
        batch = reply.json() if reply.status_code == 200 else []
        if not batch:
            break

        for entry in batch:
            github_author = entry['author']
            login = github_author['login'] if github_author else 'unknown'
            authored_at = datetime.strptime(
                entry['commit']['author']['date'], '%Y-%m-%dT%H:%M:%SZ'
            )

            # Skip commits with no linked account or from non-members.
            if login == 'unknown' or login not in members_and_collaborators:
                continue

            counts = tallies[login]
            counts['total'] += 1
            if authored_at >= month_cutoff:
                counts['last_month'] += 1
            if authored_at >= half_year_cutoff:
                counts['last_6_months'] += 1

        page_number += 1

    return tallies

def main():
    """
    Print a Markdown table of the ten users with the most commits across
    all repositories of ORG_NAME, counting only organization members and
    outside collaborators.
    """
    members = get_members(ORG_NAME)
    repos = get_repos(ORG_NAME)

    # Gather outside collaborators from every repository.
    outside = set()
    for repo in repos:
        outside.update(get_outside_collaborators(repo['full_name']))

    eligible_users = members.union(outside)

    # Sum the per-repository tallies into one record per user.
    counter_keys = ('total', 'last_month', 'last_6_months')
    totals = defaultdict(lambda: {'total': 0, 'last_month': 0, 'last_6_months': 0})
    for repo in repos:
        per_repo = get_commits_count(repo['full_name'], eligible_users)
        for user, counts in per_repo.items():
            record = totals[user]
            for key in counter_keys:
                record[key] += counts[key]

    column_names = ['User', 'Total Commits', 'Commits in Last Month', 'Commits in Last 6 Months']
    rows = [
        [user, counts['total'], counts['last_month'], counts['last_6_months']]
        for user, counts in totals.items()
    ]

    df = pd.DataFrame(rows, columns=column_names)
    df = df[df['User'] != 'unknown']  # drop the placeholder for unlinked authors
    df = df.sort_values(by='Total Commits', ascending=False).head(10)  # top 10 users

    print(df.to_markdown(index=False, headers=column_names))


if __name__ == "__main__":
    main()
91 changes: 91 additions & 0 deletions assets/make_readme/get_recent_releases_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import argparse
import os
from datetime import datetime

import pandas as pd
import requests
from dateutil.relativedelta import relativedelta

# The GitHub API token is read from the GITHUB_TOKEN environment variable;
# os.getenv returns None when the variable is not set.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
# Organization whose repositories and releases are queried.
ORG_NAME = 'CCBR'

# Request headers passed to every GitHub API call made in this script.
headers = {
    'Accept': 'application/vnd.github.v3+json',
    'Authorization': f'token {GITHUB_TOKEN}'
}

def get_date_n_months_ago(n_months):
    """
    Return the date ``n_months`` calendar months before the current local
    time, formatted as a 'YYYY-MM-DD' string.
    """
    cutoff = datetime.now() - relativedelta(months=n_months)
    return f"{cutoff:%Y-%m-%d}"

def get_repos(org_name):
    """
    Return all repositories of a GitHub organization.

    Pages through ``/orgs/{org_name}/repos`` 100 entries at a time and
    stops after the first page that holds fewer than 100 entries.

    Args:
        org_name: Login name of the GitHub organization.

    Returns:
        A list of repository objects as decoded from the API's JSON. If a
        request does not return HTTP 200, a warning is written to stderr
        and the repositories collected so far are returned.
    """
    import sys  # local import: only needed for the warning below

    repos = []
    page = 1
    while True:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(
            f'https://api.github.com/orgs/{org_name}/repos?per_page=100&page={page}',
            headers=headers,
            timeout=30,
        )
        if response.status_code != 200:
            # stdout carries the Markdown table, so diagnostics go to stderr.
            print(
                f"Warning: listing repos for {org_name} failed with HTTP "
                f"{response.status_code}; results may be incomplete",
                file=sys.stderr,
            )
            break
        page_repos = response.json()  # decode the body once per page
        repos.extend(page_repos)
        if len(page_repos) < 100:
            break
        page += 1
    return repos

def format_date(date_str):
    """
    Convert a timestamp of the form 'YYYY-MM-DDTHH:MM:SSZ' to 'YYYY-MM-DD'.

    Args:
        date_str: Timestamp string; may be None or malformed.

    Returns:
        The date part as 'YYYY-MM-DD', or the string 'Unknown date' when
        the input is not a string in the expected format.
    """
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%SZ')
    except (ValueError, TypeError):
        # TypeError covers a missing (None) timestamp, which strptime
        # rejects before it gets to format validation.
        return 'Unknown date'
    return date_obj.strftime('%Y-%m-%d')

def get_latest_release(repo_full_name):
    """
    Fetch the latest release of a repository.

    Returns:
        The decoded JSON of ``/repos/{repo_full_name}/releases/latest``
        when the request returns HTTP 200, otherwise None.
    """
    url = f'https://api.github.com/repos/{repo_full_name}/releases/latest'
    reply = requests.get(url, headers=headers)
    return reply.json() if reply.status_code == 200 else None

def get_open_issues_count(repo_full_name):
    """
    Count the open issues of a repository, excluding pull requests.

    The issues endpoint is paginated and also returns pull requests
    (marked by a ``pull_request`` key), so all pages are walked and pull
    requests are filtered out.

    Args:
        repo_full_name: Repository in ``owner/name`` form.

    Returns:
        The number of open issues counted. Paging stops at the first
        non-200 response, so the result is 0 if the first request fails.
    """
    open_issues = 0
    page = 1
    while True:
        response = requests.get(
            f'https://api.github.com/repos/{repo_full_name}/issues?state=open&per_page=100&page={page}',
            headers=headers,
            timeout=30,
        )
        if response.status_code != 200:
            break
        items = response.json()
        open_issues += sum(1 for item in items if 'pull_request' not in item)
        if len(items) < 100:
            break
        page += 1
    return open_issues

def main():
    """
    Print a Markdown table with the latest release of every repository in
    ORG_NAME, newest first.

    ``--nmonths N`` limits the table to releases published within the last
    N months; with the default of 0 all latest releases are shown.
    """
    parser = argparse.ArgumentParser(description='Fetch GitHub repository releases.')
    parser.add_argument('--nmonths', type=int, default=0, help='Number of months to filter releases. If not provided, shows all releases.')
    args = parser.parse_args()

    columns = ['Repo Name', 'Release Name', 'Release Date', 'Open Issues']
    show_all = args.nmonths == 0
    cutoff_date = get_date_n_months_ago(args.nmonths)

    releases = []
    for repo in get_repos(ORG_NAME):
        latest_release = get_latest_release(repo['full_name'])
        if not latest_release:
            continue

        formatted_date = format_date(latest_release['published_at'])
        if formatted_date == 'Unknown date':
            continue
        # Both values are 'YYYY-MM-DD' strings, so string comparison
        # orders them chronologically.
        if not show_all and formatted_date < cutoff_date:
            continue

        repo_name = repo['name']
        # A release may have no title; fall back to its tag so the link
        # text is not rendered as "None".
        release_name = latest_release['name'] or latest_release['tag_name']
        releases.append({
            'Repo Name': f"[{repo_name}](https://github.com/{ORG_NAME}/{repo_name})",
            'Release Name': f"[{release_name}]({latest_release['html_url']})",
            'Release Date': formatted_date,
            # Only queried for repositories that make it into the table.
            'Open Issues': get_open_issues_count(repo['full_name']),
        })

    # Sort releases by date in descending order
    sorted_releases = sorted(releases, key=lambda x: x['Release Date'], reverse=True)

    # Fixing the columns keeps the table shape stable when no release matched.
    df = pd.DataFrame(sorted_releases, columns=columns)
    markdown_table = df.to_markdown(index=False, headers=columns)

    # Print Markdown table
    print(markdown_table)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 342e04d

Please sign in to comment.