Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MANUAL file expiry deletion script #1

Merged
merged 40 commits into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
96c5c1f
Base folder generation and expiry date checker
cehune Feb 17, 2024
32375ae
Implemented file deletion - tested locally
cehune Feb 17, 2024
b9c9a30
Implemented unit tests for file expiry tool
cehune Feb 17, 2024
576867c
Cleaned up starting code
cehune Feb 17, 2024
f0a3298
Updating days for expiry
cehune Feb 17, 2024
2f777d9
Creating Base script for file deletion
cehune Feb 17, 2024
22209a5
Updating to only delete top level folders
cehune Feb 18, 2024
0fe7230
Separated File Deletion Function between searching and deleting
cehune Feb 18, 2024
2eb6340
Update dependencies
cehune Feb 18, 2024
b6a7fba
Updated changes, only notifys top level folders but checks all files
cehune Feb 21, 2024
f6b58c4
Fixed Notifications
cehune Feb 21, 2024
b35fee4
Remove pycache
cehune Feb 21, 2024
848f266
Updated Utils
cehune Feb 29, 2024
503ce9c
Updated utils with temporary test functions
cehune Mar 4, 2024
fb5d0ef
Updated Utils with test functions
cehune Mar 4, 2024
ad75a9a
updating utils with testing functions
cehune Mar 4, 2024
e4fa991
Manually Tested
cehune Mar 21, 2024
8d5646f
Updated function documentation
cehune Mar 21, 2024
fba50a5
Implemented accumulation of informaiton and updated unit tests
cehune Mar 29, 2024
069049b
Updating Comments
cehune Mar 29, 2024
d02c9e1
Update README
cehune Mar 29, 2024
c18cd1e
Remove INIT files
cehune Mar 29, 2024
ed7af0f
Upating file information dictionary
cehune Apr 2, 2024
d5cee78
Update main.py
cehune Apr 2, 2024
cfd08db
Implemented Typer application
cehune Apr 9, 2024
2deb4fc
Cleaning and fixing unit tests
cehune May 1, 2024
bb31cda
Adding YML for testing CI Pipeline
cehune May 1, 2024
deb64f9
Removing Unecessary Files
cehune May 1, 2024
3119581
Updating YML Workflow CI file for Typos
cehune May 1, 2024
7cbd8d6
Adding Requirements.txt
cehune May 1, 2024
6f68ffd
Removing Hard coded paths
cehune May 1, 2024
8b750c2
Updated Unit tests
cehune May 2, 2024
94e1aff
Updating dependency names in unit test functions
cehune May 2, 2024
53aae90
Split functions into seperate files
cehune May 27, 2024
f7d767f
update pipeline yml
cehune May 27, 2024
cd25e5f
Cleaning Directory
cehune May 27, 2024
e48f9ac
Optimizing imports for main.py
cehune May 27, 2024
9189aef
Fixing unit tests
cehune May 27, 2024
c5f5350
Fixing unit test file paths for pipeline yml
cehune May 27, 2024
133b942
Updating .gitignore and README
cehune Jun 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/venv
/infra_file_auto_expiry/source/__pycache__
cehune marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 14 additions & 0 deletions file_expiry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#
# Installs a nightly (00:00) cron job that runs the automatic file expiry tool.
# Run this script from the repository root so that the relative path to
# main.py below resolves.

set -e

PYTHON=$(command -v python) # absolute path of the python interpreter on PATH
MAIN_SCRIPT=$(realpath "infra_file_auto_expiry/source/main.py")
STORAGE_FOLDER="" # folder to scan for expired files - fill in before installing
TEMP_FOLDER=""    # staging folder for expired files - main.py requires it as its second argument

echo "Automatic File Expiry Tool"
echo "Removes expired files - unused for 30 days or more"
echo "Python location is: $PYTHON"
echo "Running $MAIN_SCRIPT"

CRON_JOB="0 0 * * * \"$PYTHON\" \"$MAIN_SCRIPT\" \"$STORAGE_FOLDER\" \"$TEMP_FOLDER\""

# `crontab -l` exits non-zero when the user has no crontab yet. Under `set -e`
# that would abort the subshell before the new entry is echoed, and
# `crontab -` would then install an empty table. Tolerate that failure, and
# filter out an identical existing entry so that re-running this script does
# not schedule the job twice.
(
    crontab -l 2>/dev/null | grep -vxF -- "$CRON_JOB" || true
    echo "$CRON_JOB"
) | crontab -
Empty file.
Empty file.
17 changes: 17 additions & 0 deletions infra_file_auto_expiry/source/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from utils import *
ben-z marked this conversation as resolved.
Show resolved Hide resolved
import sys


def main(base_folder, temp_folder, days_for_expire=30):
    """
    Entry point of the file expiry tool: removes every expired top level
    entry of base_folder by delegating to delete_expired_files.

    string base_folder: The folder to scan for expired files and folders
    string temp_folder: The folder that expired entries are moved into
        before being removed. It used to be read from a module-level global
        that only exists when this file is run as a script, so calling
        main() after importing the module raised a NameError.
    int days_for_expire: The amount of days without access after which an
        entry counts as expired. This used to be hard coded to 0, which
        treats practically everything as expired.
    """
    delete_expired_files(base_folder, temp_folder, days_for_expire)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python main.py <folder to scan> <temp>")
        sys.exit(1)

    main(base_folder=sys.argv[1], temp_folder=sys.argv[2])
96 changes: 96 additions & 0 deletions infra_file_auto_expiry/source/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import pwd
import shutil
import os
import time

def is_expired_file(path, days_for_expire=30):
    """
    Checks the last time a file or folder has been touched. If it has not
    been accessed, changed or modified in the days specified, then return
    True. False if otherwise.

    string path: The full path to the file that is being checked
    int days_for_expire: The amount of days since last access that indicates
        that a file has expired.

    Errors raised by os.stat (for example FileNotFoundError when the path
    does not exist) are not handled here and propagate to the caller.
    """
    current_time = time.time()
    file_stat = os.stat(path)

    expiry_seconds = days_for_expire * 24 * 60 * 60
    # The path is expired only if the access, metadata-change and
    # modification times are ALL older than the expiry period, which is the
    # same as requiring that the most recent of the three is older.
    last_touched = max(file_stat.st_atime, file_stat.st_ctime, file_stat.st_mtime)

    return expiry_seconds < (current_time - last_touched)

def is_expired_folder(folder_path, days_for_expire=30):
    """
    Goes through all files in a folder, including the files of its
    subfolders, and checks whether every one of them has expired. Nothing
    is deleted by this function.

    string folder_path: The folder containing the files to check
    int days_for_expire: The amount of days since last access that indicates
        that a file has expired.

    Returns True if no unexpired file was found (so an empty folder counts
    as expired). Returns False if any file has not expired, or if
    folder_path is not a directory. Entries that are neither a file nor a
    directory are ignored.
    """
    if not os.path.isdir(folder_path):
        print("Bad folder path:", folder_path)
        return False

    # The context manager closes the directory handle even on early return.
    with os.scandir(folder_path) as entries:
        for entry in entries:
            if entry.is_dir():
                if not is_expired_folder(entry.path, days_for_expire):
                    return False
            elif entry.is_file() and not is_expired(entry.path, days_for_expire):
                # One unexpired file decides the result, no need to keep scanning.
                return False
    return True
cehune marked this conversation as resolved.
Show resolved Hide resolved

def is_expired(path, days_for_expire=30):
    """
    Checks whether a path has expired, using the file check for regular
    files and the folder check for directories.

    string path: The path of the file or folder to check
    int days_for_expire: The amount of days since last access that indicates
        that a file has expired.

    Returns False for a path that is neither a file nor a directory.
    """
    if os.path.isfile(path):
        checker = is_expired_file
    elif os.path.isdir(path):
        checker = is_expired_folder
    else:
        return False

    return checker(path, days_for_expire)
cehune marked this conversation as resolved.
Show resolved Hide resolved

def get_file_creator(path):
    """
    Gets the username of the owner of a file or folder

    The owner's UID is read from the file's stat information and then
    looked up in the password database to get the matching username.

    string path: The path of the file or folder

    Returns the username, or None (after printing an error) if the path
    does not exist, the UID has no user account, or any other error occurs.
    """
    uid = None
    try:
        # Get the UID of the file or directory owner
        uid = os.stat(path).st_uid
        # Get the username associated with the UID
        return pwd.getpwuid(uid).pw_name
    except FileNotFoundError:
        print(f"Error: File '{path}' not found.")
        return None
    except KeyError:
        # pwd.getpwuid raises KeyError when no account exists for the UID,
        # e.g. the owning user has been removed from the system.
        print(f"Error: No user account found for UID {uid} (owner of '{path}').")
        return None
    except Exception as e:
        # Best effort: a failed lookup must not abort the caller.
        print(f"Error: {e}")
        return None

def notify_file_creator(file_creator):
    """
    Announces that a file belonging to the given user is being deleted.
    Currently this only prints a message to standard output.

    TODO: implement proper notification system

    string file_creator: The username of the file owner
    """
    # The name is interpolated into the f-string; passing it as a second
    # print argument left the f-string without a placeholder and produced a
    # doubled space in the output.
    print(f"Deleting file by {file_creator}")

def scan_folder_for_expired(folder_path, days_for_expire=30):
    """
    Yields the path of each top level entry of a folder that has expired.
    Subfolders are reported as a whole; their contents are only inspected
    to decide whether the subfolder itself has expired.

    string folder_path: The folder whose entries are checked
    int days_for_expire: The amount of days since last access that indicates
        that a file has expired.

    This is a generator, so the folder is only opened once iteration starts.
    """
    # The context manager closes the directory handle when the generator is
    # exhausted or closed, instead of leaving it to the garbage collector.
    with os.scandir(folder_path) as entries:
        for entry in entries:
            if is_expired(entry.path, days_for_expire):
                yield entry.path

def delete_expired_files(folder_path, temp_folder, days_for_expire=30):
    """
    Deletes every expired top level entry of a folder. Each expired entry
    is reported through notify_file_creator, moved into temp_folder, and
    temp_folder is then removed together with everything moved into it.

    string folder_path: The folder containing the files to delete
    string temp_folder: An existing folder used to stage expired entries
    int days_for_expire: The amount of days since last access that indicates
        that a file has expired.

    Prints a message and does nothing if either folder does not exist.

    NOTE(review): temp_folder itself is removed at the end, so it has to be
    recreated before the next run - confirm that this is intended.
    """
    if not os.path.isdir(folder_path):
        print(f"Base folder does not exist: {folder_path}")
        return
    if not os.path.isdir(temp_folder):
        print(f"Temp folder does not exist: {temp_folder}")
        return

    temp_real_path = os.path.realpath(temp_folder)
    # Finish scanning before moving anything, so the folder is not modified
    # while it is still being listed. The temp folder is skipped in case it
    # lives inside folder_path: it cannot be moved into itself.
    expired_paths = [
        path
        for path in scan_folder_for_expired(folder_path, days_for_expire)
        if os.path.realpath(path) != temp_real_path
    ]

    for path in expired_paths:
        notify_file_creator(get_file_creator(path))
        shutil.move(path, temp_folder)

    shutil.rmtree(temp_folder)

Empty file.
49 changes: 49 additions & 0 deletions infra_file_auto_expiry/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import unittest
import os
import sys
from unittest.mock import patch

# Make the parent of this tests directory importable so that the
# `source.utils` import below resolves when the tests are run directly.
tests_dir = os.path.dirname(os.path.abspath(__file__))
module_path = os.path.dirname(tests_dir)
sys.path.append(module_path)

from source.utils import *


class TestUtils(unittest.TestCase):
    """Unit tests for the helper functions imported from source.utils."""

    @patch("pwd.getpwuid")
    @patch("os.stat")
    def test_get_file_creator(self, patch_os, patch_pwd):
        """
        Tests retrieving the user name of a file owner
        """
        # Successfully retrieves file owner
        patch_os.return_value.st_uid = 0
        patch_pwd.return_value.pw_name = "tester_account"
        self.assertEqual(get_file_creator("/home/machung/test.txt"), "tester_account")

    @patch("os.stat")
    @patch("time.time")
    def test_is_expired(self, patch_time, patch_os):
        """
        Tests the is_expired_file function
        """
        time_to_expire_days = 30
        last_touched = 5 * 24 * 60 * 60
        # is_expired_file compares the access, change and modification
        # times, so all three need real numbers on the mocked stat result.
        patch_os.return_value.st_atime = last_touched
        patch_os.return_value.st_ctime = last_touched
        patch_os.return_value.st_mtime = last_touched
        patch_time.return_value = 50 * 24 * 60 * 60

        # Days since last access is 50 - 5 = 45 > 30
        # The file should be expired
        # is_expired_file is called directly: is_expired first checks that
        # the path is a real file, which "test_name" is not.
        self.assertTrue(is_expired_file("test_name", time_to_expire_days))

        patch_time.return_value = 10 * 24 * 60 * 60
        # Days since last access is 10 - 5 = 5 < 30
        # The file should not be expired
        self.assertFalse(is_expired_file("test_name", time_to_expire_days))

    def test_notify_file_creator(self):
        # Placeholder until a real notification system exists to test.
        pass


if __name__ == '__main__':
    unittest.main()