Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/upload to s3 #181

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for information related to developing the
each set of changes to `main` atomic and as a side effect naturally encourages small
well defined PR's.

## Introduction to Remote Databases
### AWS S3
1. Pre-requisites
* Obtain an AWS account for AICS. Please contact the IT team or the code owner.
* Generate an `aws_access_key_id` and `aws_secret_access_key` in your AWS account.

2. Step-by-step Guide
* Download and install the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
* Configure AWS CLI by running `aws configure`, then enter your credentials as prompted.
* Ensure that Boto3, the AWS SDK for Python, is installed and included in the requirements section of `setup.py`.

### Firebase Firestore
1. Step-by-step Guide
* Create a Firebase project in test mode with your Google account, and select `firebase_admin` as the SDK. [Firebase Firestore tutorial](https://firebase.google.com/docs/firestore)
* Generate a new private key by navigating to "Project settings" > "Service accounts" in the project's dashboard.

**MIT license**

78 changes: 78 additions & 0 deletions cellpack/autopack/AWSHandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging
import boto3
from botocore.exceptions import ClientError
from pathlib import Path


class AWSHandler(object):
    """
    Handles all the AWS S3 operations
    """

    def __init__(
        self,
        bucket_name,
        sub_folder_name=None,
        region_name=None,
    ):
        """
        :param bucket_name: target S3 bucket
        :param sub_folder_name: optional key prefix prepended to every object name
        :param region_name: AWS region; when omitted, boto3 resolves it from
            the environment/config
        """
        self.bucket_name = bucket_name
        self.folder_name = sub_folder_name
        session = boto3.Session()
        # Only pin the endpoint when a region was actually provided; otherwise
        # the f-string would produce the invalid host "s3.None.amazonaws.com".
        endpoint_url = (
            f"https://s3.{region_name}.amazonaws.com" if region_name else None
        )
        self.s3_client = session.client(
            "s3",
            endpoint_url=endpoint_url,
            region_name=region_name,
        )

    def get_aws_object_key(self, object_name):
        """Prepend the configured sub-folder (if any) to the object name."""
        if self.folder_name is not None:
            object_name = self.folder_name + object_name
        return object_name

    def upload_file(self, file_path):
        """Upload a file to the S3 bucket and make it publicly readable.

        :param file_path: path of the file to upload
        :return: the file's stem (name without extension) if the upload
            succeeded, else False
        """
        # NOTE(review): .stem drops the file extension from the object key —
        # confirm this is intentional.
        file_name = Path(file_path).stem

        object_name = self.get_aws_object_key(file_name)
        try:
            # Setting the ACL via ExtraArgs publishes the object in the same
            # call, replacing the original separate put_object_acl round trip.
            self.s3_client.upload_file(
                file_path,
                self.bucket_name,
                object_name,
                ExtraArgs={"ACL": "public-read"},
            )
        except ClientError as e:
            logging.error(e)
            return False
        return file_name

    def create_presigned_url(self, object_name, expiration=3600):
        """Generate a presigned URL to share an S3 object

        :param object_name: string
        :param expiration: Time in seconds for the presigned URL to remain valid
        :return: Presigned URL as string. If error, returns None.
        """
        object_name = self.get_aws_object_key(object_name)
        # Generate a presigned URL for the S3 object
        try:
            url = self.s3_client.generate_presigned_url(
                "get_object",
                Params={"Bucket": self.bucket_name, "Key": object_name},
                ExpiresIn=expiration,
            )
        except ClientError as e:
            logging.error(e)
            return None
        # The response contains the presigned URL:
        # https://{self.bucket_name}.s3.{region}.amazonaws.com/{object_key}?...
        return url
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

from cellpack.autopack.utils import deep_merge

from google.cloud.exceptions import NotFound

from cellpack.autopack import FirebaseHandler


class DataDoc(object):
def __init__(
Expand Down Expand Up @@ -124,7 +128,7 @@ def resolve_local_regions(self, local_data, recipe_data, db):
Recursively resolves the regions of a composition from local data.
Restructure the local data to match the db data.
"""
unpack_recipe_data = DBRecipeHandler.prep_data_for_db(recipe_data)
unpack_recipe_data = DBHandler.prep_data_for_db(recipe_data)
prep_recipe_data = ObjectDoc.convert_representation(unpack_recipe_data, db)
# `gradients` is a list, convert it to dict for easy access and replace
CompositionDoc.gradient_list_to_dict(prep_recipe_data)
Expand Down Expand Up @@ -220,15 +224,14 @@ def update_reference(
doc, doc_ref = db.get_doc_by_id("composition", composition_id)
if doc is None:
return
_, new_item_ref = db.get_doc_by_id("composition", referring_comp_id)
update_ref_path = f"{db.db_name()}:{db.get_path_from_ref(new_item_ref)}"
if update_in_array:
db.update_elements_in_array(
doc_ref, index, update_ref_path, remove_comp_name
)
else:
_, new_item_ref = db.get_doc_by_id("composition", referring_comp_id)
update_ref_path = f"{db.db_name()}:{db.get_path_from_ref(new_item_ref)}"
if update_in_array:
db.update_elements_in_array(
doc_ref, index, update_ref_path, remove_comp_name
)
else:
db.update_reference_on_doc(doc_ref, index, update_ref_path)
db.update_reference_on_doc(doc_ref, index, update_ref_path)

def should_write(self, db, recipe_data):
"""
Expand All @@ -247,9 +250,14 @@ def should_write(self, db, recipe_data):
if db_docs and len(db_docs) >= 1:
for doc in db_docs:
db_data = db.doc_to_dict(doc)
shallow_match = True
for item in CompositionDoc.SHALLOW_MATCH:
if db_data[item] != local_data[item]:
print(db_data[item], local_data[item])
shallow_match = False
break
if not shallow_match:
continue
if local_data["regions"] is None and db_data["regions"] is None:
# found a match, so shouldn't write
return False, db.doc_id(doc)
Expand Down Expand Up @@ -322,7 +330,7 @@ def should_write(self, db):
# if there is repr in the obj doc from db
full_doc_data = ObjectDoc.convert_representation(doc, db)
# unpack objects to dicts in local data for comparison
local_data = DBRecipeHandler.prep_data_for_db(self.as_dict())
local_data = DBHandler.prep_data_for_db(self.as_dict())
difference = DeepDiff(full_doc_data, local_data, ignore_order=True)
if not difference:
return doc, db.doc_id(doc)
Expand All @@ -338,17 +346,21 @@ def should_write(self, db, grad_name):
docs = db.get_doc_by_name("gradients", grad_name)
if docs and len(docs) >= 1:
for doc in docs:
local_data = DBRecipeHandler.prep_data_for_db(db.doc_to_dict(doc))
local_data = DBHandler.prep_data_for_db(db.doc_to_dict(doc))
db_data = db.doc_to_dict(doc)
difference = DeepDiff(db_data, local_data, ignore_order=True)
if not difference:
return doc, db.doc_id(doc)
return None, None


class DBRecipeHandler(object):
class DBHandler(object):
firebase_handler = None

def __init__(self, db_handler):
self.db = db_handler
if isinstance(db_handler, FirebaseHandler):
DBHandler.firebase_handler = db_handler
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shouldn't be in this file. The idea of DBHandler is it's agnostic to what handler is being used. There should be nothing explicitly referencing firebase in this file

self.objects_to_path_map = {}
self.comp_to_path_map = {}
self.grad_to_path_map = {}
meganrm marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -377,20 +389,18 @@ def prep_data_for_db(data):
modified_data = {}
for key, value in data.items():
# convert 2d array to dict
if DBRecipeHandler.is_nested_list(value):
if DBHandler.is_nested_list(value):
flatten_dict = dict(zip([str(i) for i in range(len(value))], value))
modified_data[key] = DBRecipeHandler.prep_data_for_db(flatten_dict)
modified_data[key] = DBHandler.prep_data_for_db(flatten_dict)
# If the value is an object, we want to convert it to dict
elif isinstance(value, object) and "__dict__" in dir(value):
unpacked_value = vars(value)
modified_data[key] = unpacked_value
if isinstance(unpacked_value, dict):
modified_data[key] = DBRecipeHandler.prep_data_for_db(
unpacked_value
)
modified_data[key] = DBHandler.prep_data_for_db(unpacked_value)
# If the value is a dictionary, recursively convert its nested lists to dictionaries
elif isinstance(value, dict):
modified_data[key] = DBRecipeHandler.prep_data_for_db(value)
modified_data[key] = DBHandler.prep_data_for_db(value)
else:
modified_data[key] = value
return modified_data
Expand All @@ -400,7 +410,7 @@ def upload_data(self, collection, data, id=None):
If should_write is true, upload the data to the database
"""
# check if we need to convert part of the data(2d arrays and objs to dict)
modified_data = DBRecipeHandler.prep_data_for_db(data)
modified_data = DBHandler.prep_data_for_db(data)
if id is None:
name = modified_data["name"]
doc = self.db.upload_doc(collection, modified_data)
Expand Down Expand Up @@ -542,6 +552,15 @@ def upload_recipe(self, recipe_meta_data, recipe_data):
key = self.get_recipe_id(recipe_to_save)
self.upload_data("recipes", recipe_to_save, key)

def update_or_create_metadata(self, collection, id, data):
    """
    If the input id exists, update the metadata. If not, create a new file.

    Upsert helper: tries an in-place update first, and falls back to
    creating the document when the backend reports it does not exist.
    """
    try:
        # update_doc raises NotFound when no document with this id exists.
        self.db.update_doc(collection, id, data)
    except NotFound:
        # Document absent: create it instead.
        self.db.set_doc(collection, id, data)

def prep_db_doc_for_download(self, db_doc):
"""
convert data from db and resolve references.
Expand Down
18 changes: 18 additions & 0 deletions cellpack/autopack/FirebaseHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def get_creds():
creds = FirebaseHandler.write_creds_path()
return creds["firebase"]

@staticmethod
def get_username():
    """Return the ``username`` entry from the local ``./.creds`` file.

    :raises ValueError: if the creds file is missing/unreadable or has no
        "username" entry.
    """
    creds = read_json_file("./.creds")
    # read_json_file returns None when the file is absent (see
    # write_username_to_creds); the original code would then raise an
    # opaque TypeError instead of the intended ValueError, so guard here.
    if creds is None or "username" not in creds:
        raise ValueError("No username found in .creds file")
    return creds["username"]

def db_name(self):
    """Return this handler's database name (``self.name``)."""
    return self.name

Expand All @@ -54,6 +62,10 @@ def doc_id(doc):
def create_path(collection, doc_id):
    """Build the canonical ``firebase:<collection>/<doc_id>`` reference path."""
    return "firebase:{}/{}".format(collection, doc_id)

@staticmethod
def create_timestamp():
    # Sentinel value that Firestore replaces with the server-side write time.
    return firestore.SERVER_TIMESTAMP

@staticmethod
def get_path_from_ref(doc):
return doc.path
Expand All @@ -66,6 +78,12 @@ def get_collection_id_from_path(path):
id = components[1]
return collection, id

def update_doc(self, collection, id, data):
    """Partially update an existing document and return its reference.

    NOTE(review): presumably raises NotFound when the document does not
    exist (update_or_create_metadata relies on this) — confirm.
    """
    target = self.db.collection(collection).document(id)
    target.update(data)
    print(f"successfully updated to path: {target.path}")
    return target

@staticmethod
def update_reference_on_doc(doc_ref, index, new_item_ref):
    # Overwrite the field named `index` on the document with the new
    # reference path string.
    doc_ref.update({index: new_item_ref})
Expand Down
11 changes: 5 additions & 6 deletions cellpack/autopack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
import re
import shutil
from os import path, environ
import pwd
import getpass
from pathlib import Path
import urllib.request as urllib
from collections import OrderedDict
Expand All @@ -50,7 +50,7 @@

from cellpack.autopack.interface_objects.meta_enum import MetaEnum
from cellpack.autopack.FirebaseHandler import FirebaseHandler
from cellpack.autopack.DBRecipeHandler import DBRecipeHandler
from cellpack.autopack.DBHandler import DBHandler
from cellpack.autopack.loaders.utils import read_json_file, write_json_file


Expand Down Expand Up @@ -392,9 +392,8 @@ def load_file(filename, destination="", cache="geometries", force=None):
# command example: `pack -r firebase:recipes/peroxisomes_surface_gradient_v-linear -c examples/packing-configs/peroxisome_packing_config.json`
if database_name == "firebase":
recipe_id = file_path.split("/")[-1]
db = FirebaseHandler()
db_doc, _ = db.get_doc_by_id(collection="recipes", id=recipe_id)
db_handler = DBRecipeHandler(db)
db_handler = DBHandler(FirebaseHandler())
db_doc, _ = db_handler.db.get_doc_by_id(collection="recipes", id=recipe_id)
downloaded_recipe_data = db_handler.prep_db_doc_for_download(db_doc)
return downloaded_recipe_data, database_name
else:
Expand Down Expand Up @@ -542,7 +541,7 @@ def clearCaches(*args):


def write_username_to_creds():
username = pwd.getpwuid(os.getuid())[0]
username = getpass.getuser()
creds = read_json_file("./.creds")
if creds is None or "username" not in creds:
creds = {}
Expand Down
Loading
Loading