Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/firebase upload gradients #170

Merged
merged 44 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
c78f74d
skeleton of gradient upload
rugeli Apr 25, 2023
faf1b53
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli May 1, 2023
9b28ad4
reconstruct gradient class
rugeli May 1, 2023
d50ba40
fix gradient should write
rugeli May 1, 2023
87974e8
Merge branch 'feature/firebase-upload-test' of https://github.com/mes…
rugeli May 3, 2023
c2b7177
pulled mock db for gradient testing
rugeli May 3, 2023
069b0ce
test gradient doc
rugeli May 3, 2023
2010dbf
upload gradient tests
rugeli May 4, 2023
8af2102
fix lint
rugeli May 5, 2023
dcc4d6a
fix test in db handler for gradient upload
rugeli May 5, 2023
d977b5a
[wip] get db data in load_file
rugeli May 11, 2023
2c1e498
delete as_dict in gradientDoc
rugeli May 11, 2023
1f881c8
fix test
rugeli May 11, 2023
6ad0389
cleaned as_dict and tests in gradient doc
rugeli May 11, 2023
cdcc9ab
[wip] resolve reference
rugeli May 15, 2023
a1ac167
update grad path in obj doc
rugeli May 16, 2023
7ea2e59
comment
rugeli May 16, 2023
5a4146f
Merge branch 'feature/firebase-upload-gradients' of https://github.co…
rugeli May 16, 2023
68aecad
get grad data in downloaded_data
rugeli May 16, 2023
c1a346f
resolve reference
rugeli May 16, 2023
d9365d0
resolve gradient reference in local data
rugeli May 20, 2023
2abf7cc
resolve grad and pass surface gradient
rugeli May 23, 2023
40f7216
fixed gradient update in regions
rugeli May 23, 2023
fa87f0f
add tests for gradient reference
rugeli May 24, 2023
70c3165
linter
rugeli May 25, 2023
6dcad95
[wip] collect gradient and obj data for packing
rugeli May 26, 2023
c7b2f92
let load_file also return database_name
rugeli May 26, 2023
289760b
[wip] revert recipe draft
rugeli May 26, 2023
c5ea1c4
fix gradient in resolve_local_regions
rugeli Jun 2, 2023
9fc15df
remove name key in obj and grad
rugeli Jun 2, 2023
0728304
separate out resolve grad code
rugeli Jun 2, 2023
a4af8f6
clean downloading code from this branch
rugeli Jun 2, 2023
bb9abeb
linter
rugeli Jun 2, 2023
1e1610c
cleanup
rugeli Jun 2, 2023
c1aeea9
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli Jun 13, 2023
20fb882
revert should_write check
rugeli Jun 13, 2023
39435a1
correct obj references in newly uploaded recipes
rugeli Jun 27, 2023
f24612d
Update cellpack/autopack/DBRecipeHandler.py
rugeli Jul 17, 2023
b07b1b5
Update cellpack/autopack/__init__.py
rugeli Jul 19, 2023
2d0859d
Merge pull request #174 from mesoscope/feature/resovle-grads-in-comp
rugeli Jul 28, 2023
3c09ce9
fixed test after the sub branch merged
rugeli Aug 2, 2023
4125043
lint
rugeli Aug 2, 2023
55859a2
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli Aug 2, 2023
15a12f8
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli Aug 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 82 additions & 9 deletions cellpack/autopack/DBRecipeHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ def as_dict(self):
data["regions"] = self.regions
return data

@staticmethod
def get_gradient_reference(downloaded_data, db):
if "gradient" in downloaded_data and db.is_reference(
downloaded_data["gradient"]
):
gradient_key = downloaded_data["gradient"]
downloaded_data["gradient"], _ = db.get_doc_by_ref(gradient_key)

@staticmethod
def get_reference_data(key_or_dict, db):
"""
Expand All @@ -66,12 +74,14 @@ def get_reference_data(key_or_dict, db):
if DataDoc.is_key(key_or_dict) and db.is_reference(key_or_dict):
key = key_or_dict
downloaded_data, _ = db.get_doc_by_ref(key)
CompositionDoc.get_gradient_reference(downloaded_data, db)
return downloaded_data, None
elif key_or_dict and isinstance(key_or_dict, dict):
object_dict = key_or_dict
if "object" in object_dict and db.is_reference(object_dict["object"]):
key = object_dict["object"]
downloaded_data, _ = db.get_doc_by_ref(key)
CompositionDoc.get_gradient_reference(downloaded_data, db)
return downloaded_data, key
return {}, None

Expand All @@ -96,19 +106,40 @@ def resolve_db_regions(self, db_data, db):
):
self.resolve_db_regions(downloaded_data, db)

@staticmethod
def gradient_list_to_dict(prep_recipe_data):
"""
Convert gradient list to dict for resolve_local_regions
"""
if "gradients" in prep_recipe_data and isinstance(
prep_recipe_data["gradients"], list
):
gradient_dict = {}
for gradient in prep_recipe_data["gradients"]:
gradient_dict[gradient["name"]] = gradient
prep_recipe_data["gradients"] = gradient_dict

def resolve_local_regions(self, local_data, recipe_data, db):
"""
Recursively resolves the regions of a composition from local data.
Restructure the local data to match the db data.
"""
unpack_recipe_data = DBRecipeHandler.prep_data_for_db(recipe_data)
prep_recipe_data = ObjectDoc.convert_representation(unpack_recipe_data, db)
# `gradients` is a list, convert it to dict for easy access and replace
CompositionDoc.gradient_list_to_dict(prep_recipe_data)
if "object" in local_data and local_data["object"] is not None:
if DataDoc.is_key(local_data["object"]):
key_name = local_data["object"]
else:
key_name = local_data["object"]["name"]
local_data["object"] = prep_recipe_data["objects"][key_name]
if "gradient" in local_data["object"] and isinstance(
local_data["object"]["gradient"], str
):
local_data["object"]["gradient"] = prep_recipe_data["gradients"][
local_data["object"]["gradient"]
]
for region_name in local_data["regions"]:
for index, key_or_dict in enumerate(local_data["regions"][region_name]):
if not DataDoc.is_key(key_or_dict):
Expand All @@ -121,6 +152,12 @@ def resolve_local_regions(self, local_data, recipe_data, db):
local_data["regions"][region_name][index][
"object"
] = prep_recipe_data["objects"][obj_item["name"]]
# replace gradient reference with gradient data
obj_data = local_data["regions"][region_name][index]["object"]
if "gradient" in obj_data and isinstance(obj_data["gradient"], str):
local_data["regions"][region_name][index]["object"][
"gradient"
] = prep_recipe_data["gradients"][obj_data["gradient"]]
else:
comp_name = local_data["regions"][region_name][index]
prep_comp_data = prep_recipe_data["composition"][comp_name]
Expand Down Expand Up @@ -209,14 +246,9 @@ def should_write(self, db, recipe_data):
if db_docs and len(db_docs) >= 1:
for doc in db_docs:
db_data = db.doc_to_dict(doc)
shallow_match = True
for item in CompositionDoc.SHALLOW_MATCH:
if db_data[item] != local_data[item]:
print(db_data[item], local_data[item])
shallow_match = False
break
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This break statement should probably be a return statement? It looks like it only breaks out of the inner loop right now.

Suggested change
break
return None, None

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I know it's confusing, the readability of this function is truly a concern for us. Let's address it during our group programming session this week.

if not shallow_match:
continue
if local_data["regions"] is None and db_data["regions"] is None:
# found a match, so shouldn't write
return False, db.doc_id(doc)
Expand Down Expand Up @@ -296,11 +328,29 @@ def should_write(self, db):
return None, None


class GradientDoc(DataDoc):
    """
    Wraps a single gradient's settings for storage in the database.
    """

    def __init__(self, settings):
        super().__init__()
        # raw gradient settings dict; expected to include the gradient "name"
        self.settings = settings

    def should_write(self, db, grad_name):
        """
        Check whether a gradient named `grad_name` with data identical to
        this doc's settings already exists in the db.

        Returns (doc, doc_id) for the first matching stored doc, or
        (None, None) when the gradient still needs to be written.
        """
        docs = db.get_doc_by_name("gradients", grad_name)
        if docs and len(docs) >= 1:
            for doc in docs:
                # FIX: compare the *local* settings (normalized for the db)
                # against the stored doc. The original normalized the stored
                # doc itself, so the diff never involved self.settings and
                # was effectively vacuous.
                local_data = DBRecipeHandler.prep_data_for_db(self.settings)
                db_data = db.doc_to_dict(doc)
                difference = DeepDiff(db_data, local_data, ignore_order=True)
                if not difference:
                    return doc, db.doc_id(doc)
        return None, None


class DBRecipeHandler(object):
def __init__(self, db_handler):
self.db = db_handler
self.objects_to_path_map = {}
self.comp_to_path_map = {}
self.grad_to_path_map = {}

@staticmethod
def is_nested_list(item):
Expand Down Expand Up @@ -355,13 +405,34 @@ def upload_data(self, collection, data, id=None):
doc = self.db.set_doc(collection, id, modified_data)
return id, self.db.create_path(collection, id)

def upload_gradients(self, gradients):
    """
    Upload each gradient to the db unless an identical one already exists,
    and record the db path for every gradient name in grad_to_path_map.
    """
    for gradient in gradients:
        gradient_name = gradient["name"]
        gradient_doc = GradientDoc(settings=gradient)
        _, doc_id = gradient_doc.should_write(self.db, gradient_name)
        if not doc_id:
            # no identical stored gradient: write it and keep the new path
            _, grad_path = self.upload_data("gradients", gradient_doc.settings)
        else:
            # identical gradient already stored: reuse its path
            print(f"gradients/{gradient_name} already exists in firestore")
            grad_path = self.db.create_path("gradients", doc_id)
        self.grad_to_path_map[gradient_name] = grad_path

def upload_objects(self, objects):
    """
    Upload each object to the db (unless an identical one already exists)
    and record its db path in self.objects_to_path_map.

    Gradient references are swapped from gradient *name* to the db *path*
    (looked up in self.grad_to_path_map) on a copy before uploading, so the
    caller's dict keeps the original gradient name.
    """
    for obj_name in objects:
        objects[obj_name]["name"] = obj_name
        # FIX: deep-copy only this object's settings; deep-copying the whole
        # `objects` dict on every iteration was accidentally O(n^2).
        settings = copy.deepcopy(objects[obj_name])
        # replace gradient name with path before uploading
        if "gradient" in settings:
            grad_name = settings["gradient"]
            settings["gradient"] = self.grad_to_path_map[grad_name]
        object_doc = ObjectDoc(name=obj_name, settings=settings)
        _, doc_id = object_doc.should_write(self.db)
        if doc_id:
            print(f"objects/{object_doc.name} is already in firestore")
            obj_path = self.db.create_path("objects", doc_id)
            self.objects_to_path_map[obj_name] = obj_path
        else:
            _, obj_path = self.upload_data("objects", object_doc.as_dict())
            self.objects_to_path_map[obj_name] = obj_path
Expand Down Expand Up @@ -416,18 +487,20 @@ def get_recipe_id(self, recipe_data):
"""
recipe_name = recipe_data["name"]
recipe_version = recipe_data["version"]
key = f"{recipe_name}_v{recipe_version}"
key = f"{recipe_name}_v-{recipe_version}"
return key

def upload_collections(self, recipe_meta_data, recipe_data):
"""
Separate collections from recipe data and upload them to db
"""
recipe_to_save = copy.deepcopy(recipe_meta_data)
gradients = recipe_data.get("gradients")
objects = recipe_data["objects"]
compositions = recipe_data["composition"]
# TODO: test gradients recipes
# gradients = recipe_data.get("gradients")
# save gradients to db
if gradients:
self.upload_gradients(gradients)
# save objects to db
self.upload_objects(objects)
# save comps to db
Expand Down
2 changes: 2 additions & 0 deletions cellpack/autopack/FirebaseHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def update_elements_in_array(doc_ref, index, new_item_ref, remove_item):

@staticmethod
def is_reference(path):
if not isinstance(path, str):
return False
if path is None:
return False
if path.startswith("firebase:"):
Expand Down
3 changes: 2 additions & 1 deletion cellpack/autopack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ def is_remote_path(file_path):
@param file_path: str
"""
for ele in DATABASE_NAME:
return ele in file_path
if ele in file_path:
return True


def convert_db_shortname_to_url(file_location):
Expand Down
1 change: 1 addition & 0 deletions cellpack/autopack/loaders/migrate_v1_to_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def convert(old_recipe):
new_recipe["name"] = old_recipe["recipe"]["name"]
new_recipe["bounding_box"] = old_recipe["options"]["boundingBox"]
objects_dict = {}
# TODO: check if composition structure is correct
composition = {"space": {"regions": {}}}
if "cytoplasme" in old_recipe:
outer_most_region_array = []
Expand Down
21 changes: 19 additions & 2 deletions cellpack/tests/test_db_recipe_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,18 @@ def test_upload_objects():
data = {"test": {"test_key": "test_value"}}
object_doc = DBRecipeHandler(mock_db)
object_doc.upload_objects(data)

assert object_doc.objects_to_path_map == {"test": "firebase:objects/test_id"}


def test_upload_objects_with_gradient():
    # the value of gradient in obj should be changed to the path instead of the name before uploading
    handler = DBRecipeHandler(mock_db)
    handler.grad_to_path_map = {"test_grad_name": "firebase:gradients/test_id"}
    data = {"test": {"test_key": "test_value", "gradient": "test_grad_name"}}
    handler.upload_objects(data)
    assert data["test"]["gradient"] == "firebase:gradients/test_id"


def test_upload_compositions():
composition = {
"space": {"regions": {"interior": ["A"]}},
Expand Down Expand Up @@ -95,6 +103,15 @@ def test_upload_compositions():
}


def test_upload_gradients():
    gradients = [{"name": "test_grad_name", "test_key": "test_value"}]
    handler = DBRecipeHandler(mock_db)
    handler.upload_gradients(gradients)
    expected = {"test_grad_name": "firebase:gradients/test_id"}
    assert handler.grad_to_path_map == expected


def test_get_recipe_id():
recipe_data = {
"name": "test",
Expand All @@ -103,7 +120,7 @@ def test_get_recipe_id():
"composition": {},
}
recipe_doc = DBRecipeHandler(mock_db)
assert recipe_doc.get_recipe_id(recipe_data) == "test_v1.0.0"
assert recipe_doc.get_recipe_id(recipe_data) == "test_v-1.0.0"


def test_upload_collections():
Expand Down
21 changes: 21 additions & 0 deletions cellpack/tests/test_gradient_doc.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are great, thanks for adding tests!

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from cellpack.autopack.DBRecipeHandler import GradientDoc
from cellpack.tests.mocks.mock_db import MockDB

mock_db = MockDB({})


def test_should_write_with_no_existing_doc():
    # with no stored gradient, should_write reports nothing to reuse
    settings = {"name": "test_grad_name", "test_key": "test_value"}
    gradient_doc = GradientDoc(settings)
    doc, doc_id = gradient_doc.should_write(mock_db, "test_grad_name")
    assert doc is None
    assert doc_id is None


def test_should_write_with_existing_doc():
    # with an identical stored gradient, should_write returns the stored doc
    grad_settings = {"name": "test_grad_name", "test_key": "test_value"}
    mock_db.data = dict(grad_settings)
    gradient_doc = GradientDoc(dict(grad_settings))

    doc, doc_id = gradient_doc.should_write(mock_db, "test_grad_name")
    assert doc is not None
    assert doc_id is not None
Loading