From 6b2756dbd2f20f339a454cb50441385e58cdd79c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 16:56:52 +0000 Subject: [PATCH 1/5] Rename `add-many` / `add-one` CLI commands fixes #24 --- README.md | 88 ++++++++++++------------- jupyter_cache/cli/commands/cmd_cache.py | 4 +- jupyter_cache/cli/commands/cmd_stage.py | 4 +- tests/make_cli_readme.py | 7 +- 4 files changed, 54 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 5b1a598..405e8ee 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ to come ... ## Example CLI usage - + From the checked-out repository folder: @@ -83,13 +83,13 @@ Options: --help Show this message and exit. Commands: - add-many Cache notebook(s) that have already been executed. - add-one Cache a notebook, with possible artefact files. - cat-artifact Print the contents of a cached artefact. - diff-nb Print a diff of a notebook to one stored in the cache. - list List cached notebook records in the cache. - remove Remove notebooks stored in the cache. - show Show details of a cached notebook in the cache. + add Cache notebook(s) that have already been executed. + add-with-artefacts Cache a notebook, with possible artefact files. + cat-artifact Print the contents of a cached artefact. + diff-nb Print a diff of a notebook to one stored in the cache. + list List cached notebook records in the cache. + remove Remove notebooks stored in the cache. + show Show details of a cached notebook in the cache. ``` The first time the cache is required, it will be lazily created: @@ -107,7 +107,7 @@ When caching, a check will be made that the notebooks look to have been executed correctly, i.e. the cell execution counts go sequentially up from 1. ```console -$ jcache cache add-many tests/notebooks/basic.ipynb +$ jcache cache add tests/notebooks/basic.ipynb Caching: ../tests/notebooks/basic.ipynb Validity Error: Expected cell 1 to have execution_count 1 not 2 The notebook may not have been executed, continue caching? [y/N]: y @@ -117,7 +117,7 @@ Success! Or to skip validation: ```console -$ jcache cache add-many --no-validate tests/notebooks/basic.ipynb tests/notebooks/basic_failing.ipynb tests/notebooks/basic_unrun.ipynb tests/notebooks/complex_outputs.ipynb tests/notebooks/external_output.ipynb +$ jcache cache add --no-validate tests/notebooks/basic.ipynb tests/notebooks/basic_failing.ipynb tests/notebooks/basic_unrun.ipynb tests/notebooks/complex_outputs.ipynb tests/notebooks/external_output.ipynb Caching: ../tests/notebooks/basic.ipynb Caching: ../tests/notebooks/basic_failing.ipynb Caching: ../tests/notebooks/basic_unrun.ipynb @@ -141,17 +141,17 @@ You can remove cached records by their ID. $ jcache cache list ID URI Created Accessed ---- ------------------------------------- ---------------- ---------------- - 5 tests/notebooks/external_output.ipynb 2020-02-29 17:48 2020-02-29 17:48 - 4 tests/notebooks/complex_outputs.ipynb 2020-02-29 17:48 2020-02-29 17:48 - 3 tests/notebooks/basic_unrun.ipynb 2020-02-29 17:48 2020-02-29 17:48 - 2 tests/notebooks/basic_failing.ipynb 2020-02-29 17:48 2020-02-29 17:48 + 5 tests/notebooks/external_output.ipynb 2020-03-12 16:55 2020-03-12 16:55 + 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 16:55 2020-03-12 16:55 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 16:55 2020-03-12 16:55 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 16:55 2020-03-12 16:55 ``` You can also cache notebooks with artefacts (external outputs of the notebook execution). ```console -$ jcache cache add-one -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt +$ jcache cache add-with-artefacts -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt Caching: ../tests/notebooks/basic.ipynb Validity Error: Expected cell 1 to have execution_count 1 not 2 The notebook may not have been executed, continue caching? [y/N]: y @@ -164,8 +164,8 @@ Show a full description of a cached notebook by referring to its ID $ jcache cache show 6 ID: 6 URI: ../tests/notebooks/basic.ipynb -Created: 2020-02-29 17:48 -Accessed: 2020-02-29 17:48 +Created: 2020-03-12 16:55 +Accessed: 2020-03-12 16:55 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Artifacts: - artifact_folder/artifact.txt @@ -174,7 +174,7 @@ Artifacts: Note artefact paths must be 'upstream' of the notebook folder: ```console -$ jcache cache add-one -nb tests/notebooks/basic.ipynb tests/test_db.py +$ jcache cache add-with-artefacts -nb tests/notebooks/basic.ipynb tests/test_db.py Caching: ../tests/notebooks/basic.ipynb Artifact Error: Path '../tests/test_db.py' is not in folder '../tests/notebooks'' ``` @@ -236,12 +236,12 @@ Options: --help Show this message and exit. Commands: - add-many Stage notebook(s) for execution. - add-one Stage a notebook, with possible asset files. - list List notebooks staged for possible execution. - remove-ids Un-stage notebook(s), by ID. - remove-uris Un-stage notebook(s), by URI. - show Show details of a staged notebook. + add Stage notebook(s) for execution. + add-with-assets Stage a notebook, with possible asset files. + list List notebooks staged for possible execution. + remove-ids Un-stage notebook(s), by ID. + remove-uris Un-stage notebook(s), by URI. + show Show details of a staged notebook. ``` Staged notebooks are recorded as pointers to their URI, @@ -252,7 +252,7 @@ you can list them to see which have existing records in the cache (by hash), and which will require execution: ```console -$ jcache stage add-many tests/notebooks/basic.ipynb tests/notebooks/basic_failing.ipynb tests/notebooks/basic_unrun.ipynb tests/notebooks/complex_outputs.ipynb tests/notebooks/external_output.ipynb +$ jcache stage add tests/notebooks/basic.ipynb tests/notebooks/basic_failing.ipynb tests/notebooks/basic_unrun.ipynb tests/notebooks/complex_outputs.ipynb tests/notebooks/external_output.ipynb Staging: ../tests/notebooks/basic.ipynb Staging: ../tests/notebooks/basic_failing.ipynb Staging: ../tests/notebooks/basic_unrun.ipynb @@ -265,11 +265,11 @@ Success! $ jcache stage list ID URI Created Assets Cache ID ---- ------------------------------------- ---------------- -------- ---------- - 5 tests/notebooks/external_output.ipynb 2020-02-29 17:48 0 5 - 4 tests/notebooks/complex_outputs.ipynb 2020-02-29 17:48 0 - 3 tests/notebooks/basic_unrun.ipynb 2020-02-29 17:48 0 6 - 2 tests/notebooks/basic_failing.ipynb 2020-02-29 17:48 0 2 - 1 tests/notebooks/basic.ipynb 2020-02-29 17:48 0 6 + 5 tests/notebooks/external_output.ipynb 2020-03-12 16:55 0 5 + 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 16:55 0 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 16:55 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 16:55 0 2 + 1 tests/notebooks/basic.ipynb 2020-03-12 16:55 0 6 ``` You can remove a staged notebook by its URI or ID: @@ -315,10 +315,10 @@ that are inside the notebook folder, and data supplied by the executor. $ jcache stage list ID URI Created Assets Cache ID ---- ------------------------------------- ---------------- -------- ---------- - 5 tests/notebooks/external_output.ipynb 2020-02-29 17:48 0 5 - 3 tests/notebooks/basic_unrun.ipynb 2020-02-29 17:48 0 6 - 2 tests/notebooks/basic_failing.ipynb 2020-02-29 17:48 0 - 1 tests/notebooks/basic.ipynb 2020-02-29 17:48 0 6 + 5 tests/notebooks/external_output.ipynb 2020-03-12 16:55 0 5 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 16:55 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 16:55 0 + 1 tests/notebooks/basic.ipynb 2020-03-12 16:55 0 6 ``` Execution data (such as execution time) will be stored in the cache record: @@ -327,11 +327,11 @@ Execution data (such as execution time) will be stored in the cache record: $ jcache cache show 6 ID: 6 URI: ../tests/notebooks/basic_unrun.ipynb -Created: 2020-02-29 17:48 -Accessed: 2020-02-29 17:48 +Created: 2020-03-12 16:55 +Accessed: 2020-03-12 16:55 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Data: - execution_seconds: 1.2727476909999993 + execution_seconds: 1.0545749530000004 ``` @@ -341,18 +341,18 @@ Failed notebooks will not be cached, but the exception traceback will be added t $ jcache stage show 2 ID: 2 URI: ../tests/notebooks/basic_failing.ipynb -Created: 2020-02-29 17:48 +Created: 2020-03-12 16:55 Failed Last Execution! Traceback (most recent call last): File "../jupyter_cache/executors/basic.py", line 152, in execute executenb(nb_bundle.nb, cwd=tmpdirname) - File "//anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/execute.py", line 737, in executenb + File "/anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/execute.py", line 737, in executenb return ep.preprocess(nb, resources, km=km)[0] - File "//anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/execute.py", line 405, in preprocess + File "/anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/execute.py", line 405, in preprocess nb, resources = super(ExecutePreprocessor, self).preprocess(nb, resources) - File "//anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/base.py", line 69, in preprocess + File "/anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/base.py", line 69, in preprocess nb.cells[index], resources = self.preprocess_cell(cell, resources, index) - File "//anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/execute.py", line 448, in preprocess_cell + File "/anaconda/envs/mistune/lib/python3.7/site-packages/nbconvert/preprocessors/execute.py", line 448, in preprocess_cell raise CellExecutionError.from_cell_and_msg(cell, out) nbconvert.preprocessors.execute.CellExecutionError: An error occurred while executing the following cell: ------------------ @@ -388,7 +388,7 @@ As with artefacts, these files must be in the same folder as the notebook, or a sub-folder. ```console -$ jcache stage add-one -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt +$ jcache stage add-with-assets -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt Success! ``` @@ -396,7 +396,7 @@ Success! $ jcache stage show 1 ID: 1 URI: ../tests/notebooks/basic.ipynb -Created: 2020-02-29 17:48 +Created: 2020-03-12 16:55 Cache ID: 6 Assets: - ../tests/notebooks/artifact_folder/artifact.txt diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index 1724040..1f39afa 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -131,7 +131,7 @@ def cache_file(db, nbpath, validate, overwrite, artifact_paths=()): return True -@cmnd_cache.command("add-one") +@cmnd_cache.command("add-with-artefacts") @arguments.ARTIFACT_PATHS @options.NB_PATH @options.CACHE_PATH @@ -145,7 +145,7 @@ def cache_nb(cache_path, artifact_paths, nbpath, validate, overwrite): click.secho("Success!", fg="green") -@cmnd_cache.command("add-many") +@cmnd_cache.command("add") @arguments.NB_PATHS @options.CACHE_PATH @options.VALIDATE_NB diff --git a/jupyter_cache/cli/commands/cmd_stage.py b/jupyter_cache/cli/commands/cmd_stage.py index 882c42d..a7c4af8 100644 --- a/jupyter_cache/cli/commands/cmd_stage.py +++ b/jupyter_cache/cli/commands/cmd_stage.py @@ -13,7 +13,7 @@ def cmnd_stage(): pass -@cmnd_stage.command("add-many") +@cmnd_stage.command("add") @arguments.NB_PATHS @options.CACHE_PATH def stage_nbs(cache_path, nbpaths): @@ -26,7 +26,7 @@ def stage_nbs(cache_path, nbpaths): click.secho("Success!", fg="green") -@cmnd_stage.command("add-one") +@cmnd_stage.command("add-with-assets") @arguments.ASSET_PATHS @options.NB_PATH @options.CACHE_PATH diff --git a/tests/make_cli_readme.py b/tests/make_cli_readme.py index 428cf59..0fcef1e 100644 --- a/tests/make_cli_readme.py +++ b/tests/make_cli_readme.py @@ -1,3 +1,4 @@ +from datetime import datetime from glob import glob import os from textwrap import dedent @@ -30,7 +31,11 @@ def main(): get_string(cmd_main.clear_cache, input="y") strings = [] - strings.append("".format(__file__)) + strings.append( + "".format( + datetime.now().isoformat(" ", "minutes"), __file__ + ) + ) strings.append("From the checked-out repository folder:") strings.append(get_string(cmd_main.jcache, None, ["--help"])) strings.append( From 3553e0e9687ce873033354f33e1eac690322746b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 17:06:28 +0000 Subject: [PATCH 2/5] Improve `KeyError` messages fixes #26 --- jupyter_cache/cache/db.py | 24 ++++++++++++++++-------- jupyter_cache/cache/main.py | 8 ++++++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/jupyter_cache/cache/db.py b/jupyter_cache/cache/db.py index 781a917..f5a20c2 100644 --- a/jupyter_cache/cache/db.py +++ b/jupyter_cache/cache/db.py @@ -65,7 +65,7 @@ def get_value(key: str, db: Engine, default=None): Setting.set_value(key, default, db) result = [default] else: - raise KeyError(key) + raise KeyError("Setting not found in DB: {}".format(key)) value = result[0] return value @@ -121,7 +121,9 @@ def record_from_hashkey(hashkey: str, db: Engine) -> "NbCacheRecord": session.query(NbCacheRecord).filter_by(hashkey=hashkey).one_or_none() ) if result is None: - raise KeyError(hashkey) + raise KeyError( + "Cache record not found for NB with hashkey: {}".format(hashkey) + ) session.expunge(result) return result @@ -130,7 +132,7 @@ def record_from_pk(pk: int, db: Engine) -> "NbCacheRecord": with session_context(db) as session: # type: Session result = session.query(NbCacheRecord).filter_by(pk=pk).one_or_none() if result is None: - raise KeyError(pk) + raise KeyError("Cache record not found for NB with PK: {}".format(pk)) session.expunge(result) return result @@ -139,7 +141,7 @@ def touch(pk, db: Engine): with session_context(db) as session: # type: Session record = session.query(NbCacheRecord).filter_by(pk=pk).one_or_none() if record is None: - raise KeyError(pk) + raise KeyError("Cache record not found for NB with PK: {}".format(pk)) record.accessed = datetime.utcnow() session.commit() @@ -150,7 +152,9 @@ def touch_hashkey(hashkey, db: Engine): session.query(NbCacheRecord).filter_by(hashkey=hashkey).one_or_none() ) if record is None: - raise KeyError(hashkey) + raise KeyError( + "Cache record not found for NB with hashkey: {}".format(hashkey) + ) record.accessed = datetime.utcnow() session.commit() @@ -260,7 +264,7 @@ def record_from_pk(pk: int, db: Engine) -> "NbStageRecord": with session_context(db) as session: # type: Session result = session.query(NbStageRecord).filter_by(pk=pk).one_or_none() if result is None: - raise KeyError(pk) + raise KeyError("Staging record not found for NB with PK: {}".format(pk)) session.expunge(result) return result @@ -269,7 +273,9 @@ def record_from_uri(uri: str, db: Engine) -> "NbStageRecord": with session_context(db) as session: # type: Session result = session.query(NbStageRecord).filter_by(uri=uri).one_or_none() if result is None: - raise KeyError(uri) + raise KeyError( + "Staging record not found for NB with URI: {}".format(uri) + ) session.expunge(result) return result @@ -292,7 +298,9 @@ def set_traceback(uri: str, traceback: Optional[str], db: Engine): with session_context(db) as session: # type: Session result = session.query(NbStageRecord).filter_by(uri=uri).one_or_none() if result is None: - raise KeyError(uri) + raise KeyError( + "Staging record not found for NB with URI: {}".format(uri) + ) result.traceback = traceback try: session.commit() diff --git a/jupyter_cache/cache/main.py b/jupyter_cache/cache/main.py index 00b059b..8bbba9b 100644 --- a/jupyter_cache/cache/main.py +++ b/jupyter_cache/cache/main.py @@ -279,7 +279,9 @@ def get_cache_bundle(self, pk: int) -> NbBundleOut: path = self._get_notebook_path_cache(record.hashkey) artifact_folder = self._get_artifact_path_cache(record.hashkey) if not path.exists(): - raise KeyError(pk) + raise KeyError( + "Notebook file does not exist for cache record PK: {}".format(pk) + ) return NbBundleOut( nbf.reads(path.read_text(), NB_VERSION), @@ -307,7 +309,9 @@ def remove_cache(self, pk: int): record = NbCacheRecord.record_from_pk(pk, self.db) path = self._get_notebook_path_cache(record.hashkey) if not path.exists(): - raise KeyError(pk) + raise KeyError( + "Notebook file does not exist for cache record PK: {}".format(pk) + ) shutil.rmtree(path.parent) NbCacheRecord.remove_records([pk], self.db) From 2c5397d503afd0edc63d271434d53e6d62ab9022 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 17:10:12 +0000 Subject: [PATCH 3/5] Clarify auto-caching in `jcache execute` fixes #23 --- README.md | 2 +- jupyter_cache/cli/commands/cmd_exec.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 405e8ee..9bf3325 100644 --- a/README.md +++ b/README.md @@ -297,7 +297,7 @@ Executing: ../tests/notebooks/basic_failing.ipynb error: Execution Failed: ../tests/notebooks/basic_failing.ipynb Executing: ../tests/notebooks/basic_unrun.ipynb Execution Succeeded: ../tests/notebooks/basic_unrun.ipynb -Finished! +Finished! Successfully executed notebooks have been cached. succeeded: - ../tests/notebooks/basic.ipynb - ../tests/notebooks/basic_unrun.ipynb diff --git a/jupyter_cache/cli/commands/cmd_exec.py b/jupyter_cache/cli/commands/cmd_exec.py index d090694..ad91777 100644 --- a/jupyter_cache/cli/commands/cmd_exec.py +++ b/jupyter_cache/cli/commands/cmd_exec.py @@ -28,5 +28,7 @@ def execute_nbs(cache_path, entry_point, pks): logger.error(str(error)) return 1 result = executor.run_and_cache(filter_pks=pks or None) - click.secho("Finished!", fg="green") + click.secho( + "Finished! Successfully executed notebooks have been cached.", fg="green" + ) click.echo(yaml.safe_dump(result, sort_keys=False)) From 986ee852f135fb0e66bca3531a82ee734e57b598 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 17:15:25 +0000 Subject: [PATCH 4/5] Rename `URI` to `Origin URI` for formatted cache record To hopefully make it clearer that this doesn't have any ongoing link to that URI --- README.md | 48 ++++++++++++------------- jupyter_cache/cli/commands/cmd_cache.py | 2 +- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 9bf3325..cad198c 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ to come ... ## Example CLI usage - + From the checked-out repository folder: @@ -139,12 +139,12 @@ You can remove cached records by their ID. ```console $ jcache cache list - ID URI Created Accessed + ID Origin URI Created Accessed ---- ------------------------------------- ---------------- ---------------- - 5 tests/notebooks/external_output.ipynb 2020-03-12 16:55 2020-03-12 16:55 - 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 16:55 2020-03-12 16:55 - 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 16:55 2020-03-12 16:55 - 2 tests/notebooks/basic_failing.ipynb 2020-03-12 16:55 2020-03-12 16:55 + 5 tests/notebooks/external_output.ipynb 2020-03-12 17:11 2020-03-12 17:11 + 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 17:11 2020-03-12 17:11 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:11 2020-03-12 17:11 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:11 2020-03-12 17:11 ``` You can also cache notebooks with artefacts @@ -163,9 +163,9 @@ Show a full description of a cached notebook by referring to its ID ```console $ jcache cache show 6 ID: 6 -URI: ../tests/notebooks/basic.ipynb -Created: 2020-03-12 16:55 -Accessed: 2020-03-12 16:55 +Origin URI: ../tests/notebooks/basic.ipynb +Created: 2020-03-12 17:11 +Accessed: 2020-03-12 17:11 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Artifacts: - artifact_folder/artifact.txt @@ -265,11 +265,11 @@ Success! $ jcache stage list ID URI Created Assets Cache ID ---- ------------------------------------- ---------------- -------- ---------- - 5 tests/notebooks/external_output.ipynb 2020-03-12 16:55 0 5 - 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 16:55 0 - 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 16:55 0 6 - 2 tests/notebooks/basic_failing.ipynb 2020-03-12 16:55 0 2 - 1 tests/notebooks/basic.ipynb 2020-03-12 16:55 0 6 + 5 tests/notebooks/external_output.ipynb 2020-03-12 17:11 0 5 + 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 17:11 0 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:11 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:11 0 2 + 1 tests/notebooks/basic.ipynb 2020-03-12 17:11 0 6 ``` You can remove a staged notebook by its URI or ID: @@ -315,10 +315,10 @@ that are inside the notebook folder, and data supplied by the executor. $ jcache stage list ID URI Created Assets Cache ID ---- ------------------------------------- ---------------- -------- ---------- - 5 tests/notebooks/external_output.ipynb 2020-03-12 16:55 0 5 - 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 16:55 0 6 - 2 tests/notebooks/basic_failing.ipynb 2020-03-12 16:55 0 - 1 tests/notebooks/basic.ipynb 2020-03-12 16:55 0 6 + 5 tests/notebooks/external_output.ipynb 2020-03-12 17:11 0 5 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:11 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:11 0 + 1 tests/notebooks/basic.ipynb 2020-03-12 17:11 0 6 ``` Execution data (such as execution time) will be stored in the cache record: @@ -326,12 +326,12 @@ Execution data (such as execution time) will be stored in the cache record: ```console $ jcache cache show 6 ID: 6 -URI: ../tests/notebooks/basic_unrun.ipynb -Created: 2020-03-12 16:55 -Accessed: 2020-03-12 16:55 +Origin URI: ../tests/notebooks/basic_unrun.ipynb +Created: 2020-03-12 17:11 +Accessed: 2020-03-12 17:11 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Data: - execution_seconds: 1.0545749530000004 + execution_seconds: 1.1132317770000002 ``` @@ -341,7 +341,7 @@ Failed notebooks will not be cached, but the exception traceback will be added t $ jcache stage show 2 ID: 2 URI: ../tests/notebooks/basic_failing.ipynb -Created: 2020-03-12 16:55 +Created: 2020-03-12 17:11 Failed Last Execution! Traceback (most recent call last): File "../jupyter_cache/executors/basic.py", line 152, in execute @@ -396,7 +396,7 @@ Success! $ jcache stage show 1 ID: 1 URI: ../tests/notebooks/basic.ipynb -Created: 2020-03-12 16:55 +Created: 2020-03-12 17:11 Cache ID: 6 Assets: - ../tests/notebooks/artifact_folder/artifact.txt diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index 1f39afa..fe431d2 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -16,7 +16,7 @@ def cmnd_cache(): def format_cache_record(record, hashkeys, path_length): data = { "ID": record.pk, - "URI": str(shorten_path(record.uri, path_length)), + "Origin URI": str(shorten_path(record.uri, path_length)), "Created": record.created.isoformat(" ", "minutes"), "Accessed": record.accessed.isoformat(" ", "minutes"), # "Description": record.description, From 19341e7e01de4c90685145428c82943858324a28 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 17:34:27 +0000 Subject: [PATCH 5/5] Add `--latest-only` option to `jcache cache list` Fixes #32 --- README.md | 44 +++++++++++++------------ jupyter_cache/cli/commands/cmd_cache.py | 19 +++++++++-- tests/make_cli_readme.py | 4 +++ tests/test_cli.py | 23 +++++++++++++ 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index cad198c..fb187e8 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ to come ... ## Example CLI usage - + From the checked-out repository folder: @@ -141,12 +141,14 @@ You can remove cached records by their ID. $ jcache cache list ID Origin URI Created Accessed ---- ------------------------------------- ---------------- ---------------- - 5 tests/notebooks/external_output.ipynb 2020-03-12 17:11 2020-03-12 17:11 - 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 17:11 2020-03-12 17:11 - 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:11 2020-03-12 17:11 - 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:11 2020-03-12 17:11 + 5 tests/notebooks/external_output.ipynb 2020-03-12 17:31 2020-03-12 17:31 + 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 17:31 2020-03-12 17:31 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:31 2020-03-12 17:31 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:31 2020-03-12 17:31 ``` +Tip: Use the `--latest-only` option, to only show the latest versions of cached notebooks. + You can also cache notebooks with artefacts (external outputs of the notebook execution). @@ -164,8 +166,8 @@ Show a full description of a cached notebook by referring to its ID $ jcache cache show 6 ID: 6 Origin URI: ../tests/notebooks/basic.ipynb -Created: 2020-03-12 17:11 -Accessed: 2020-03-12 17:11 +Created: 2020-03-12 17:31 +Accessed: 2020-03-12 17:31 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Artifacts: - artifact_folder/artifact.txt @@ -265,11 +267,11 @@ Success! $ jcache stage list ID URI Created Assets Cache ID ---- ------------------------------------- ---------------- -------- ---------- - 5 tests/notebooks/external_output.ipynb 2020-03-12 17:11 0 5 - 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 17:11 0 - 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:11 0 6 - 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:11 0 2 - 1 tests/notebooks/basic.ipynb 2020-03-12 17:11 0 6 + 5 tests/notebooks/external_output.ipynb 2020-03-12 17:31 0 5 + 4 tests/notebooks/complex_outputs.ipynb 2020-03-12 17:31 0 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:31 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:31 0 2 + 1 tests/notebooks/basic.ipynb 2020-03-12 17:31 0 6 ``` You can remove a staged notebook by its URI or ID: @@ -315,10 +317,10 @@ that are inside the notebook folder, and data supplied by the executor. $ jcache stage list ID URI Created Assets Cache ID ---- ------------------------------------- ---------------- -------- ---------- - 5 tests/notebooks/external_output.ipynb 2020-03-12 17:11 0 5 - 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:11 0 6 - 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:11 0 - 1 tests/notebooks/basic.ipynb 2020-03-12 17:11 0 6 + 5 tests/notebooks/external_output.ipynb 2020-03-12 17:31 0 5 + 3 tests/notebooks/basic_unrun.ipynb 2020-03-12 17:31 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-03-12 17:31 0 + 1 tests/notebooks/basic.ipynb 2020-03-12 17:31 0 6 ``` Execution data (such as execution time) will be stored in the cache record: @@ -327,11 +329,11 @@ Execution data (such as execution time) will be stored in the cache record: $ jcache cache show 6 ID: 6 Origin URI: ../tests/notebooks/basic_unrun.ipynb -Created: 2020-03-12 17:11 -Accessed: 2020-03-12 17:11 +Created: 2020-03-12 17:31 +Accessed: 2020-03-12 17:31 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Data: - execution_seconds: 1.1132317770000002 + execution_seconds: 1.0559415130000005 ``` @@ -341,7 +343,7 @@ Failed notebooks will not be cached, but the exception traceback will be added t $ jcache stage show 2 ID: 2 URI: ../tests/notebooks/basic_failing.ipynb -Created: 2020-03-12 17:11 +Created: 2020-03-12 17:31 Failed Last Execution! Traceback (most recent call last): File "../jupyter_cache/executors/basic.py", line 152, in execute @@ -396,7 +398,7 @@ Success! $ jcache stage show 1 ID: 1 URI: ../tests/notebooks/basic.ipynb -Created: 2020-03-12 17:11 +Created: 2020-03-12 17:31 Cache ID: 6 Assets: - ../tests/notebooks/artifact_folder/artifact.txt diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index fe431d2..3c5d50a 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -28,9 +28,15 @@ def format_cache_record(record, hashkeys, path_length): @cmnd_cache.command("list") @options.CACHE_PATH -@click.option("-h", "--hashkeys", is_flag=True, help="Whether to show hashkeys.") +@click.option( + "-l", + "--latest-only", + is_flag=True, + help="Show only the most recent record per origin URI.", +) +@click.option("-h", "--hashkeys", is_flag=True, help="Show the hashkey of notebook.") @options.PATH_LENGTH -def list_caches(cache_path, hashkeys, path_length): +def list_caches(cache_path, latest_only, hashkeys, path_length): """List cached notebook records in the cache.""" import tabulate @@ -39,6 +45,15 @@ def list_caches(cache_path, hashkeys, path_length): if not records: click.secho("No Cached Notebooks", fg="blue") # TODO optionally list number of artifacts + if latest_only: + latest_records = {} + for record in records: + if record.uri not in latest_records: + latest_records[record.uri] = record + continue + if latest_records[record.uri].created < record.created: + latest_records[record.uri] = record + records = list(latest_records.values()) click.echo( tabulate.tabulate( [ diff --git a/tests/make_cli_readme.py b/tests/make_cli_readme.py index 0fcef1e..65713c0 100644 --- a/tests/make_cli_readme.py +++ b/tests/make_cli_readme.py @@ -94,6 +94,10 @@ def main(): ) ) strings.append(get_string(cmd_cache.list_caches, cache_name)) + strings.append( + "Tip: Use the `--latest-only` option, " + "to only show the latest versions of cached notebooks." + ) strings.append( dedent( """\ diff --git a/tests/test_cli.py b/tests/test_cli.py index c970382..b980356 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -39,6 +39,29 @@ def test_list_caches(tmp_path): assert "basic.ipynb" in result.output.strip(), result.output +def test_list_caches_latest_only(tmp_path): + db = JupyterCacheBase(str(tmp_path)) + db.cache_notebook_file( + path=os.path.join(NB_PATH, "basic.ipynb"), + uri="basic.ipynb", + check_validity=False, + ) + db.cache_notebook_file( + path=os.path.join(NB_PATH, "complex_outputs.ipynb"), + uri="basic.ipynb", + check_validity=False, + ) + runner = CliRunner() + result = runner.invoke(cmd_cache.list_caches, ["-p", tmp_path]) + assert result.exception is None, result.output + assert result.exit_code == 0, result.output + assert len(result.output.strip().splitlines()) == 4, result.output + result = runner.invoke(cmd_cache.list_caches, ["-p", tmp_path, "--latest-only"]) + assert result.exception is None, result.output + assert result.exit_code == 0, result.output + assert len(result.output.strip().splitlines()) == 3, result.output + + def test_cache_with_artifact(tmp_path): JupyterCacheBase(str(tmp_path)) nb_path = os.path.join(NB_PATH, "basic.ipynb")