Skip to content

Commit

Permalink
Enable support for volatile repos and garbage collection
Browse files Browse the repository at this point in the history
  • Loading branch information
ben-z committed Oct 13, 2024
1 parent 5330f8d commit 09ab8d8
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 4 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ A CVMFS stratum 0 server meant fo storing ephemeral data. The main features are:
These features make it suitable for storing short-lived artifacts in CI/CD pipelines.

Coming soon:
- [ ] File upload API (we may be able to simply use the [publisher](https://cvmfs.readthedocs.io/en/stable/cpt-repository-gateway.html#publisher-configuration). It has nice features like being able to handle concurrent transactions.)
- [x] File upload API (we may be able to simply use the [publisher](https://cvmfs.readthedocs.io/en/stable/cpt-repository-gateway.html#publisher-configuration). It has nice features like being able to handle concurrent transactions.)
- The publisher appears to be bottlenecked at 20MiB/s when running the server in Kubernetes, and around 80MiB/s when running in Docker. `iperf` gives much higher bandwidth (between nodes and between the Kubernetes container and nodes), so it's likely not a network bottleneck.
- When using the custom FastAPI upload server, speeds reach over 400MiB/s easily. We'll adopt this approach.
- [ ] Garbage collection
- [ ] Better documentation
- [ ] Automatic [whitelist re-signing](https://cvmfs.readthedocs.io/en/stable/apx-security.html#signature-details)
Expand Down
48 changes: 45 additions & 3 deletions server/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,26 @@
from watcloud_utils.fastapi import WATcloudFastAPI
from watcloud_utils.logging import logger, set_up_logging
from watcloud_utils.typer import app
from typing_extensions import Annotated
import typer


set_up_logging()

@app.command()
def init_cvmfs_repo(repo_name: str):
def init_cvmfs_repo(
repo_name: Annotated[str, typer.Argument(help="Name of the CVMFS repo. CVMFS requires this to be an FQDN.")],
volatile: Annotated[bool, typer.Option(help="Whether the repo is volatile or not. If True, the repo will be created (cvmfs_server mkfs) with the -v flag.")] = True,
enable_garbage_collection: Annotated[bool, typer.Option(help="Whether to enable garbage collection for the repo.")] = True,
disable_auto_tag: Annotated[bool, typer.Option(help="Whether to disable auto-tagging for the repo.")] = True,
compression_algorithm: Annotated[str, typer.Option(help="Compression algorithm to use for the repo.")] = "none",
file_mbyte_limit: Annotated[int, typer.Option(help="Maximum file size in MiB that can be uploaded to the repo.")] = 4096,
):
"""
Initialize a CVMFS repo.
Docs: https://cvmfs.readthedocs.io/en/stable/cpt-repo.html
"""
print(f"Initializing CVMFS repo: {repo_name}")

# Make apache2 serve cvmfs repos
Expand All @@ -37,10 +52,23 @@ def init_cvmfs_repo(repo_name: str):
sys.exit(f"Failed to start apache2 service (exit code: {res.returncode})")

# Run cvmfs_server mkfs
res = subprocess.run(["cvmfs_server", "mkfs", "-o", "root", "-Z", "none", repo_name], check=True)
res = subprocess.run(
["cvmfs_server", "mkfs", "-o", "root", "-Z", compression_algorithm]
+ (["-v"] if volatile else [])
+ (["-z"] if enable_garbage_collection else [])
+ (["-g"] if disable_auto_tag else [])
+ [repo_name],
check=True
)
if res.returncode != 0:
sys.exit(f"Failed to run cvmfs_server mkfs (exit code: {res.returncode})")

# Populate repo configuration
repo_config_path = Path(f"/etc/cvmfs/repositories.d/{repo_name}/server.conf")
with open(repo_config_path, "a") as f:
f.write("\n")
f.write(f"CVMFS_FILE_MBYTE_LIMIT={file_mbyte_limit}\n")

# Make the public key and certificate available via HTTP
# Useful for clients and publishers:
# https://cvmfs.readthedocs.io/en/stable/cpt-repository-gateway.html#example-procedure
Expand Down Expand Up @@ -81,7 +109,7 @@ def start_server():
fastapi_app = WATcloudFastAPI(logger=logger)
transaction_lock = Lock()

@fastapi_app.post("/repos/{repo_name}/upload")
@fastapi_app.post("/repos/{repo_name}")
async def upload(repo_name: str, file: UploadFile, overwrite: bool = False):
logger.info(f"Uploading file: {file.filename} (content_type: {file.content_type})")

Expand Down Expand Up @@ -173,6 +201,20 @@ async def delete_file(repo_name: str, file_name: str):

return {"filename": file_name}

@app.command()
@fastapi_app.post("/gc")
def gc():
"""
Perform garbage collection on all repos.
"""
with transaction_lock:
logger.info("Running garbage collection")
gc_start = time.perf_counter()
subprocess.run(["cvmfs_server", "gc", "-r", "0", "-f"], check=True)
gc_end = time.perf_counter()
logger.info(f"Garbage collection completed. Took {gc_end - gc_start:.2f}s")
return {"message": "Garbage collection completed", "gc_time_s": gc_end - gc_start}

@app.command()
def start_server(port: int = 81):
uvicorn.run(fastapi_app, host="0.0.0.0", port=port)
Expand Down

0 comments on commit 09ab8d8

Please sign in to comment.