# Extracted from git patch 5b6bf04ecc0fe0e9d3a049120b0678dc61a52970
# From: Kefu Chai
# Date: Fri, 28 Jul 2023 18:07:36 +0800
# Subject: [PATCH] scylla_node: add ScyllaNode.dump_sstables()
#
# `ScyllaNode.dump_sstables()` is a wrapper around
# `ScyllaNode.run_scylla_sstable()`. It provides a more user-friendly
# interface than the latter. It is introduced so that tests can use it with
# less pain when migrating from `node.run_sstable2json()` to
# `node.run_scylla_sstable()`.
#
# Signed-off-by: Kefu Chai
#
# Target file: ccmlib/scylla_node.py (1 file changed, 54 insertions).
# The patch adds the two imports below plus the method that follows. The
# method belongs to ScyllaNode; the class header lies outside this excerpt.

import json
from typing import Any, Optional


def dump_sstables(self,
                  datafiles: Optional[list[str]] = None,
                  keyspace: Optional[str] = None,
                  column_family: Optional[str] = None) -> list[dict[str, Any]]:
    """Collect the partitions stored in sstables via `scylla sstable dump-data`.

    Calls `self.run_scylla_sstable()` with the `dump-data` command and the
    `--merge` argument, parses the stdout of every returned entry as JSON
    and concatenates the partition lists found under
    `['sstables']['anonymous']`.

    When `column_family` is given it is forwarded as a one-element list and
    batch mode is requested; otherwise no column family filter is passed
    and batch mode is off.

    :param datafiles: paths to sstables (Data components)
    :param keyspace: restrict the operation to sstables of this keyspace
    :param column_family: restrict the operation to sstables of this
        column_family
    :return: all the partitions collected from the specified sstables
    :raises: subprocess.CalledProcessError if scylla-sstable returns a
        non-zero exit code; json.JSONDecodeError or KeyError if an output
        is not JSON of the shape described above.

    A typical return value might look like:
    ```
    [
        {
            'key': {'token': '-4069959284402364209',
                    'raw': '000400000001',
                    'value': '1'},
            'tombstone': {'timestamp': 1690533264324595,
                          'deletion_time': '2023-07-28 08:34:24z'}
        },
        {
            'key': {'token': '-2249632751995682149',
                    'raw': '00040000005e',
                    'value': '94'},
            'clustering_elements': [
                {
                    'type': 'clustering-row',
                    'key': {...},
                    'marker': {...},
                    'columns': {...},
                }
            ]
        }
    ]
    ```
    """
    # Batch mode is only requested when a single column family is named.
    single_table = column_family is not None
    families = [column_family] if single_table else None

    dumps = self.run_scylla_sstable('dump-data', ['--merge'],
                                    keyspace, datafiles, families,
                                    single_table)

    # Each value is a (stdout, stderr) pair; only stdout carries the dump.
    return [
        partition
        for out, _err in dumps.values()
        for partition in json.loads(out)['sstables']['anonymous']
    ]