Skip to content

Commit

Permalink
add stripped down schema evolution (#341)
Browse files Browse the repository at this point in the history
* add stripped down schema evolution; relying on ROOT for the moment

---------

Co-authored-by: Thomas Madlener <[email protected]>
  • Loading branch information
hegner and tmadlener authored Mar 15, 2023
1 parent 9350b28 commit c7328d6
Show file tree
Hide file tree
Showing 32 changed files with 784 additions and 42 deletions.
8 changes: 4 additions & 4 deletions .github/scripts/pylint.rc
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,14 @@ exclude-protected=_asdict,_fields,_replace,_source,_make
[DESIGN]

# Maximum number of arguments for function / method
max-args=8
max-args=10

# Argument names that match this expression will be ignored. Default to name
# with leading underscore
ignored-argument-names=_.*

# Maximum number of locals for function / method body
max-locals=20
max-locals=25

# Maximum number of return / yield for function / method body
max-returns=8
Expand All @@ -285,10 +285,10 @@ max-statements=50
max-parents=7

# Maximum number of attributes for a class (see R0902).
max-attributes=20
max-attributes=25

# Minimum number of public methods for a class (see R0903).
min-public-methods=1
min-public-methods=0

# Maximum number of public methods for a class (see R0904).
max-public-methods=20
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ spack*
# Tooling
/.clangd/
/compile_commands.json
.vscode
/.cache/

# Generated files
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ project(podio)

#--- Version -------------------------------------------------------------------
SET( ${PROJECT_NAME}_VERSION_MAJOR 0 )
SET( ${PROJECT_NAME}_VERSION_MINOR 16 )
SET( ${PROJECT_NAME}_VERSION_PATCH 3 )
SET( ${PROJECT_NAME}_VERSION_MINOR 17 )
SET( ${PROJECT_NAME}_VERSION_PATCH 0 )

SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" )

Expand Down
16 changes: 14 additions & 2 deletions cmake/podioMacros.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,22 @@ set_property(CACHE PODIO_USE_CLANG_FORMAT PROPERTY STRINGS AUTO ON OFF)
# RETURN_HEADERS variable that will be filled with the list of created headers files: ${datamodel}/*.h
# RETURN_SOURCES variable that will be filled with the list of created source files : src/*.cc
# Parameters:
# OLD_DESCRIPTION OPTIONAL: The path to the yaml file describing a previous datamodel version
# OUTPUT_FOLDER OPTIONAL: The folder in which the output files should be placed
# Default is ${CMAKE_CURRENT_SOURCE_DIR}
# UPSTREAM_EDM OPTIONAL: The upstream edm and its package name that are passed to the
# generator via --upstream-edm
# IO_BACKEND_HANDLERS OPTIONAL: The I/O backend handlers that should be generated. The list is
# passed directly to podio_class_generator.py and validated there
# Default is ROOT
# SCHEMA_EVOLUTION OPTIONAL: The path to the yaml file declaring the necessary schema evolution
# )
#
# Note that the create_${datamodel} target will always be called, but if the YAML_FILE has not changed
# this is essentially a no-op, and should not cause re-compilation.
#---------------------------------------------------------------------------------------------------
function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOURCES)
CMAKE_PARSE_ARGUMENTS(ARG "" "OUTPUT_FOLDER;UPSTREAM_EDM" "IO_BACKEND_HANDLERS" ${ARGN})
CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS" ${ARGN})
IF(NOT ARG_OUTPUT_FOLDER)
SET(ARG_OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR})
ENDIF()
Expand All @@ -141,11 +143,21 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR
SET(UPSTREAM_EDM_ARG "--upstream-edm=${ARG_UPSTREAM_EDM}")
ENDIF()

SET(OLD_DESCRIPTION_ARG "")
IF (ARG_OLD_DESCRIPTION)
SET(OLD_DESCRIPTION_ARG "--old-description=${ARG_OLD_DESCRIPTION}")
ENDIF()

IF(NOT ARG_IO_BACKEND_HANDLERS)
# At least build the ROOT selection.xml by default for now
SET(ARG_IO_BACKEND_HANDLERS "ROOT")
ENDIF()

SET(SCHEMA_EVOLUTION_ARG "")
IF (ARG_SCHEMA_EVOLUTION)
SET(SCHEMA_EVOLUTION_ARG "--evolution_file=${ARG_SCHEMA_EVOLUTION}")
ENDIF()

set(CLANG_FORMAT_ARG "")
if (PODIO_USE_CLANG_FORMAT STREQUAL AUTO OR PODIO_USE_CLANG_FORMAT)
find_program(CLANG_FORMAT_EXE NAMES "clang-format")
Expand Down Expand Up @@ -189,7 +201,7 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR
message(STATUS "Creating '${datamodel}' datamodel")
# we need to bootstrap the data model, so this has to be executed in the cmake run
execute_process(
COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS}
COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE podio_generate_command_retval
)
Expand Down
1 change: 1 addition & 0 deletions include/podio/ASCIIWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define PODIO_ASCIIWRITER_H

#include "podio/EventStore.h"
#include "podio/SchemaEvolution.h"
#include "podio/utilities/Deprecated.h"

#include <fstream>
Expand Down
9 changes: 9 additions & 0 deletions include/podio/CollectionBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "podio/CollectionBuffers.h"
#include "podio/ObjectID.h"
#include "podio/SchemaEvolution.h"

#include <iostream>
#include <string>
Expand Down Expand Up @@ -49,6 +50,12 @@ class CollectionBase {
/// Create (empty) collection buffers from which a collection can be constructed
virtual podio::CollectionReadBuffers createBuffers() /*const*/ = 0;

/// Create (empty) collection buffers from which a collection can be constructed
/// Versioned to support schema evolution
virtual podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion,
podio::Backend backend) /*const*/
= 0;

/// check for validity of the container after read
virtual bool isValid() const = 0;

Expand All @@ -61,6 +68,8 @@ class CollectionBase {
virtual std::string getValueTypeName() const = 0;
/// fully qualified type name of stored POD elements - with namespace
virtual std::string getDataTypeName() const = 0;
/// schema version of the collection
virtual SchemaVersionT getSchemaVersion() const = 0;

/// destructor
virtual ~CollectionBase() = default;
Expand Down
3 changes: 3 additions & 0 deletions include/podio/CollectionBuffers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define PODIO_COLLECTIONBUFFERS_H

#include "podio/ObjectID.h"
#include "podio/SchemaEvolution.h"

#include <functional>
#include <memory>
Expand Down Expand Up @@ -41,7 +42,9 @@ struct CollectionWriteBuffers {
};

struct CollectionReadBuffers {
bool needsSchemaEvolution{false};
void* data{nullptr};
void* data_oldschema{nullptr};
CollRefCollection* references{nullptr};
VectorMembersInfo* vectorMembers{nullptr};

Expand Down
2 changes: 1 addition & 1 deletion include/podio/ROOTFrameWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class ROOTFrameWriter {

// collectionID, collectionType, subsetCollection
// NOTE: same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<int, std::string, bool>;
using CollectionInfoT = std::tuple<int, std::string, bool, unsigned int>;

/**
* Helper struct to group together all necessary state to write / process a
Expand Down
2 changes: 1 addition & 1 deletion include/podio/ROOTLegacyReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class ROOTLegacyReader {
private:
std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);

void createCollectionBranches(const std::vector<std::tuple<int, std::string, bool>>& collInfo);
void createCollectionBranches(const std::vector<std::tuple<int, std::string, bool, unsigned int>>& collInfo);

podio::GenericParameters readEventMetaData();

Expand Down
2 changes: 1 addition & 1 deletion include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class ROOTReader : public IReader {
std::map<int, GenericParameters>* readRunMetaData() override;

private:
void createCollectionBranches(const std::vector<std::tuple<int, std::string, bool>>& collInfo);
void createCollectionBranches(const std::vector<std::tuple<int, std::string, bool, unsigned int>>& collInfo);

std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);
// Information about the data vector as well as the collection class type
Expand Down
14 changes: 14 additions & 0 deletions include/podio/SchemaEvolution.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef PODIO_SCHEMAEVOLUTION_H
#define PODIO_SCHEMAEVOLUTION_H

#include <cstdint>

namespace podio {

/// The I/O backends that can be named when requesting schema-evolvable buffers
enum class Backend { ROOT, SIO };

/// Integer type that carries a schema version.
/// Spelled with the std:: qualified name because <cstdint> only guarantees
/// that one; the unqualified alias in the global namespace is optional.
using SchemaVersionT = std::uint32_t;

} // namespace podio

#endif
11 changes: 11 additions & 0 deletions include/podio/UserDataCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ class UserDataCollection : public CollectionBase {
}};
}

/// Create (empty) collection buffers from which a collection can be constructed.
/// Both arguments are ignored and the call simply forwards to createBuffers().
/// The parameters are left unnamed (instead of being tagged with the GNU-only
/// __attribute__((unused))) so that no compiler-specific extension is needed
/// to keep unused-parameter warnings quiet.
podio::CollectionReadBuffers createSchemaEvolvableBuffers(int /*readSchemaVersion*/,
                                                          podio::Backend /*backend*/) /*const*/ final {
  return createBuffers();
}

/// check for validity of the container after read
bool isValid() const override {
return true;
Expand Down Expand Up @@ -157,6 +163,11 @@ class UserDataCollection : public CollectionBase {
void setSubsetCollection(bool) override {
}

/// Report the schema version of this collection; the value is hard-coded here
SchemaVersionT getSchemaVersion() const final {
  constexpr SchemaVersionT fixedSchemaVersion = 1;
  return fixedSchemaVersion;
}

/// Print this collection to the passed stream
void print(std::ostream& os = std::cout, bool flush = true) const override {
os << "[";
Expand Down
1 change: 1 addition & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ SET(podio_PYTHON_DIR ${CMAKE_CURRENT_LIST_DIR} PARENT_SCOPE)

set(to_install
podio_class_generator.py
podio_schema_evolution.py
figure.txt
EventStore.py)

Expand Down
7 changes: 4 additions & 3 deletions python/podio/generator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ def _is_fixed_width_type(type_name):
class DataType:
"""Simple class to hold information about a datatype or component that is
defined in the datamodel."""
def __init__(self, klass):
def __init__(self, klass, schema_version):
self.full_type = klass
self.namespace, self.bare_type = _get_namespace_class(self.full_type)
self.schema_version = schema_version

def __str__(self):
if self.namespace:
Expand Down Expand Up @@ -195,8 +196,7 @@ def _to_json(self):

class DataModel: # pylint: disable=too-few-public-methods
"""A class for holding a complete datamodel read from a configuration file"""

def __init__(self, datatypes=None, components=None, options=None):
def __init__(self, datatypes=None, components=None, options=None, schema_version=None):
self.options = options or {
# should getters / setters be prefixed with get / set?
"getSyntax": False,
Expand All @@ -205,6 +205,7 @@ def __init__(self, datatypes=None, components=None, options=None):
# use subfolder when including package header files
"includeSubfolder": False,
}
self.schema_version = schema_version
self.components = components or {}
self.datatypes = datatypes or {}

Expand Down
10 changes: 9 additions & 1 deletion python/podio/podio_config_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,14 @@ def _read_datatype(cls, value):
@classmethod
def parse_model(cls, model_dict, package_name, upstream_edm=None):
"""Parse a model from the dictionary, e.g. read from a yaml file."""

if "schema_version" in model_dict:
schema_version = model_dict["schema_version"]
else:
warnings.warn("Please provide a schema_version entry. It will become mandatory. Setting it to 0 as default",
FutureWarning, stacklevel=3)
schema_version = 0

components = {}
if "components" in model_dict:
for klassname, value in model_dict["components"].items():
Expand All @@ -432,7 +440,7 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None):

# If this doesn't raise an exception everything should in principle work out
validator = ClassDefinitionValidator()
datamodel = DataModel(datatypes, components, options)
datamodel = DataModel(datatypes, components, options, schema_version)
validator.validate(datamodel, upstream_edm)
return datamodel

Expand Down
Loading

0 comments on commit c7328d6

Please sign in to comment.