Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Schema evolution code generation #440

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 95 additions & 9 deletions python/podio_class_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
"""Podio class generator script"""

import copy
import os
import sys
import subprocess
Expand All @@ -16,6 +17,7 @@
from podio.podio_config_reader import PodioConfigReader
from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder
from podio_schema_evolution import DataModelComparator # dealing with cyclic imports
from podio_schema_evolution import RenamedMember, root_filter

THIS_DIR = os.path.dirname(os.path.abspath(__file__))
TEMPLATE_DIR = os.path.join(THIS_DIR, 'templates')
Expand Down Expand Up @@ -89,9 +91,14 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr
# schema evolution specific code
self.old_yamlfile = old_description
self.evolution_file = evolution_file
self.old_schema_version = None
self.old_datamodel = None
self.old_datamodels_components = set()
self.old_datamodels_datatypes = set()
self.root_schema_evolution_dict = {} # containing the root relevant schema evolution per datatype
# information to update the selection.xml
self.root_schema_evolution_component_names = set()
self.root_schema_evolution_datatype_names = set()

try:
self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm)
Expand All @@ -115,9 +122,10 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr

def process(self):
"""Run the actual generation"""
self.preprocess_schema_evolution()

for name, component in self.datamodel.components.items():
self._process_component(name, component)

for name, datatype in self.datamodel.datatypes.items():
self._process_datatype(name, datatype)

Expand All @@ -127,11 +135,10 @@ def process(self):
self._create_selection_xml()

self._write_cmake_lists_file()
self.process_schema_evolution()

self.print_report()

def process_schema_evolution(self):
def preprocess_schema_evolution(self):
"""Process the schema evolution"""
# have to make all necessary comparisons
# which are the ones that changed?
Expand All @@ -141,7 +148,8 @@ def process_schema_evolution(self):
evolution_file=self.evolution_file)
comparator.read()
comparator.compare()

self.old_schema_version = "v%i" % comparator.datamodel_old.schema_version
self.old_schema_version_int = comparator.datamodel_old.schema_version
# some sanity checks
if len(comparator.errors) > 0:
print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \
Expand All @@ -156,6 +164,17 @@ def process_schema_evolution(self):
print(warning)
sys.exit(-1)

# now go through all the io_handlers and see what we have to do
if 'ROOT' in self.io_handlers:
for item in root_filter(comparator.schema_changes):
schema_evolutions = self.root_schema_evolution_dict.get(item.klassname)
if (schema_evolutions is None):
schema_evolutions = []
self.root_schema_evolution_dict[item.klassname] = schema_evolutions

# add whatever is relevant to our ROOT schema evolutions
self.root_schema_evolution_dict[item.klassname].append(item)

def print_report(self):
"""Print a summary report about the generated code"""
if not self.verbose:
Expand All @@ -170,8 +189,15 @@ def print_report(self):
print(summaryline)
print()

def _eval_template(self, template, data):
def _eval_template(self, template, data, old_schema_data=None):
"""Fill the specified template"""
# merge the info of data and the old schema into a single dict
if old_schema_data:
data['OneToOneRelations_old'] = old_schema_data['OneToOneRelations']
data['OneToManyRelations_old'] = old_schema_data['OneToManyRelations']
data['VectorMembers_old'] = old_schema_data['VectorMembers']
data['old_schema_version'] = self.old_schema_version_int

return self.env.get_template(template).render(data)

def _write_file(self, name, content):
Expand Down Expand Up @@ -221,7 +247,7 @@ def get_fn_format(tmpl):

return fn_templates

def _fill_templates(self, template_base, data):
def _fill_templates(self, template_base, data, old_schema_data=None):
"""Fill the template and write the results to file"""
# Update the passed data with some global things that are the same for all
# files
Expand All @@ -230,7 +256,7 @@ def _fill_templates(self, template_base, data):
data['incfolder'] = self.incfolder

for filename, template in self._get_filenames_templates(template_base, data['class'].bare_type):
self._write_file(filename, self._eval_template(template, data))
self._write_file(filename, self._eval_template(template, data, old_schema_data))

def _process_component(self, name, component):
"""Process one component"""
Expand All @@ -248,12 +274,72 @@ def _process_component(self, name, component):

component['includes'] = self._sort_includes(includes)
component['class'] = DataType(name)

self._fill_templates('Component', component)

# Add potentially older schema for schema evolution
# based on ROOT capabilities for now
if name in self.root_schema_evolution_dict.keys():
schema_evolutions = self.root_schema_evolution_dict[name]
component = copy.deepcopy(component)
for schema_evolution in schema_evolutions:
if isinstance(schema_evolution, RenamedMember):
for member in component['Members']:
if member.name == schema_evolution.member_name_new:
member.name = schema_evolution.member_name_old
component['class'] = DataType(name + self.old_schema_version)
else:
raise NotImplementedError
self._fill_templates('Component', component)
self.root_schema_evolution_component_names.add(name + self.old_schema_version)

def _replaceComponentInPaths(self, oldname, newname, paths):
# strip the namespace
shortoldname = oldname.split("::")[-1]
shortnewname = newname.split("::")[-1]
# and do the replace in place
for index, thePath in enumerate(paths):
if shortoldname in thePath:
newPath = thePath.replace(shortoldname, shortnewname)
paths[index] = newPath

def _process_datatype(self, name, definition):
"""Process one datatype"""
datatype = self._preprocess_datatype(name, definition)

# ROOT schema evolution preparation
# Compute and prepare the potential schema evolution parts
schema_evolution_datatype = copy.deepcopy(datatype)
needs_schema_evolution = False
# check whether it has a renamed member
# if name in self.root_schema_evolution_dict.keys():
# for member in schema_evolution_datatype['Members']:
# if
# then check for components with a renamed member
for member in schema_evolution_datatype['Members']:
if member.is_array:
if member.array_type in self.root_schema_evolution_dict.keys():
needs_schema_evolution = True
self._replaceComponentInPaths(member.array_type, member.array_type + self.old_schema_version,
schema_evolution_datatype['includes_data'])
member.full_type = member.full_type.replace(member.array_type, member.array_type + self.old_schema_version)
member.array_type = member.array_type + self.old_schema_version

else:
if member.full_type in self.root_schema_evolution_dict.keys():
needs_schema_evolution = True
self._replaceComponentInPaths(member.full_type, member.full_type + self.old_schema_version,
schema_evolution_datatype['includes'])
member.full_type = member.full_type + self.old_schema_version

if needs_schema_evolution:
print(" Preparing explicit schema evolution for %s" % (name))
schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa
self._fill_templates('Data', schema_evolution_datatype)
self.root_schema_evolution_datatype_names.add(name + self.old_schema_version)
self._fill_templates('Collection', datatype, schema_evolution_datatype)
else:
self._fill_templates('Collection', datatype)

self._fill_templates('Data', datatype)
self._fill_templates('Object', datatype)
self._fill_templates('MutableObject', datatype)
Expand Down Expand Up @@ -487,7 +573,7 @@ def _create_selection_xml(self):
data = {'components': [DataType(c) for c in self.datamodel.components],
'datatypes': [DataType(d) for d in self.datamodel.datatypes],
'old_schema_components': [DataType(d) for d in
self.old_datamodels_datatypes | self.old_datamodels_components]}
self.root_schema_evolution_datatype_names | self.root_schema_evolution_component_names]} # noqa
self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data))

def _build_include(self, member):
Expand Down
2 changes: 1 addition & 1 deletion python/podio_schema_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def heuristics_members(self, added_members, dropped_members, schema_changes):
"""make analysis of member changes in a given data type """
for dropped_member in dropped_members:
added_members_in_definition = [member for member in added_members if
dropped_member.definition_name == member.definition_name]
dropped_member.definition_name == member.definition_name]
for added_member in added_members_in_definition:
if added_member.member.full_type == dropped_member.member.full_type:
# this is a rename candidate. So let's see whether it has been explicitly declared by the user
Expand Down
18 changes: 17 additions & 1 deletion python/templates/Collection.cc.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,15 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const {
return {{ package_name }}::meta::schemaVersion;
}

{{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, 1) }}
// anonymous namespace for registration with the CollectionBufferFactory. This
// ensures that we don't have to make up arbitrary namespace names here, since
// none of this is publicly visible
namespace {
{{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, -1) }}

{% if old_schema_version is defined %}
{{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }}
{% endif %}

// The usual trick with an IIFE and a static variable inside a function and then
// making sure to call that function during shared library loading
Expand All @@ -184,6 +192,14 @@ bool registerCollection() {
podio::SchemaEvolution::Priority::AutoGenerated
);

{% if old_schema_version is defined %}
// register a buffer creation function for the schema evolution buffer
factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}});

//Make the SchemaEvolution aware of any other non-trivial conversion
// TODO
{% endif %}

return true;
}();
return reg;
Expand Down
5 changes: 5 additions & 0 deletions python/templates/CollectionData.h.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
{{ include }}
{% endfor %}

// schema evolution specific includes
{% if schema_evolution_data is defined %}
#include "{{ incfolder }}{{ schema_evolution_data }}Data"
{% endif %}

// podio specific includes
#include "podio/CollectionBuffers.h"
#include "podio/ICollectionProvider.h"
Expand Down
15 changes: 9 additions & 6 deletions python/templates/macros/collections.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,19 @@ void {{ class.bare_type }}Collection::print(std::ostream& os, bool flush) const

{% macro createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, schemaVersion) %}

// anonymous namespace for registration with the CollectionBufferFactory. This
// ensures that we don't have to make up arbitrary namespace names here, since
// none of this is publicly visible
namespace {
{% if schemaVersion == -1 %}
podio::CollectionReadBuffers createBuffers(bool isSubset) {
{% else %}
podio::CollectionReadBuffers createBuffersV{{ schemaVersion }}(bool isSubset) {
{% endif %}
auto readBuffers = podio::CollectionReadBuffers{};
readBuffers.type = "{{ class.full_type }}Collection";
{% if schemaVersion == -1 %}
readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion;
readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer;

{% else %}
readBuffers.schemaVersion = {{ schemaVersion }};
{% endif %}
readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; //TODO: replace this part for schema evolution
// The number of ObjectID vectors is either 1 or the sum of OneToMany and
// OneToOne relations
const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }};
Expand Down