-
Notifications
You must be signed in to change notification settings - Fork 4
/
rrd-merge.py
132 lines (106 loc) · 4.42 KB
/
rrd-merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from xml.etree.ElementTree import Element, ElementTree, SubElement, parse
from sys import argv, exit
# Takes RRDTool XML files on the command line. The last path must not already
# exist; the merged result of all the preceding files is written to it.
# Each data source is taken from the first file that contains it.
# All files must contain the same RRAs.
# Destination file - last argument.
# At least one input file plus the destination are required. Without this
# check, no arguments would make argv[-1] the script itself, and a single
# argument would leave nothing to merge.
if len(argv) < 3:
    print("Usage: {0} <input.xml> [<input.xml> ...] <output.xml>".format(argv[0]))
    # Exit codes: 1 = destination exists, 2 = RRAs differ, 3 = bad usage.
    exit(3)
destinationPath = argv[-1]
try:
    # Opening for reading is only an existence probe; close it immediately.
    open(destinationPath, 'r').close()
except IOError:
    # Okay - destination file should not exist.
    pass
else:
    print("Destination file '{0}' already exists. Refusing to overwrite.".format(destinationPath))
    exit(1)
# TODO: Race condition between checking if the file exists and opening it for writing.
# Text of <version/>, <step/>, and <lastupdate/> - the first elements of
# <rrd/> - keyed by tag name. Stays None until the first input file is read.
prelude = None
# Keyed by data source name.
# Value is a tuple of the index of the data source in its tree and the root
# element of that tree.
dataSources = {}
# RRA summary of the first file; every later file must produce the same one.
# For every <rra/> the summary records:
#   the text of <cf/>, <pdp_per_row/> and <params><xff/></params>
#   the number of <row/> elements in <database/>
# Stays None until the first input file is read.
rras = None
# TODO: Should this go in a module?
def getRraCheck(root):
    """Summarise the <rra/> children of root so files can be compared.

    Returns a list with one tuple per <rra/>, in document order:
    (text of <cf/>, text of <pdp_per_row/>, text of <params><xff/>,
    number of <row/> elements in <database/>).
    """
    return [
        (
            archive.find('cf').text,
            archive.find('pdp_per_row').text,
            archive.find('params').find('xff').text,
            len(archive.find('database').findall('row')),
        )
        for archive in root.findall('rra')
    ]
# All command line arguments but the last are input. Read them in.
# Ensure the RRAs are the same.
for inFile in argv[1:-1]:
    print("Reading {0}.".format(inFile))
    root = parse(inFile).getroot()
    if rras is None:
        # First file: remember its RRA summary and header values as the
        # reference for everything that follows.
        rras = getRraCheck(root)
        prelude = dict()
        for key in ['version', 'step', 'lastupdate']:
            prelude[key] = root.find(key).text
        print("Using from the first file: {0}".format(prelude))
    elif getRraCheck(root) != rras:
        # Every other file must match the first input file, which is
        # argv[1]; argv[0] is this script.
        print("RRAs in '{0}' differ from those in '{1}'.".format(inFile, argv[1]))
        exit(2)
    # Find the data sources in each file.
    # Use each occurrence from the first file it appears in.
    # Index is the position of the <ds/> among the <ds/> elements of its file.
    for index, ds in enumerate(root.findall('ds')):
        dataSource = ds.find('name').text
        if dataSource not in dataSources:
            print("Using '{0}' from '{1}'.".format(dataSource, inFile))
            dataSources[dataSource] = (index, root)
# Build new XML file.
out = Element('rrd')
# Add header elements: tag is the key, text is the value taken from the first
# file. Use a fixed order, as dict iteration order is arbitrary on Python 2.
# NOTE(review): rrdtool restore presumably expects this order - confirm.
for key in ['version', 'step', 'lastupdate']:
    SubElement(out, key).text = prelude[key]
# Take the (index, root) pairs once and reuse the same list below. The order
# of the data source definitions must match that of the values in the RRA
# rows, so both loops have to walk the sources in the same order.
sources = list(dataSources.values())
# Copy top-level data source definitions.
for dsIndex, dsRoot in sources:
    out.append(dsRoot.findall('ds')[dsIndex])
# Copy RRAs; get values from each.
for rraIndex, rra in enumerate(rras):
    element = SubElement(out, 'rra')
    # Add RRA descriptions. rra[3] is the number of rows, not a tag, so it
    # is not written.
    SubElement(element, 'cf').text = rra[0]
    SubElement(element, 'pdp_per_row').text = rra[1]
    SubElement(SubElement(element, 'params'), 'xff').text = rra[2]
    database = SubElement(element, 'database')
    # One list of values per data source: from that source's file, take the
    # rows of this RRA and pick the child at the source's own index.
    # A list comprehension is evaluated immediately, so each column uses its
    # own dsIndex; a lazy map() with a lambda would see only the last one.
    columns = []
    for dsIndex, dsRoot in sources:
        sourceRows = dsRoot.findall('rra')[rraIndex].find('database').findall('row')
        columns.append([row[dsIndex].text for row in sourceRows])
    # Change from list of lists of values to tuples of the same row.
    for row in zip(*columns):
        rowElement = SubElement(database, 'row')
        for value in row:
            SubElement(rowElement, 'v').text = value
ElementTree(out).write(destinationPath)