forked from pinterest/mysql_utils
-
Notifications
You must be signed in to change notification settings - Fork 2
/
get_recent_checksums.py
executable file
·189 lines (158 loc) · 6.41 KB
/
get_recent_checksums.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#!/usr/bin/env python
import argparse
import socket
import sys
from lib import host_utils
from lib import mysql_lib
LINE_TEMPLATE = ('{master_instance:<MSPC}'
'{instance:<RSPC}'
'{reported_at:<22}'
'{db:<DBSPC}'
'{tbl:<TSPC}'
'{row_count:<RCSPC}'
'{row_diffs:<DCSPC}'
'{checksum_status}')
def main():
parser = argparse.ArgumentParser(description='MySQL checksum interface')
parser.add_argument('-i',
'--instance',
help='Defaults to localhost:3306',
default=''.join((socket.getfqdn(), ':3306')),
required=True)
parser.add_argument('-d',
'--db',
help="Restrict results to a single named db",
default=False)
parser.add_argument('-t',
'--table',
help='Restrict results to a single table',
default=False)
args = parser.parse_args()
instance = host_utils.HostAddr(args.instance)
checksums = get_checksums(instance, args.db)
if not checksums:
print "No data found!"
sys.exit(1)
format_str, line_length = generate_format_string(checksums)
header = format_str.format(master_instance='Master',
instance='Replica',
reported_at='Date',
db='Database',
tbl='Table',
row_count='Row Count',
row_diffs='Diff Count',
checksum_status='Status')
print header
print '=' * line_length
# now actually print the output.
for checksum in checksums:
if args.table is False or args.table == checksum['tbl']:
checksum['reported_at'] = str(checksum['reported_at'])
print format_str.format(**checksum)
def generate_format_string(checksums):
""" Use the base template and proper string lengths to make the output
look nicer.
Args:
checksums - a collection of checksum rows.
Returns:
format_str - a format string with spacing offsets filled in.
line_length - the maximum length of the line + some extra space
"""
# initial padding values
padding = {'master_instance': len('Master'),
'instance': len('Replica'),
'db': len('Database'),
'tbl': len('Table'),
'reported_at': len('Date'),
'row_count': len('Row Count'),
'row_diffs': len('Diff Count'),
'checksum_status': len('Status')}
line_length = 40 + sum(padding.values())
for checksum in checksums:
# Humans don't care about false positives for diffs
if (checksum['checksum_status'] == 'ROW_DIFFS_FOUND' and
checksum['rows_checked'] == 'YES' and
checksum['row_diffs'] == 0):
checksum['checksum_status'] = 'GOOD'
for key, value in padding.items():
if len(str(checksum[key])) > padding[key]:
line_length += len(str(checksum[key])) - padding[key]
padding[key] = len(str(checksum[key]))
# regenerate the output template based on padding.
format_str = LINE_TEMPLATE.replace(
'MSPC', str(padding['master_instance'] + 3)).replace(
'RSPC', str(padding['instance'] + 3)).replace(
'DBSPC', str(padding['db'] + 3)).replace(
'TSPC', str(padding['tbl'] + 3)).replace(
'RCSPC', str(padding['row_count'] + 3)).replace(
'DCSPC', str(padding['row_diffs'] + 3))
return format_str, line_length
def get_checksums(instance, db=False):
""" Get recent mysql replication checksums
Args:
instance - a hostaddr object for what server to pull results for
db - a string of a data to for which to restrict results
Returns:
A list of dicts from a select * on the relevant rows
"""
vars_for_query = dict()
vars_for_query['instance'] = instance
zk = host_utils.MysqlZookeeper()
host_shard_map = zk.get_host_shard_map()
# extra SQL if this is a sharded data set.
SHARD_DB_IN_SQL = ' AND db in ({sp}) '
if db is False:
cnt = 0
shard_param_set = set()
try:
for entry in host_shard_map[instance.__str__()]:
key = ''.join(('shard', str(cnt)))
vars_for_query[key] = entry
shard_param_set.add(key)
cnt += 1
shard_param = ''.join(('%(',
')s,%('.join(shard_param_set),
')s'))
except KeyError:
# if this is not a sharded data set, don't use this.
shard_param = None
else:
shard_param = '%(shard1)s'
vars_for_query['shard1'] = db
# connect to the instance we care about and get some data.
conn = mysql_lib.connect_mysql(instance, 'scriptrw')
# We only care about the most recent checksum
cursor = conn.cursor()
sql_base = ("SELECT detail.master_instance, "
" detail.instance, "
" detail.db, "
" detail.tbl, "
" detail.reported_at, "
" detail.checksum_status, "
" detail.rows_checked, "
" detail.row_count, "
" detail.row_diffs "
"FROM "
" (SELECT master_instance,"
" instance, "
" db, "
" tbl, "
" MAX(reported_at) AS reported_at "
" FROM test.checksum_detail "
" WHERE master_instance=%(instance)s "
" {in_db}"
" GROUP BY 1,2,3,4 "
" ) AS most_recent "
"JOIN test.checksum_detail AS detail "
"USING(master_instance, instance, db, "
"tbl, reported_at) ")
# and then fill in the variables.
if shard_param:
sql = sql_base.format(in_db=SHARD_DB_IN_SQL.format(sp=shard_param))
else:
sql = sql_base.format(in_db='')
cursor.execute(sql, vars_for_query)
checksums = cursor.fetchall()
return checksums
if __name__ == "__main__":
main()