forked from pinterest/mysql_utils
-
Notifications
You must be signed in to change notification settings - Fork 2
/
mysql_restore.py
executable file
·416 lines (356 loc) · 17 KB
/
mysql_restore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
#!/usr/bin/env python
import argparse
import datetime
import subprocess
import time
import boto
import modify_mysql_zk
import mysql_backup
import mysql_init_server
from lib import backup
from lib import environment_specific
from lib import host_utils
from lib import mysql_lib
# Seconds to pause after a failed safety backup before overwriting an
# in-use (non-master) instance, giving the operator time to ^C (see
# prod_check below).
SCARY_TIMEOUT = 20
def main():
    """ Parse command line arguments and restore a MySQL backup onto
        localhost.
    """
    description = 'Utility to download and restore MySQL xbstream backups'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-b',
                        '--backup_type',
                        help='Type of backup to restore. Default is xtrabackup',
                        default=backup.BACKUP_TYPE_XBSTREAM,
                        choices=(backup.BACKUP_TYPE_LOGICAL,
                                 backup.BACKUP_TYPE_XBSTREAM))
    parser.add_argument('-s',
                        '--source_instance',
                        help=("Which instance's backups to restore. Default "
                              'is a best guess based on the hostname.'),
                        default=None)
    parser.add_argument('-d',
                        '--date',
                        help='attempt to restore from a specific date')
    parser.add_argument('-p',
                        '--destination_port',
                        help='Port on localhost to restore to. Default 3306.',
                        default='3306')
    # Flag absent -> 'REQ' (replication required and started);
    # flag present -> 'SKIP' (replication configured, START SLAVE not run).
    parser.add_argument('--no_repl',
                        help='Setup replication but do not run START SLAVE',
                        default='REQ',
                        action='store_const',
                        const='SKIP')
    parser.add_argument('--add_to_zk',
                        help=('By default the instance will not be added to '
                              'zk. This option will attempt to add the '
                              'instance to zk.'),
                        default='SKIP',
                        action='store_const',
                        const='REQ')
    parser.add_argument('--skip_production_check',
                        help=('DANGEROUS! Skip check of whether the instance '
                              'to be built is already in use'),
                        default=False,
                        action='store_true')
    args = parser.parse_args()

    if args.source_instance:
        source = host_utils.HostAddr(args.source_instance)
    else:
        # No explicit source; restore_instance will pick one based on the
        # destination's replica set.
        source = None

    # The restore destination is always an instance on localhost.
    destination = host_utils.HostAddr(':'.join((host_utils.HOSTNAME,
                                                args.destination_port)))

    restore_instance(backup_type=args.backup_type,
                     restore_source=source,
                     destination=destination,
                     no_repl=args.no_repl,
                     date=args.date,
                     add_to_zk=args.add_to_zk,
                     skip_production_check=args.skip_production_check)
def restore_instance(backup_type, restore_source, destination,
                     no_repl, date,
                     add_to_zk, skip_production_check):
    """ Restore a MySQL backup on to localhost

    Args:
    backup_type - Type of backup to restore (backup.BACKUP_TYPE_XBSTREAM or
                  backup.BACKUP_TYPE_LOGICAL)
    restore_source - A hostaddr object for where to pull a backup from,
                     or None to auto-select a source
    destination - A hostaddr object for where to restore the backup
    no_repl - Should replication be not started. It will always be setup.
              ('REQ' = start and wait for catch up, 'SKIP' = setup only)
    date - What date should the backup be from
    add_to_zk - Should the instance be added to zk ('REQ'/'SKIP'). If so,
                the log from the host being launched will be consulted.
    skip_production_check - Do not check if the host is already in zk for
                            production use.
    """
    log.info('Supplied source is {source}'.format(source=restore_source))
    log.info('Supplied destination is {dest}'.format(dest=destination))
    log.info('Desired date of restore {date}'.format(date=date))
    zk = host_utils.MysqlZookeeper()

    # Try to prevent unintentional destruction of prod servers
    log.info('Confirming no prod instances running on destination')
    prod_check(destination, skip_production_check)

    # Take a lock to prevent multiple restores from running concurrently
    log.info('Taking a flock to block another restore from starting')
    lock_handle = host_utils.take_flock_lock(backup.BACKUP_LOCK_FILE)

    log.info('Looking for a backup to restore')
    if restore_source:
        possible_sources = [restore_source]
    else:
        possible_sources = get_possible_sources(destination, backup_type)
    backup_key = find_a_backup_to_restore(possible_sources, destination,
                                          backup_type, date)

    # Figure out what we will use as the master when we setup replication.
    # restore_source is rebound here to the host the backup file was
    # actually taken from (parsed out of the backup file name).
    (restore_source, _) = backup.get_metadata_from_backup_file(backup_key.name)
    if restore_source.get_zk_replica_set():
        replica_set = restore_source.get_zk_replica_set()[0]
        master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
    else:
        # ZK has no idea what this replica set is, probably a new replica set.
        master = restore_source

    # Start logging (restore progress is recorded in a table on the master)
    row_id = backup.start_restore_log(master, {'restore_source': restore_source,
                                               'restore_port': destination.port,
                                               'restore_file': backup_key.name,
                                               'source_instance': destination.hostname,
                                               'restore_date': date,
                                               'replication': no_repl,
                                               'zookeeper': add_to_zk})

    # Giant try to allow logging if anything goes wrong.
    try:
        # If we hit an exception, this status will be used. If not, it will
        # be overwritten
        restore_log_update = {'restore_status': 'BAD'}

        # This also ensures that all needed directories exist
        log.info('Rebuilding local mysql instance')
        mysql_init_server.mysql_init_server(destination, skip_production_check=True,
                                            skip_backup=True, skip_locking=True)

        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            xbstream_restore(backup_key, destination.port)
            # Replication coordinates come from xtrabackup metadata files:
            # binlog_info if the source is the master, slave_info otherwise.
            if master == restore_source:
                log.info('Pulling replication info from restore to backup source')
                (binlog_file, binlog_pos) = backup.parse_xtrabackup_binlog_info(destination.port)
            else:
                log.info('Pulling replication info from restore to '
                         'master of backup source')
                (binlog_file, binlog_pos) = backup.parse_xtrabackup_slave_info(destination.port)
        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            logical_restore(backup_key, destination)
            host_utils.stop_mysql(destination.port)

        log.info('Running MySQL upgrade')
        host_utils.upgrade_auth_tables(destination.port)

        log.info('Starting MySQL')
        host_utils.start_mysql(destination.port,
                               options=host_utils.DEFAULTS_FILE_EXTRA_ARG.format(defaults_file=host_utils.MYSQL_NOREPL_CNF_FILE))

        # Since we haven't started the slave yet, make sure we've got these
        # plugins installed, whether we use them or not.
        mysql_lib.setup_semisync_plugins(destination)
        restore_log_update = {'restore_status': 'OK'}

        # Try to configure replication.
        log.info('Setting up MySQL replication')
        restore_log_update['replication'] = 'FAIL'
        if backup_type == backup.BACKUP_TYPE_XBSTREAM:
            mysql_lib.change_master(destination,
                                    master,
                                    binlog_file,
                                    binlog_pos,
                                    no_start=(no_repl == 'SKIP'))
        elif backup_type == backup.BACKUP_TYPE_LOGICAL:
            # For logical restores CHANGE MASTER was already run inside
            # logical_restore; we only need to (re)start the threads here.
            if no_repl == 'SKIP':
                log.info('As requested, not starting replication.')
            else:
                mysql_lib.restart_replication(destination)
        if no_repl == 'REQ':
            mysql_lib.wait_replication_catch_up(destination)
        restore_log_update['replication'] = 'OK'

        host_utils.restart_pt_daemons(destination.port)
        mysql_lib.setup_response_time_metrics(destination)
    except Exception as e:
        # Record the failure in the restore log before re-raising.
        log.error(e)
        if row_id is not None:
            restore_log_update['status_message'] = e
            restore_log_update['finished_at'] = True
        raise
    finally:
        # Always release the flock and flush the restore log, even on error.
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)
        backup.update_restore_log(master, row_id, restore_log_update)

    # ZK registration is best-effort: a failure here warns but does not
    # fail the restore.
    try:
        if add_to_zk == 'REQ':
            log.info('Adding instance to zk')
            modify_mysql_zk.auto_add_instance_to_zk(destination.port,
                                                    dry_run=False)
            backup.update_restore_log(master, row_id, {'zookeeper': 'OK'})
        else:
            log.info('add_to_zk is not set, therefore not adding to zk')
    except Exception as e:
        log.warning("An exception occurred: {e}".format(e=e))
        log.warning("If this is a DB issue, that's fine. "
                    "Otherwise, you should check ZK.")
    backup.update_restore_log(master, row_id, {'finished_at': True})

    if no_repl == 'REQ':
        log.info('Starting a new backup')
        mysql_backup.mysql_backup(destination, initial_build=True)
def prod_check(destination, skip_production_check):
    """ Confirm it is ok to overwrite the destination instance

    Args:
    destination - Hostaddr object for where to restore the backup
    skip_production_check - If set, it is ok to run on slaves

    Raises:
    Exception - if the destination is a production master, or is otherwise
                in use and skip_production_check is not set
    """
    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(destination)
    # A bare "except:" here would also swallow KeyboardInterrupt/SystemExit;
    # Exception is broad enough for "not found in zk".
    except Exception:
        # instance is not in production
        replica_type = None
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we refuse to run. No ifs, ands, or buts.
        raise Exception('Restore script must never run on a master')
    if replica_type:
        if skip_production_check:
            log.info('Ignoring production check. We hope you know what you '
                     'are doing and we will try to take a backup in case '
                     'you are wrong.')
            # Best-effort safety backup; on failure, give the operator a
            # window to abort before the instance is destroyed.
            try:
                mysql_backup.mysql_backup(destination)
            except Exception as e:
                log.error(e)
                log.warning('Unable to take a backup. We will give you {time} '
                            'seconds to change your mind and ^c.'
                            ''.format(time=SCARY_TIMEOUT))
                time.sleep(SCARY_TIMEOUT)
        else:
            raise Exception("It appears {instance} is in use. This is"
                            " very dangerous!".format(instance=destination))
def get_possible_sources(destination, backup_type):
    """ Get possible sources to restore a backup from. This is required due
        to mysqldump 5.5 not being able to use both --master_data and
        --slave_data

    Args:
    destination - A hostAddr object
    backup_type - backup.BACKUP_TYPE_LOGICAL or backup.BACKUP_TYPE_XBSTREAM

    Returns A list of hostAddr objects
    """
    zk = host_utils.MysqlZookeeper()
    replica_set = destination.get_zk_replica_set()[0]
    possible_sources = []
    for role in host_utils.REPLICA_TYPES:
        # Logical (mysqldump) restores can not use the master as a source,
        # so skip the master role for that backup type.
        if (role == host_utils.REPLICA_ROLE_MASTER and
                backup_type == backup.BACKUP_TYPE_LOGICAL):
            continue
        instance = zk.get_mysql_instance_from_replica_set(replica_set, role)
        if instance:
            possible_sources.append(instance)
    return possible_sources
def find_a_backup_to_restore(possible_sources, destination,
                             backup_type, date=None):
    """ Based on supplied constraints, try to find a backup to restore

    Args:
    possible_sources - A list of hostaddr objects to pull a backup from
    destination - A hostaddr object for where to restore the backup
    backup_type - What sort of backup to restore
    date - What date should the backup be from. If unset, the last
           backup.DEFAULT_MAX_RESTORE_AGE days are searched, newest first.

    Returns:
    The S3 key of the backup to restore (most recently modified key of
    the newest date that has any backups)
    """
    log.info('Possible source hosts:{possible_sources}'.format(possible_sources=possible_sources))
    if date:
        dates = [date]
    else:
        # Candidate dates, newest first
        dates = [datetime.date.today() - datetime.timedelta(days=days)
                 for days in range(0, backup.DEFAULT_MAX_RESTORE_AGE)]

    # Find a backup file with a preference for newer
    possible_keys = []
    for restore_date in dates:
        log.info('Looking for a backup for {restore_date}'.format(restore_date=restore_date))
        for possible_source in possible_sources:
            try:
                possible_keys.extend(backup.get_s3_backup(possible_source,
                                                          str(restore_date),
                                                          backup_type))
            except boto.exception.S3ResponseError:
                # A real S3 failure should abort the search immediately
                raise
            except Exception as e:
                # str(e) works on both py2 and py3; indexing e[0] is py2-only
                if backup.NO_BACKUP not in str(e):
                    raise
                log.info('No backup found in s3 for host {source} '
                         'on date {date}'
                         ''.format(source=possible_source,
                                   date=restore_date))
        if possible_keys:
            # We found backup(s) for the newest date that has any; no need
            # to look at older dates.
            break

    if not possible_keys:
        raise Exception('Could not find a backup to restore')

    # Of the candidates for that date, prefer the most recently modified
    most_recent = max(possible_keys, key=lambda key: key.last_modified)
    log.info('Found a backup: {}'.format(most_recent))
    return most_recent
def xbstream_restore(xbstream, port):
    """ Restore an xtrabackup file

    Args:
    xbstream - An xbstream file in S3
    port - The port on which to act on localhost
    """
    datadir = host_utils.get_cnf_setting('datadir', port)

    log.info('Shutting down MySQL')
    host_utils.stop_mysql(port)

    # The datadir must be empty before unpacking the backup into it.
    log.info('Removing any existing MySQL data')
    mysql_init_server.delete_mysql_data(port)

    log.info('Downloading and unpacking backup')
    backup.xbstream_unpack(xbstream, datadir)

    log.info('Decompressing compressed ibd files')
    backup.innobackup_decompress(datadir)

    # Apply the transaction logs captured during the backup so the
    # datadir is consistent before MySQL starts.
    log.info('Applying logs')
    backup.apply_log(datadir)

    log.info('Removing old innodb redo logs')
    mysql_init_server.delete_innodb_log_files(port)

    log.info('Setting permissions for MySQL on {dir}'.format(dir=datadir))
    host_utils.change_owner(datadir, 'mysql', 'mysql')
def logical_restore(dump, destination):
    """ Restore a compressed mysqldump file from s3 to localhost, port 3306

    Args:
    dump - a mysqldump file in s3
    destination - a hostaddr object for where the data should be loaded on
                  localhost
    """
    log.info('Preparing replication')
    (restore_source, _) = backup.get_metadata_from_backup_file(dump.name)
    # We are importing a mysqldump which was created with --master-data
    # so there will be a CHANGE MASTER statement at the start of the dump.
    # MySQL will basically just ignore a CHANGE MASTER command if
    # master_host is not already setup. So we are setting master_host,
    # username and password here. We use BOGUS for master_log_file so that
    # the IO thread is intentionally broken. With no argument for
    # master_log_file, the IO thread would start downloading the first bin log
    # and the SQL thread would start executing...
    mysql_lib.change_master(destination, restore_source, 'BOGUS', 0,
                            no_start=True)

    log.info('Restarting MySQL to turn off enforce_storage_engine')
    host_utils.stop_mysql(destination.port)
    host_utils.start_mysql(destination.port,
                           host_utils.DEFAULTS_FILE_ARG.format(defaults_file=host_utils.MYSQL_UPGRADE_CNF_FILE))

    # Build a streaming pipeline:
    #   s3 download -> pv (progress) -> zcat (decompress) -> mysql (import)
    log.info('Downloading, decompressing and importing backup')
    procs = dict()
    procs['s3_download'] = backup.create_s3_download_proc(dump)
    procs['pv'] = backup.create_pv_proc(procs['s3_download'].stdout,
                                        size=dump.size)
    log.info('zcat |')
    procs['zcat'] = subprocess.Popen(['zcat'],
                                     stdin=procs['pv'].stdout,
                                     stdout=subprocess.PIPE)
    mysql_cmd = ['mysql', '--port', str(destination.port)]
    log.info(' '.join(mysql_cmd))
    procs['mysql'] = subprocess.Popen(mysql_cmd,
                                      stdin=procs['zcat'].stdout,
                                      stdout=subprocess.PIPE)
    # Poll until every process in the pipeline has exited cleanly;
    # check_dict_of_procs raises if any of them fails.
    while(not host_utils.check_dict_of_procs(procs)):
        time.sleep(.5)
if __name__ == "__main__":
log = environment_specific.setup_logging_defaults(__name__)
main()