-
Notifications
You must be signed in to change notification settings - Fork 2
/
upload
executable file
·345 lines (271 loc) · 11.1 KB
/
upload
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#!/usr/bin/env python3
import argparse
import getpass
import json
import os
import subprocess
import sys
import threading
import time
from io import BytesIO
from shutil import get_terminal_size
# Try to import pycurl, error with mediocre installation instructions
try:
import pycurl
except ImportError:
print('Error: Pycurl does not appear to be installed')
print(' Please install it from a package (usually named python3-pycurl)')
print(' or by using pip (pip install pycurl)')
sys.exit(1)
# Base WebDAV URL for Box.com; remote folder and file paths are appended to it
BOX_DAV = 'https://dav.box.com/dav/'
# Number of reusable curl handles created by main(), i.e. the maximum number
# of uploads that can be in flight at the same time
MAX_CONCURR_UPLOADS = 10
class BoxUpload:
    """Track one file being uploaded to Box.com and configure curl for it.

    Attributes:
        filename: name of the file, without any directory component.
        auth: ``user:password`` string handed to curl as USERPWD.
        percentage: last upload percentage reported by curl (0.0 - 100.0).
        bytes_uploaded: last uploaded byte count reported by curl.
    """

    def __init__(self, filename, curl_auth):
        self.filename = filename
        self.auth = curl_auth
        self.percentage = 0.0
        self.bytes_uploaded = 0

    def _remote_path(self, upload_folder):
        """Return the percent-encoded ``upload_folder/filename`` URL path.

        File and folder names may contain spaces, '#', '?', '%' and other
        characters that are not valid (or mean something else) inside a URL,
        so they have to be escaped before being handed to curl. Slashes are
        kept as path separators.
        """
        from urllib.parse import quote

        remote = '{}/{}'.format(upload_folder, self.filename)
        # fsencode() round-trips names that are not valid UTF-8 (they reach
        # us as surrogate escapes) instead of failing inside quote()
        return quote(os.fsencode(remote))

    def start(self, folder, upload_folder, conn):
        """Set curl params for the file to upload.

        Args:
            folder: local directory containing the file.
            upload_folder: destination folder path on Box.com, relative to
                the WebDAV root.
            conn: curl handle to configure; the open file object, its size
                and this upload are stored on it as ``fp``, ``file_bytes``
                and ``upload``.

        Raises:
            OSError: if the local file cannot be sized or opened.
        """
        file_path = folder + '/' + self.filename
        conn.upload = self
        conn.setopt(conn.PROGRESSFUNCTION, self.progress)
        conn.setopt(conn.NOPROGRESS, False)
        conn.file_bytes = os.path.getsize(file_path)
        conn.fp = open(file_path, 'rb')
        conn.setopt(conn.INFILESIZE, conn.file_bytes)
        conn.setopt(conn.READFUNCTION, conn.fp.read)
        conn.setopt(conn.UPLOAD, True)
        conn.setopt(conn.URL, BOX_DAV + self._remote_path(upload_folder))
        conn.setopt(conn.USERPWD, self.auth)

    def progress(self, download_t, download_d, upload_t, upload_d):
        """Store the upload progress percentage for periodic printing.

        Passed to pycurl as a progress function for each upload. Only the
        upload counters are used; the download counters are ignored.
        """
        self.bytes_uploaded = upload_d
        # upload_t is 0 until the total is known; keep the previous percentage
        if upload_t > 0:
            self.percentage = 100 * (upload_d / upload_t)

    def get_progress(self, terminal_width):
        """Get a nice progress bar including the file and percent complete.

        The progress bar has a dynamic width, so it must be constructed by
        figuring out the correct width based on other data shown.
        """
        first_half = '{} ['.format(self.filename)
        second_half = '] {:.2f}%'.format(self.percentage)
        # A negative width (terminal narrower than the labels) simply yields
        # an empty bar, since multiplying a string by a negative count is ''
        width = terminal_width - len(first_half) - len(second_half)
        filled = int(width * self.percentage / 100)
        return '{}{}{}{}'.format(
            first_half,
            filled * '#',
            (width - filled) * ' ',
            second_half
        )
def make_box_folder(folder, auth):
    """Make a folder on Box.com using the MKCOL HTTP request method.

    Args:
        folder: folder path to create, relative to the WebDAV root.
        auth: ``user:password`` string handed to curl as USERPWD.

    An already existing folder (HTTP 405) is reported and tolerated. The
    process exits with status 2 on HTTP 401 and with status 3 on any other
    status that is not 201.
    """
    from urllib.parse import quote

    conn = pycurl.Curl()
    print('Creating folder "' + folder + '"')
    try:
        conn.setopt(conn.CUSTOMREQUEST, 'MKCOL')
        conn.setopt(conn.USERPWD, auth)
        # Folder names may contain spaces, '#', '?' and so on, which must be
        # percent-encoded in a URL; slashes stay as path separators
        conn.setopt(conn.URL, BOX_DAV + quote(os.fsencode(folder)))
        conn.perform()
        http_status = conn.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the handle even when perform() raises (e.g. network error)
        conn.close()

    if http_status == 405:
        print('Could not create a folder, it already exists, continuing...')
    elif http_status == 401:
        print('401 error trying to create a folder, check your authentication!')
        sys.exit(2)
    elif http_status != 201:
        print('Error creating folder, HTTP ' + str(http_status))
        print('Folder name was ' + folder)
        sys.exit(3)
def print_progress(multi, stats, prev_total):
    """Show an updating progress bar for all ongoing uploads.

    Redraws the whole display from the top-left corner of the terminal and
    then schedules itself to run again one second later on a daemon timer.

    Args:
        multi: object whose ``handles`` list holds every curl handle and
            whose ``free`` list holds the currently idle ones.
        stats: dict with the keys 'bytes_uploaded', 'bytes_total',
            'num_finished', 'num_total' and 'start_time'.
        prev_total: total bytes uploaded at the previous redraw, used to
            derive the current speed.
    """
    width = get_terminal_size((80, 20)).columns  # Default size as a fallback

    # Move cursor to the top left using an ANSI escape sequence so this frame
    # overwrites the previous one
    sys.stdout.write('\033[1;1H')

    bytes_partial = 0
    for conn in multi.handles:
        # Read the attribute once: the main thread may reset it to None
        # between a check and a later use
        upload = conn.upload
        if upload:
            print(upload.get_progress(width))
            bytes_partial += upload.bytes_uploaded
    bytes_total = stats['bytes_uploaded'] + bytes_partial

    # Print statistics on upload time and current speed. Redraws are one
    # second apart, so the byte delta is bytes/second; 0.000008 converts
    # bytes to megabits.
    time_stats = 'Time so far: {} Speed: {:.2f} Mb/s'.format(
        friendly_time(time.time() - stats['start_time']),
        (bytes_total - prev_total) * 0.000008
    )
    print(time_stats.ljust(width))

    # Print more stats, this time on amount uploaded. Nothing to upload
    # (no files, or only empty files) counts as fully done rather than
    # dividing by zero.
    if stats['bytes_total']:
        percent_done = 100 * bytes_total / stats['bytes_total']
    else:
        percent_done = 100.0
    upload_stats = 'Uploaded {}/{} files and {}/{} ({:.2f}%) in total'.format(
        stats['num_finished'],
        stats['num_total'],
        friendly_file_size(bytes_total),
        friendly_file_size(stats['bytes_total']),
        percent_done
    )
    print(upload_stats.ljust(width))

    # If there are less progress bars now, add blank rows to overwrite the
    # bottom rows as necessary (to get rid of lines with outdated information)
    for _ in range(len(multi.free)):
        print(' ' * width)

    # Print the next update in a second; a daemon thread does not keep the
    # process alive, so interrupts and normal exit still work
    thread = threading.Timer(1, print_progress, [multi, stats, bytes_total])
    thread.daemon = True
    thread.start()
def reset_connection(multi, conn):
    """Return a finished curl handle to the pool of idle handles.

    Closes the file that was being uploaded, detaches the handle from the
    multi object, clears the per-upload attributes stored on the handle and
    appends the handle to ``multi.free``.
    """
    source_file = conn.fp
    source_file.close()
    multi.remove_handle(conn)
    # Wipe the per-upload state so the handle looks unused again
    conn.upload, conn.fp, conn.file_bytes = None, None, 0
    multi.free.append(conn)
def friendly_file_size(num_bytes):
    """Convert a byte count to a human-readable string such as '1.50 KB'.

    Adapted from http://stackoverflow.com/a/1094933/1979001

    Units step by factors of 1024. Plain bytes are labelled 'B', and
    anything of 1024 PB or more is still expressed in PB instead of falling
    off the end of the unit list.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    for unit in units[:-1]:
        if num_bytes < 1024.0:
            return '{:3.2f} {}'.format(num_bytes, unit)
        num_bytes /= 1024.0
    # Largest known unit: always return a string, never None
    return '{:3.2f} {}'.format(num_bytes, units[-1])
def friendly_time(seconds):
    """Show seconds in terms of larger units for easier reading.

    The value is expressed in the first unit (seconds, minutes, hours, days)
    in which it is smaller than the next step up; durations of 365 days or
    more are expressed in years.
    """
    times = [('seconds', 60), ('minutes', 60), ('hours', 24), ('days', 365)]
    for unit, factor in times:
        if seconds < factor:
            return '{:.2f} {}'.format(seconds, unit)
        seconds /= factor
    # Past the last table entry the value is in years; always return a
    # string, never None
    return '{:.2f} {}'.format(seconds, 'years')
def main(args):
    """Interactively back up a local directory tree to Box.com over WebDAV.

    Prompts for the source directory, the Box.com credentials and the
    destination folder, creates the destination folder, then uploads every
    file below the source directory using up to MAX_CONCURR_UPLOADS
    concurrent curl transfers. Remote sub-folders are created right before
    the files inside them are queued.

    Args:
        args: parsed command-line arguments; only ``args.quiet`` is read.

    Exits with status 1 if the source directory cannot be read and with
    status 4 if a transfer is aborted by its callback (e.g. on SIGINT).
    """
    from collections import deque

    archive = input('Where do you want to upload files from? ')
    # os.walk() silently yields nothing for a missing or unreadable
    # directory, so the directory has to be checked explicitly
    if not (os.path.isdir(archive) and os.access(archive, os.R_OK | os.X_OK)):
        print('Could not access files! Make sure the directory exists or try again as root!')
        sys.exit(1)

    # Get authentication for Box.com and destination folder
    email = input('Enter your Box.com email (Berkeley email): ')
    password = getpass.getpass('Enter your Box.com password: ')
    folder = input('What folder do you want to put the files in on Box.com (will be created)? ')
    auth = email + ':' + password

    def remote_dir(rel_folder):
        """Map a folder relative to the archive to its path on Box.com."""
        if not rel_folder:
            return folder
        return folder + '/' + rel_folder.replace(os.sep, '/')

    # Collect (folder relative to archive, [filenames]) for every directory.
    # relpath() copes with a trailing slash on the archive path; the archive
    # itself is represented by the empty string.
    folders = []
    for dirpath, _subdirs, filenames in os.walk(archive):
        rel = os.path.relpath(dirpath, archive)
        folders.append(('' if rel == os.curdir else rel, filenames))
    file_paths = [
        os.path.join(archive, rel, name)
        for rel, names in folders
        for name in names
    ]

    # Make the new root folder for the backup
    if not args.quiet:
        print('Creating folder ' + folder + ' on Box.com...')
    make_box_folder(folder, auth)

    # Start timer for upload speed and get total upload size. getsize() is
    # what BoxUpload.start() uses for each file, so the total matches the
    # per-file sizes (broken symlinks are skipped here and when uploading).
    file_bytes = sum(os.path.getsize(p) for p in file_paths if os.path.exists(p))
    if not args.quiet:
        print('Uploading {}...'.format(friendly_file_size(file_bytes)))
        time.sleep(3)
    start_time = time.time()

    multi = pycurl.CurlMulti()
    multi.handles = []
    multi.free = []
    stats = {
        'num_finished': 0,
        'num_total': len(file_paths),
        'bytes_uploaded': 0,
        'bytes_total': file_bytes,
        'start_time': start_time,
    }

    # Create free connection handlers that can be reused
    for _ in range(MAX_CONCURR_UPLOADS):
        conn = pycurl.Curl()
        conn.fp = None
        conn.upload = None
        conn.file_bytes = 0
        multi.handles.append(conn)
        multi.free.append(conn)

    # Queue of all folders to be created, with the files to upload into them
    queue = deque(folders)
    # (folder, filename) pairs whose remote folder already exists
    safe_queue = deque()

    # Clear the screen and start printing the progress bar every second
    if not args.quiet:
        subprocess.call(('clear',))
        print_progress(multi, stats, 0)

    # Keep going while files are unfinished or folders (possibly empty ones
    # at the end of the walk) still have to be created
    while stats['num_finished'] < stats['num_total'] or queue:
        # Move on to the next folder once the current one is fully queued
        if not safe_queue and queue:
            rel_folder, filenames = queue.popleft()
            if rel_folder:
                make_box_folder(remote_dir(rel_folder), auth)
            # Add all files to the safe queue for uploading
            safe_queue.extend((rel_folder, name) for name in filenames)

        # Add uploads if there are connections free to use
        while safe_queue and multi.free:
            parent_folder, filename = safe_queue.popleft()
            local_dir = os.path.join(archive, parent_folder) if parent_folder else archive
            if not os.path.exists(os.path.join(local_dir, filename)):
                stats['num_finished'] += 1
                continue

            conn = multi.free.pop()
            # Remembered so a failed transfer can be queued again correctly
            conn.parent_folder = parent_folder
            upload = BoxUpload(filename, auth)
            try:
                upload.start(local_dir, remote_dir(parent_folder), conn)
            except OSError as err:
                # Unreadable or vanished file: skip it, keep the handle usable
                print('Error opening {}: {}'.format(filename, err))
                if conn.fp:
                    conn.fp.close()
                conn.upload = None
                conn.fp = None
                conn.file_bytes = 0
                multi.free.append(conn)
                stats['num_finished'] += 1
                continue
            multi.add_handle(conn)

        # Run curl's internal state machine
        while True:
            ret, num_handles = multi.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break

        # Check for successful or failed curl connections and free them again
        while True:
            num_queued, ok_list, err_list = multi.info_read()

            # Free up connections from files that have finished
            for conn in ok_list:
                # curl treats an HTTP error response as a completed transfer,
                # so a rejected upload has to be detected from the status
                http_status = conn.getinfo(pycurl.HTTP_CODE)
                if http_status >= 400:
                    print('Error uploading {}: HTTP {}'.format(
                        conn.upload.filename,
                        http_status
                    ))
                stats['bytes_uploaded'] += conn.file_bytes
                reset_connection(multi, conn)

            # Retry files that have errored in some way
            for conn, errno, errmsg in err_list:
                failed_name = conn.upload.filename
                print('Error uploading {}: Curl error #{} {}'.format(
                    failed_name,
                    errno,
                    errmsg
                ))

                # Allow exits by SIGINT
                if errno == 42:  # Callback aborted exception code
                    sys.exit(4)

                # Retry this file: its remote folder already exists, so it
                # goes to the front of the safe queue as a (folder, file) pair
                print('Retrying {}'.format(failed_name))
                safe_queue.appendleft((conn.parent_folder, failed_name))
                reset_connection(multi, conn)

            stats['num_finished'] += len(ok_list)

            # Move on if no more connections need freeing
            if num_queued == 0:
                break

        # Wait for more data to be available
        multi.select(1.0)

    # Clean up any open files and connections
    for conn in multi.handles:
        if conn.fp:
            conn.fp.close()
        conn.close()
    multi.close()

    # Print statistics about the backup time, size, and speed
    if not args.quiet:
        subprocess.call(('clear',))
        total_time = time.time() - start_time
        # MB/s; guard against a zero elapsed time on a coarse clock
        transfer_rate = file_bytes / (1000000 * total_time) if total_time > 0 else 0.0
        print('Box.com upload complete!')
        print('Took {} to transfer {} files with combined size {}'.format(
            friendly_time(total_time),
            stats['num_total'],
            friendly_file_size(file_bytes)
        ))
        print('Average rate of {:.2f} Mb/s ({:.2f} MB/s)'.format(
            transfer_rate * 8,
            transfer_rate
        ))
if __name__ == '__main__':
    # Command-line entry point: parse the flags and run the backup
    parser = argparse.ArgumentParser(description='Back up files to Box.com')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help='do not output progress to stdout')
    args = parser.parse_args()
    # sys.exit() rather than exit(): the latter is injected by the site
    # module for interactive use and is not guaranteed to exist
    sys.exit(main(args))