forked from sc-zhang/bioscripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
quick_merge_file.py
executable file
·46 lines (37 loc) · 1.16 KB
/
quick_merge_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python
import sys, os
import multiprocessing
def sub_merge_file(f_list, s, e, tn):
    """Concatenate a run of input files into one temporary chunk file.

    The files ``f_list[s]`` .. ``f_list[e - 1]`` are copied, in that order,
    into ``merge_tmp<tn>`` in the current working directory.  An existing
    file with that name is overwritten; the file is created (empty) even
    when the range ``s``..``e`` selects nothing.

    Args:
        f_list: Sequence of paths of the candidate input files.
        s: Index of the first file to copy (inclusive).
        e: Index one past the last file to copy (exclusive).
        tn: Task identifier; only used to build the chunk file name.
    """
    # Function-scope import: the module header does not import shutil.
    import shutil

    # Binary mode makes the copy byte-exact: no decoding errors on
    # non-text bytes and no newline translation.  The context managers
    # guarantee both handles are closed even if an input cannot be read.
    with open('merge_tmp' + str(tn), 'wb') as f_out:
        for i in range(s, e):
            with open(f_list[i], 'rb') as f_in:
                shutil.copyfileobj(f_in, f_out)
def merge_file(i_folder, f_t, m_f, tn):
    """Merge every file in a folder whose name ends with a suffix.

    The matching files are sorted by path, split into contiguous chunks,
    and each chunk is concatenated by a separate process running
    ``sub_merge_file`` (which writes ``merge_tmp<i>`` in the current
    working directory).  The chunk files are then appended, in order, to
    ``m_f`` and removed.

    Args:
        i_folder: Directory that is scanned (non-recursively) for inputs.
        f_t: File-name suffix to select, e.g. ``".txt"``.  An empty
            suffix matches every regular file.
        m_f: Path of the merged output.  Data is appended, so existing
            content is kept; the file is created if it does not exist.
        tn: Maximum number of worker processes; must be at least 1.

    Raises:
        ValueError: If ``tn`` is smaller than 1.
        RuntimeError: If any worker process exits with a non-zero status.
    """
    # Function-scope import: the module header does not import shutil.
    import shutil

    if tn < 1:
        raise ValueError("threads must be a positive integer, got %r" % (tn,))

    # os.listdir() returns names in arbitrary order; sorting makes the
    # merged output reproducible.  Directories are skipped because they
    # cannot be opened as files.
    merge_list = sorted(
        path
        for path in (os.path.join(i_folder, fn)
                     for fn in os.listdir(i_folder)
                     if fn.endswith(f_t))
        if os.path.isfile(path)
    )

    # Never start more workers than there are files, and spread the
    # remainder over the first chunks instead of piling it on the last.
    n_workers = min(tn, len(merge_list))
    base, extra = divmod(len(merge_list), n_workers) if n_workers else (0, 0)

    task_list = []
    start = 0
    for i in range(n_workers):
        end = start + base + (1 if i < extra else 0)
        task_list.append(multiprocessing.Process(
            target=sub_merge_file, args=(merge_list, start, end, i)))
        start = end

    for t in task_list:
        t.start()
    for t in task_list:
        t.join()

    tmp_names = ["merge_tmp" + str(i) for i in range(n_workers)]
    try:
        # A failed worker leaves an incomplete chunk; refuse to produce a
        # silently truncated merge.
        failed = [i for i, t in enumerate(task_list) if t.exitcode != 0]
        if failed:
            raise RuntimeError(
                "merge worker(s) failed: " + ", ".join(str(i) for i in failed))

        # Append in pure Python rather than via a shell `cat`, so output
        # paths containing spaces or shell metacharacters are safe.
        with open(m_f, "ab") as f_out:
            for tmp_name in tmp_names:
                with open(tmp_name, "rb") as f_in:
                    shutil.copyfileobj(f_in, f_out)
    finally:
        # Always clean up the chunk files, including after a failure.
        for tmp_name in tmp_names:
            if os.path.exists(tmp_name):
                os.remove(tmp_name)
if __name__ == "__main__":
    # Command-line entry point: <input_folder> <file_type> <merge_file> <threads>.
    # Exactly four arguments are required.  Checking for inequality (not
    # just "fewer than") shows the usage text for surplus arguments too,
    # instead of failing on the five-way unpack below.
    if len(sys.argv) != 5:
        print("Usage: python "+sys.argv[0]+" <input_folder> <file_type> <merge_file> <threads>")
    else:
        prog, i_folder, f_t, m_f, t = sys.argv
        merge_file(i_folder, f_t, m_f, int(t))