-
Notifications
You must be signed in to change notification settings - Fork 0
/
parallel_partial_sum_listing_8.11.cpp
executable file
·95 lines (85 loc) · 3.22 KB
/
parallel_partial_sum_listing_8.11.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <algorithm> //std::min, std::for_each
#include <exception> // std::current_exception
#include <future> // std::future, std::promise
#include <iterator> //for std::distance, std::advance, std::iterator_traits
#include <numeric> // std::partial_sum
#include <thread>
#include <vector> // std::vector
// Scope guard over a vector of threads owned by someone else: when the guard
// is destroyed, every thread in the vector that is still joinable is joined.
// The guard only keeps a reference, so the vector must outlive it.
class join_threads{
    std::vector<std::thread>& workers;
public:
    explicit join_threads(std::vector<std::thread>& threads_):
        workers(threads_)
    {}

    ~join_threads(){
        for(std::thread& worker : workers){
            // Default-constructed or already-joined threads must be skipped;
            // calling join() on them would throw.
            if(!worker.joinable())
                continue;
            worker.join();
        }
    }
};
// Replaces every element of [first, last) in place with the running total of
// all elements up to and including it, splitting the work across threads.
//
// The range is cut into contiguous chunks. Each chunk first computes its own
// local partial sums, then waits for the final value of the preceding chunk
// and adds it to every one of its elements. The final value is handed from
// chunk to chunk through a promise/future pair.
//
// Thread count: at least 25 elements per thread, capped by
// std::thread::hardware_concurrency() (2 is assumed when that reports 0).
// The calling thread processes the last chunk, which also absorbs the
// remainder of the division.
//
// Errors: an exception thrown inside a worker chunk is forwarded through its
// promise to the next chunk; an exception in the last chunk (including one
// forwarded to it) is rethrown to the caller after the workers are joined.
//
// An empty range is a no-op.
template <typename Iterator>
void parallel_partial_sum(Iterator first, Iterator last){
    // iterator_traits (rather than Iterator::value_type) also covers raw
    // pointers; `typename` is required because the type is dependent.
    using value_type = typename std::iterator_traits<Iterator>::value_type;
    using difference_type =
        typename std::iterator_traits<Iterator>::difference_type;

    // Processes the inclusive range [begin, last].
    //   previous_end_value - final value of the preceding chunk, or null for
    //                        the first chunk.
    //   end_value          - where to publish this chunk's final value, or
    //                        null for the last chunk.
    struct process_chunk{
        void operator()(Iterator begin, Iterator last,
                        std::future<value_type>* previous_end_value,
                        std::promise<value_type>* end_value){
            try{
                Iterator end = last;
                ++end;
                std::partial_sum(begin, end, begin);
                if(previous_end_value){
                    // get() returns by value, so it must be stored in an
                    // object, not bound to a non-const reference.
                    value_type const addend = previous_end_value->get();
                    // Fix up and publish the last element first so the next
                    // chunk can proceed while this one updates the rest.
                    *last += addend;
                    if(end_value){
                        end_value->set_value(*last);
                    }
                    std::for_each(begin, last, [&addend](value_type& item){
                        item += addend;
                    });
                }
                else if(end_value){
                    end_value->set_value(*last);
                }
            }
            catch(...){
                if(end_value){
                    // NOTE(review): if the exception was raised after
                    // set_value() above, set_exception() itself throws
                    // std::future_error (promise already satisfied).
                    end_value->set_exception(std::current_exception());
                }
                else{
                    // Last chunk runs on the calling thread: let it propagate.
                    throw;
                }
            }
        }
    };

    unsigned long const length =
        static_cast<unsigned long>(std::distance(first, last));
    if(!length)
        return;

    unsigned long const min_per_thread = 25;
    unsigned long const max_threads =
        (length + min_per_thread - 1) / min_per_thread;
    unsigned long const hardware_threads = std::thread::hardware_concurrency();
    unsigned long const num_threads =
        std::min((hardware_threads != 0) ? hardware_threads : 2UL, max_threads);
    unsigned long const block_size = length / num_threads;

    std::vector<std::thread> threads(num_threads - 1);
    std::vector<std::promise<value_type> > end_values(num_threads - 1);
    std::vector<std::future<value_type> > previous_end_values;
    // Workers hold pointers into this vector, so it must never reallocate.
    previous_end_values.reserve(num_threads - 1);
    // Declared after the vectors above so the threads are joined before the
    // promises and futures they point at are destroyed.
    join_threads joiner(threads);

    Iterator block_start = first;
    for(unsigned long i = 0; i < (num_threads - 1); ++i){
        // block_last is the last element of the chunk (inclusive).
        Iterator block_last = block_start;
        std::advance(block_last, static_cast<difference_type>(block_size - 1));
        threads[i] = std::thread(process_chunk(), block_start, block_last,
                                 (i != 0) ? &previous_end_values[i - 1] : nullptr,
                                 &end_values[i]);
        block_start = block_last;
        ++block_start;
        previous_end_values.push_back(end_values[i].get_future());
    }

    // The calling thread handles whatever is left, up to the last element.
    Iterator final_element = block_start;
    std::advance(final_element, std::distance(block_start, last) - 1);
    process_chunk()(block_start, final_element,
                    (num_threads > 1) ? &previous_end_values.back() : nullptr,
                    nullptr);
}