-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Parallel MC with OpenMP #625
Open
MSallermann
wants to merge
4
commits into
develop
Choose a base branch
from
parallel_mc
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
010d933
MC: Started to lay groundwork for parallel metropolis algorithm
e9d7ae3
MC: First version of parallel Metropolis
be3b778
MC: Fixed bug in the computation of the local basis for the cone algo…
38450f0
Method_MC: Changed Block_Decomposition so that it enforces an even nu…
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,6 +45,7 @@ namespace Engine | |
this->cone_angle = Constants::Pi * this->parameters_mc->metropolis_cone_angle / 180.0; | ||
this->n_rejected = 0; | ||
this->acceptance_ratio_current = this->parameters_mc->acceptance_ratio_target; | ||
Block_Decomposition(); | ||
} | ||
|
||
// This implementation is mostly serial as parallelization is nontrivial | ||
|
@@ -60,10 +61,74 @@ namespace Engine | |
|
||
// TODO: add switch between Metropolis and heat bath | ||
// One Metropolis step | ||
Metropolis(spins_old, spins_new); | ||
Parallel_Metropolis(spins_old, spins_new); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the parallel implementation slower than the sequential version, when running on a single thread?
|
||
// Metropolis(spins_old, spins_new); | ||
Vectormath::set_c_a(1, spins_new, spins_old); | ||
} | ||
|
||
bool Method_MC::Metropolis_Spin_Trial(int ispin, const vectorfield & spins_old, vectorfield & spins_new, const scalar & rng1, const scalar & rng2, const scalar & rng3, const scalar & cos_cone_angle) | ||
{ | ||
Matrix3 local_basis; | ||
const Vector3 e_z{0,0,1}; | ||
const scalar kB_T = Constants::k_B * this->parameters_mc->temperature; | ||
|
||
// Calculate local basis for the spin | ||
if( std::abs(spins_old[ispin].z()) < 1-1e-10 ) | ||
{ | ||
local_basis.col(2) = spins_old[ispin]; | ||
local_basis.col(0) = (local_basis.col(2).cross(e_z)).normalized(); | ||
local_basis.col(1) = local_basis.col(2).cross(local_basis.col(0)); | ||
} else | ||
{ | ||
local_basis = Matrix3::Identity(); | ||
} | ||
|
||
// Rotation angle between 0 and cone_angle degrees | ||
scalar costheta = 1 - (1 - cos_cone_angle) * rng1; | ||
|
||
scalar sintheta = std::sqrt(1 - costheta*costheta); | ||
|
||
// Random distribution of phi between 0 and 360 degrees | ||
scalar phi = 2*Constants::Pi * rng2; | ||
|
||
Vector3 local_spin_new{ sintheta * std::cos(phi), | ||
sintheta * std::sin(phi), | ||
costheta }; | ||
|
||
// New spin orientation in regular basis | ||
spins_new[ispin] = local_basis * local_spin_new; | ||
|
||
// Energy difference of configurations with and without displacement | ||
scalar Eold = this->systems[0]->hamiltonian->Energy_Single_Spin(ispin, spins_old); | ||
scalar Enew = this->systems[0]->hamiltonian->Energy_Single_Spin(ispin, spins_new); | ||
scalar Ediff = Enew-Eold; | ||
|
||
// Metropolis criterion: reject the step if energy rose | ||
if( Ediff > 1e-14 ) | ||
{ | ||
if( this->parameters_mc->temperature < 1e-12 ) | ||
{ | ||
// Restore the spin | ||
spins_new[ispin] = spins_old[ispin]; | ||
return false; | ||
} | ||
else | ||
{ | ||
// Exponential factor | ||
scalar exp_ediff = std::exp( -Ediff/kB_T ); | ||
// Only reject if random number is larger than exponential | ||
if( exp_ediff < rng3 ) | ||
{ | ||
// Restore the spin | ||
spins_new[ispin] = spins_old[ispin]; | ||
// Counter for the number of rejections | ||
return false; | ||
} | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
// Simple metropolis step | ||
void Method_MC::Metropolis(const vectorfield & spins_old, vectorfield & spins_new) | ||
{ | ||
|
@@ -111,7 +176,7 @@ namespace Engine | |
if( this->parameters_mc->metropolis_step_cone ) | ||
{ | ||
// Calculate local basis for the spin | ||
if( spins_old[ispin].z() < 1-1e-10 ) | ||
if( std::abs(spins_old[ispin].z()) < 1-1e-10 ) | ||
{ | ||
local_basis.col(2) = spins_old[ispin]; | ||
local_basis.col(0) = (local_basis.col(2).cross(e_z)).normalized(); | ||
|
@@ -191,6 +256,203 @@ namespace Engine | |
} | ||
} | ||
|
||
void Method_MC::Block_Decomposition() | ||
{ | ||
/* | ||
Decompose the system into blocks of spins | ||
Each Block may only interact with it's nearest neighbour. | ||
Therefore this decomposition only works for short-ranged interactions (all except DDI) | ||
It is further important that the number of blocks in each dimension is either 1 or an even number. | ||
If this is not fulfilled the block can interact with its nearest periodic neighbour, for open boundary conditions it does not matter. | ||
For example, a valid decomposition of a 9x4x1 system, with up to *next-nearest* neighbour interactions, could look like this: | ||
┌───┬───┬───┬─────┐ | ||
│1 1│2 2│1 1│2 2 2│ | ||
│ │ │ │ │ | ||
│1 1│2 2│1 1│2 2 2│ | ||
├───┼───┼───┼─────┤ | ||
│3 3│4 4│3 3│4 4 4│ | ||
│ │ │ │ │ | ||
│3 3│4 4│3 3│4 4 4│ | ||
└───┴───┴───┴─────┘ | ||
The decomposition enables us to run the MC algorithm in four serial phases (1 to 4), while each phase is parallelized. | ||
All boxes belonging to one specific phase, do not interact directly between each other, therefore we can perform their MC trial moves in parallel. | ||
The number of phases is 2^dim and the number of usable threads is the number of boxes per phase. | ||
In this example the usable number of threads would be 2. | ||
If the size of the boxes does not perfectly divide n_cells, the rest is added or substracted at the edges. | ||
*/ | ||
|
||
// Initialize | ||
n_blocks = {0,0,0}; | ||
block_size_min = {1,1,1}; // Require at least one as block size | ||
rest = {0,0,0}; | ||
|
||
if (this->systems[0]->hamiltonian->Name() != "Heisenberg") | ||
{ | ||
return; | ||
} | ||
|
||
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format("Performing block decomposition for parallel Metropolis algorithm")); | ||
const auto & geom = this->systems[0]->geometry; | ||
const auto & ham = (const Hamiltonian_Heisenberg *) this->systems[0]->hamiltonian.get(); | ||
|
||
for(auto & p : ham->dmi_pairs) | ||
{ | ||
for(int i=0; i<3; i++) | ||
block_size_min[i] = std::max(block_size_min[i], std::abs(p.translations[i])); | ||
} | ||
for(auto & p : ham->exchange_pairs) | ||
{ | ||
for(int i=0; i<3; i++) | ||
block_size_min[i] = std::max(block_size_min[i], std::abs(p.translations[i])); | ||
} | ||
for(auto & q : ham->quadruplets) | ||
{ | ||
for(int i=0; i<3; i++) | ||
{ | ||
block_size_min[i] = std::max(block_size_min[i], std::abs(q.d_j[i])); | ||
block_size_min[i] = std::max(block_size_min[i], std::abs(q.d_k[i])); | ||
block_size_min[i] = std::max(block_size_min[i], std::abs(q.d_l[i])); | ||
} | ||
} | ||
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Block size from interactions = ({}, {}, {})", block_size_min[0], block_size_min[1], block_size_min[2])); | ||
|
||
// If the number of blocks is uneven for a certain direction, we increase the block size until it is even | ||
for(int i=0; i<3; i++) | ||
{ | ||
n_blocks[i] = geom->n_cells[i] / block_size_min[i]; | ||
while(n_blocks[i] % 2 != 0 ) | ||
{ | ||
n_blocks[i] = geom->n_cells[i] / ++block_size_min[i]; | ||
} | ||
} | ||
|
||
for(int i=0; i<3; i++) | ||
{ | ||
if(block_size_min[i] <= geom->n_cells[i]) | ||
{ | ||
rest[i] = geom->n_cells[i] % block_size_min[i]; | ||
} else { // If the block size is larger than n_cells, we use n_blocks=1 | ||
n_blocks[i] = 1; | ||
block_size_min[i] = geom->n_cells[i]; | ||
} | ||
} | ||
|
||
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Actual block size = ({}, {}, {})", block_size_min[0], block_size_min[1], block_size_min[2])); | ||
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" N_blocks = ({}, {}, {})", n_blocks[0], n_blocks[1], n_blocks[2])); | ||
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Rest = ({}, {}, {})", rest[0], rest[1], rest[2])); | ||
int Nthreads = std::max(1,n_blocks[0]/2) * std::max(1,n_blocks[1]/2) * std::max(1,n_blocks[2]/2); | ||
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Supports up to max. {} threads.", Nthreads)); | ||
} | ||
|
||
// Simple metropolis step | ||
void Method_MC::Parallel_Metropolis(const vectorfield & spins_old, vectorfield & spins_new) | ||
{ | ||
const auto & geom = this->systems[0]->geometry; | ||
|
||
scalar diff = 0.01; | ||
// Cone angle feedback algorithm | ||
if( this->parameters_mc->metropolis_step_cone && this->parameters_mc->metropolis_cone_adaptive ) | ||
{ | ||
this->acceptance_ratio_current = 1 - (scalar)this->n_rejected / (scalar)this->nos_nonvacant; | ||
|
||
if( (this->acceptance_ratio_current < this->parameters_mc->acceptance_ratio_target) && (this->cone_angle > diff) ) | ||
this->cone_angle -= diff; | ||
|
||
if( (this->acceptance_ratio_current > this->parameters_mc->acceptance_ratio_target) && (this->cone_angle < Constants::Pi-diff) ) | ||
this->cone_angle += diff; | ||
|
||
this->parameters_mc->metropolis_cone_angle = this->cone_angle * 180.0 / Constants::Pi; | ||
} | ||
|
||
this->n_rejected = 0; | ||
int n_rejected = 0; | ||
scalar cos_cone_angle = std::cos(this->cone_angle); | ||
|
||
auto distribution = std::uniform_real_distribution<scalar>(0, 1); | ||
|
||
// Number of threads | ||
int nt = 1; | ||
#ifdef SPIRIT_USE_OPENMP | ||
nt = omp_get_max_threads(); | ||
#endif | ||
|
||
// We make nt copies of our number generator, so that each thread has its own | ||
if(nt != prng_vec.size()) | ||
{ | ||
prng_vec.resize(nt); | ||
for(int i=0; i<prng_vec.size(); i++) | ||
{ | ||
prng_vec[i] = std::mt19937(this->parameters_mc->rng_seed * (i+1)); | ||
} | ||
} | ||
|
||
// There are up to (2^dim) phases in the algorithm, which have to be executed serially | ||
for(int phase_c = 0; phase_c<2; phase_c++) | ||
{ | ||
for(int phase_b = 0; phase_b<2; phase_b++) | ||
{ | ||
for(int phase_a = 0; phase_a<2; phase_a++) | ||
{ | ||
// In each phase we iterate over blocks of spins, which are **next-nearest** neighbour blocks | ||
// Thus there is no dependence of the spin moves and we can parallelize over the blocks in a phase | ||
#pragma omp parallel for collapse(3) private(distribution) reduction(+:n_rejected) | ||
for(int block_c = phase_c; block_c < n_blocks[2]; block_c+=2) | ||
{ | ||
for(int block_b = phase_b; block_b < n_blocks[1]; block_b+=2) | ||
{ | ||
for(int block_a = phase_a; block_a < n_blocks[0]; block_a+=2) | ||
{ | ||
int block_size_c = (block_c == n_blocks[2] - 1) ? block_size_min[2] + rest[2] : block_size_min[2]; // Account for the remainder of division (n_cells[i] / block_size_min[i]) by increasing the block size at the edges | ||
int block_size_b = (block_b == n_blocks[1] - 1) ? block_size_min[1] + rest[1] : block_size_min[1]; | ||
int block_size_a = (block_a == n_blocks[0] - 1) ? block_size_min[0] + rest[0] : block_size_min[0]; | ||
|
||
// Iterate over the current block | ||
for(int cc=0; cc < block_size_c; cc++) | ||
{ | ||
for(int bb=0; bb < block_size_b; bb++) | ||
{ | ||
for(int aa=0; aa < block_size_a; aa++) | ||
{ | ||
for(int ibasis=0; ibasis < geom->n_cell_atoms; ibasis++) | ||
{ | ||
int a = block_a * block_size_min[0] + aa; // We do not have to worry about the remainder of the division here, it is contained in the 'aa'/'bb'/'cc' offset | ||
int b = block_b * block_size_min[1] + bb; | ||
int c = block_c * block_size_min[2] + cc; | ||
|
||
// Compute the current spin idx | ||
int ispin = ibasis + geom->n_cell_atoms * ( a + geom->n_cells[0] * (b + geom->n_cells[1] * c)); | ||
|
||
// Perform the Metropolis trial step | ||
if( Vectormath::check_atom_type(this->systems[0]->geometry->atom_types[ispin]) ) | ||
{ | ||
int tid = 0; | ||
#ifdef SPIRIT_USE_OPENMP | ||
tid = omp_get_thread_num(); | ||
#endif | ||
|
||
scalar rng1 = distribution(prng_vec[tid]); | ||
scalar rng2 = distribution(prng_vec[tid]); | ||
scalar rng3 = distribution(prng_vec[tid]); | ||
if(!Metropolis_Spin_Trial(ispin, spins_old, spins_new, rng1, rng2, rng3, cos_cone_angle)) | ||
{ | ||
n_rejected++; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
this->n_rejected = n_rejected; | ||
// std::cout << n_rejected << "\n"; | ||
// Log.Send(Utility::Log_Level::Error, Utility::Log_Sender::MC, fmt::format( "n_rejected = {}",n_rejected )); | ||
} | ||
|
||
// TODO: | ||
// Implement heat bath algorithm, see Y. Miyatake et al, J Phys C: Solid State Phys 19, 2539 (1986) | ||
// void Method_MC::HeatBath(const vectorfield & spins_old, vectorfield & spins_new) | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a reference on the implementation (even though you didn't use it as a basis), you could reference David Bauer's diploma thesis, here or in the class comment