Parallel MC with OpenMP #625

Open · wants to merge 4 commits into develop
13 changes: 13 additions & 0 deletions core/include/engine/Method_MC.hpp
@@ -28,8 +28,15 @@ namespace Engine
// Solver_Iteration represents one iteration of a certain Solver
void Iteration() override;

// Performs a trial move of a single spin
bool Metropolis_Spin_Trial(int ispin, const vectorfield & spins_old, vectorfield & spins_new, const scalar & rng1, const scalar & rng2, const scalar & rng3, const scalar & cos_cone_angle);

void Block_Decomposition();

// Metropolis iteration with adaptive cone radius
void Metropolis(const vectorfield & spins_old, vectorfield & spins_new);
// Parallel MC
void Parallel_Metropolis(const vectorfield & spins_old, vectorfield & spins_new);
Member
As a reference for the implementation (even though you didn't use it as a basis), you could cite David Bauer's diploma thesis, here or in the class comment.


// Save the current Step's Data: spins and energy
void Save_Current(std::string starttime, int iteration, bool initial=false, bool final=false) override;
@@ -59,6 +66,12 @@ namespace Engine

// Random vector array
vectorfield xi;

// Values for block decomposition
std::array<int, 3> block_size_min;
std::array<int, 3> n_blocks;
std::array<int, 3> rest;
std::vector<std::mt19937> prng_vec = {};
};
}

266 changes: 264 additions & 2 deletions core/src/engine/Method_MC.cpp
@@ -45,6 +45,7 @@ namespace Engine
this->cone_angle = Constants::Pi * this->parameters_mc->metropolis_cone_angle / 180.0;
this->n_rejected = 0;
this->acceptance_ratio_current = this->parameters_mc->acceptance_ratio_target;
Block_Decomposition();
}

// This implementation is mostly serial as parallelization is nontrivial
@@ -60,10 +61,74 @@

// TODO: add switch between Metropolis and heat bath
// One Metropolis step
Metropolis(spins_old, spins_new);
Parallel_Metropolis(spins_old, spins_new);
Member
Is the parallel implementation slower than the sequential version when running on a single thread?

  • If so, it may make sense to be able to switch between them either dynamically (depending on omp_num_threads) or statically (#ifdef SPIRIT_USE_OPENMP).
  • Otherwise, it would make sense to completely replace the Metropolis implementation, right? Then there would also be no need to call it Parallel_Metropolis.
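
A minimal sketch of that suggestion (illustrative only, not part of this PR), assuming the existing `Metropolis`/`Parallel_Metropolis` signatures and the `SPIRIT_USE_OPENMP` build flag:

```cpp
// Hypothetical switch inside Method_MC::Iteration():
// compile the parallel path only when OpenMP is enabled, and fall back
// to the sequential version when only one thread is available.
#ifdef SPIRIT_USE_OPENMP
if( omp_get_max_threads() > 1 )
    Parallel_Metropolis( spins_old, spins_new );
else
    Metropolis( spins_old, spins_new );
#else
Metropolis( spins_old, spins_new );
#endif
```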

// Metropolis(spins_old, spins_new);
Vectormath::set_c_a(1, spins_new, spins_old);
}

bool Method_MC::Metropolis_Spin_Trial(int ispin, const vectorfield & spins_old, vectorfield & spins_new, const scalar & rng1, const scalar & rng2, const scalar & rng3, const scalar & cos_cone_angle)
{
Matrix3 local_basis;
const Vector3 e_z{0,0,1};
const scalar kB_T = Constants::k_B * this->parameters_mc->temperature;

// Calculate local basis for the spin
if( std::abs(spins_old[ispin].z()) < 1-1e-10 )
{
local_basis.col(2) = spins_old[ispin];
local_basis.col(0) = (local_basis.col(2).cross(e_z)).normalized();
local_basis.col(1) = local_basis.col(2).cross(local_basis.col(0));
} else
{
local_basis = Matrix3::Identity();
}

// Rotation angle between 0 and cone_angle degrees
scalar costheta = 1 - (1 - cos_cone_angle) * rng1;

scalar sintheta = std::sqrt(1 - costheta*costheta);

// Random distribution of phi between 0 and 360 degrees
scalar phi = 2*Constants::Pi * rng2;

Vector3 local_spin_new{ sintheta * std::cos(phi),
sintheta * std::sin(phi),
costheta };

// New spin orientation in regular basis
spins_new[ispin] = local_basis * local_spin_new;

// Energy difference of configurations with and without displacement
scalar Eold = this->systems[0]->hamiltonian->Energy_Single_Spin(ispin, spins_old);
scalar Enew = this->systems[0]->hamiltonian->Energy_Single_Spin(ispin, spins_new);
scalar Ediff = Enew-Eold;

// Metropolis criterion: reject the step if energy rose
if( Ediff > 1e-14 )
{
if( this->parameters_mc->temperature < 1e-12 )
{
// Restore the spin
spins_new[ispin] = spins_old[ispin];
return false;
}
else
{
// Exponential factor
scalar exp_ediff = std::exp( -Ediff/kB_T );
// Only reject if random number is larger than exponential
if( exp_ediff < rng3 )
{
// Restore the spin
spins_new[ispin] = spins_old[ispin];
// Counter for the number of rejections
return false;
}
}
}
return true;
}
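
For context (not part of the diff): the `costheta`/`phi` lines above draw the trial direction uniformly from the spherical cap of opening angle `cone_angle` around the old spin. Since the area element on the unit sphere is uniform in cos θ, drawing cos θ uniformly between cos θ_c and 1 gives exact uniformity on the cap:

```latex
% Area element on the unit sphere:
%   dA = \sin\theta \, d\theta \, d\varphi = -\,d(\cos\theta)\, d\varphi
% so for a cap of opening angle \theta_c, \cos\theta must be uniform on
% [\cos\theta_c, 1] and \varphi uniform on [0, 2\pi). With u_1, u_2 \sim U(0,1):
\cos\theta = 1 - (1 - \cos\theta_c)\, u_1, \qquad \varphi = 2\pi\, u_2
```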

// Simple Metropolis step
void Method_MC::Metropolis(const vectorfield & spins_old, vectorfield & spins_new)
{
@@ -111,7 +176,7 @@ namespace Engine
if( this->parameters_mc->metropolis_step_cone )
{
// Calculate local basis for the spin
if( spins_old[ispin].z() < 1-1e-10 )
if( std::abs(spins_old[ispin].z()) < 1-1e-10 )
{
local_basis.col(2) = spins_old[ispin];
local_basis.col(0) = (local_basis.col(2).cross(e_z)).normalized();
@@ -191,6 +256,203 @@ namespace Engine
}
}

void Method_MC::Block_Decomposition()
{
/*
Decompose the system into blocks of spins.
Each block may only interact with its nearest-neighbour blocks, so this decomposition only works for short-ranged interactions (all except DDI).
It is further important that the number of blocks in each dimension is either 1 or an even number.
If this is not fulfilled, a block can interact with its nearest periodic neighbour; for open boundary conditions it does not matter.
For example, a valid decomposition of a 9x4x1 system, with up to *next-nearest* neighbour interactions, could look like this:
┌───┬───┬───┬─────┐
│1 1│2 2│1 1│2 2 2│
│   │   │   │     │
│1 1│2 2│1 1│2 2 2│
├───┼───┼───┼─────┤
│3 3│4 4│3 3│4 4 4│
│   │   │   │     │
│3 3│4 4│3 3│4 4 4│
└───┴───┴───┴─────┘
The decomposition enables us to run the MC algorithm in four serial phases (1 to 4), where each phase is parallelized.
Blocks belonging to the same phase do not interact directly with each other, so we can perform their MC trial moves in parallel.
The number of phases is 2^dim and the number of usable threads is the number of blocks per phase.
In this example the usable number of threads would be 2.
If the block size does not perfectly divide n_cells, the remainder is absorbed by enlarging the last block in each direction.
(A standalone sketch of this arithmetic follows the function.)
*/

// Initialize
n_blocks = {0,0,0};
block_size_min = {1,1,1}; // Require at least one as block size
rest = {0,0,0};

if (this->systems[0]->hamiltonian->Name() != "Heisenberg")
{
return;
}

Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format("Performing block decomposition for parallel Metropolis algorithm"));
const auto & geom = this->systems[0]->geometry;
const auto & ham = (const Hamiltonian_Heisenberg *) this->systems[0]->hamiltonian.get();

for(auto & p : ham->dmi_pairs)
{
for(int i=0; i<3; i++)
block_size_min[i] = std::max(block_size_min[i], std::abs(p.translations[i]));
}
for(auto & p : ham->exchange_pairs)
{
for(int i=0; i<3; i++)
block_size_min[i] = std::max(block_size_min[i], std::abs(p.translations[i]));
}
for(auto & q : ham->quadruplets)
{
for(int i=0; i<3; i++)
{
block_size_min[i] = std::max(block_size_min[i], std::abs(q.d_j[i]));
block_size_min[i] = std::max(block_size_min[i], std::abs(q.d_k[i]));
block_size_min[i] = std::max(block_size_min[i], std::abs(q.d_l[i]));
}
}
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Block size from interactions = ({}, {}, {})", block_size_min[0], block_size_min[1], block_size_min[2]));

// If the number of blocks is uneven for a certain direction, we increase the block size until it is even
for(int i=0; i<3; i++)
{
n_blocks[i] = geom->n_cells[i] / block_size_min[i];
while(n_blocks[i] % 2 != 0 )
{
n_blocks[i] = geom->n_cells[i] / ++block_size_min[i];
}
}

for(int i=0; i<3; i++)
{
if(block_size_min[i] <= geom->n_cells[i])
{
rest[i] = geom->n_cells[i] % block_size_min[i];
} else { // If the block size is larger than n_cells, we use n_blocks=1
n_blocks[i] = 1;
block_size_min[i] = geom->n_cells[i];
}
}

Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Actual block size = ({}, {}, {})", block_size_min[0], block_size_min[1], block_size_min[2]));
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" N_blocks = ({}, {}, {})", n_blocks[0], n_blocks[1], n_blocks[2]));
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Rest = ({}, {}, {})", rest[0], rest[1], rest[2]));
int Nthreads = std::max(1,n_blocks[0]/2) * std::max(1,n_blocks[1]/2) * std::max(1,n_blocks[2]/2);
Log.Send(Utility::Log_Level::Info, Utility::Log_Sender::MC, fmt::format(" Supports up to max. {} threads.", Nthreads));
}
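
The decomposition arithmetic above can be checked in isolation. A hypothetical standalone sketch (not part of the PR), run on the 9x4x1 example from the comment with `block_size_min = {2,2,1}` from the interaction reach:

```cpp
#include <array>
#include <cstdio>

// Standalone reproduction of the Block_Decomposition arithmetic (hypothetical test code)
int main()
{
    std::array<int, 3> n_cells        = { 9, 4, 1 };
    std::array<int, 3> block_size_min = { 2, 2, 1 }; // from next-nearest neighbour interactions
    std::array<int, 3> n_blocks{}, rest{};

    for( int i = 0; i < 3; i++ )
    {
        // Grow the block size until the number of blocks per direction is even (or zero)
        n_blocks[i] = n_cells[i] / block_size_min[i];
        while( n_blocks[i] % 2 != 0 )
            n_blocks[i] = n_cells[i] / ++block_size_min[i];

        if( block_size_min[i] <= n_cells[i] )
            rest[i] = n_cells[i] % block_size_min[i];
        else
        {
            // Block size exceeds the lattice in this direction: use a single block
            n_blocks[i]       = 1;
            block_size_min[i] = n_cells[i];
        }
    }

    // Prints: n_blocks = (4, 2, 1), block size = (2, 2, 1), rest = (1, 0, 0),
    // i.e. the last block per direction grows by 'rest' and 2 threads are usable,
    // matching the 9x4x1 diagram in the class comment.
    std::printf( "n_blocks   = (%d, %d, %d)\n", n_blocks[0], n_blocks[1], n_blocks[2] );
    std::printf( "block size = (%d, %d, %d)\n", block_size_min[0], block_size_min[1], block_size_min[2] );
    std::printf( "rest       = (%d, %d, %d)\n", rest[0], rest[1], rest[2] );
}
```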

// Parallel Metropolis step over the block decomposition
void Method_MC::Parallel_Metropolis(const vectorfield & spins_old, vectorfield & spins_new)
{
const auto & geom = this->systems[0]->geometry;

scalar diff = 0.01;
// Cone angle feedback algorithm
if( this->parameters_mc->metropolis_step_cone && this->parameters_mc->metropolis_cone_adaptive )
{
this->acceptance_ratio_current = 1 - (scalar)this->n_rejected / (scalar)this->nos_nonvacant;

if( (this->acceptance_ratio_current < this->parameters_mc->acceptance_ratio_target) && (this->cone_angle > diff) )
this->cone_angle -= diff;

if( (this->acceptance_ratio_current > this->parameters_mc->acceptance_ratio_target) && (this->cone_angle < Constants::Pi-diff) )
this->cone_angle += diff;

this->parameters_mc->metropolis_cone_angle = this->cone_angle * 180.0 / Constants::Pi;
}

this->n_rejected = 0;
int n_rejected = 0; // Thread-local accumulator for the OpenMP reduction; written back to this->n_rejected at the end
scalar cos_cone_angle = std::cos(this->cone_angle);

auto distribution = std::uniform_real_distribution<scalar>(0, 1);

// Number of threads
int nt = 1;
#ifdef SPIRIT_USE_OPENMP
nt = omp_get_max_threads();
#endif

// We make nt copies of our random number generator, so that each thread has its own
if( static_cast<std::size_t>(nt) != prng_vec.size() )
{
prng_vec.resize(nt);
for( std::size_t i=0; i<prng_vec.size(); i++ )
{
prng_vec[i] = std::mt19937(this->parameters_mc->rng_seed * (i+1));
}
}

// There are up to (2^dim) phases in the algorithm, which have to be executed serially
for(int phase_c = 0; phase_c<2; phase_c++)
{
for(int phase_b = 0; phase_b<2; phase_b++)
{
for(int phase_a = 0; phase_a<2; phase_a++)
{
// In each phase we iterate over blocks of spins which are at least **next-nearest** neighbour blocks of each other
// Thus the spin moves do not depend on each other and we can parallelize over the blocks within a phase
// Note: private(distribution) gives each thread a default-constructed copy, which is again uniform on [0,1)
#pragma omp parallel for collapse(3) private(distribution) reduction(+:n_rejected)
for(int block_c = phase_c; block_c < n_blocks[2]; block_c+=2)
{
for(int block_b = phase_b; block_b < n_blocks[1]; block_b+=2)
{
for(int block_a = phase_a; block_a < n_blocks[0]; block_a+=2)
{
int block_size_c = (block_c == n_blocks[2] - 1) ? block_size_min[2] + rest[2] : block_size_min[2]; // Account for the remainder of division (n_cells[i] / block_size_min[i]) by increasing the block size at the edges
int block_size_b = (block_b == n_blocks[1] - 1) ? block_size_min[1] + rest[1] : block_size_min[1];
int block_size_a = (block_a == n_blocks[0] - 1) ? block_size_min[0] + rest[0] : block_size_min[0];

// Iterate over the current block
for(int cc=0; cc < block_size_c; cc++)
{
for(int bb=0; bb < block_size_b; bb++)
{
for(int aa=0; aa < block_size_a; aa++)
{
for(int ibasis=0; ibasis < geom->n_cell_atoms; ibasis++)
{
int a = block_a * block_size_min[0] + aa; // We do not have to worry about the remainder of the division here, it is contained in the 'aa'/'bb'/'cc' offset
int b = block_b * block_size_min[1] + bb;
int c = block_c * block_size_min[2] + cc;

// Compute the current spin idx
int ispin = ibasis + geom->n_cell_atoms * ( a + geom->n_cells[0] * (b + geom->n_cells[1] * c));

// Perform the Metropolis trial step
if( Vectormath::check_atom_type(this->systems[0]->geometry->atom_types[ispin]) )
{
int tid = 0;
#ifdef SPIRIT_USE_OPENMP
tid = omp_get_thread_num();
#endif

scalar rng1 = distribution(prng_vec[tid]);
scalar rng2 = distribution(prng_vec[tid]);
scalar rng3 = distribution(prng_vec[tid]);
if(!Metropolis_Spin_Trial(ispin, spins_old, spins_new, rng1, rng2, rng3, cos_cone_angle))
{
n_rejected++;
}
}
}
}
}
}
}
}
}
}
}
}
this->n_rejected = n_rejected;
// std::cout << n_rejected << "\n";
// Log.Send(Utility::Log_Level::Error, Utility::Log_Sender::MC, fmt::format( "n_rejected = {}",n_rejected ));
}
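
One design note on the per-thread generators (an observation, not a change made by this PR): seeding with `rng_seed * (i+1)` means thread 0 reuses `rng_seed` itself, and every seed collapses to zero if `rng_seed == 0`. A minimal alternative sketch, using a hypothetical helper `make_thread_generators`, would decorrelate the streams with `std::seed_seq`:

```cpp
#include <random>
#include <vector>

// Hypothetical alternative to 'rng_seed * (i+1)': std::seed_seq mixes
// (seed, thread index) into well-spread generator state.
std::vector<std::mt19937> make_thread_generators( unsigned int rng_seed, int n_threads )
{
    std::vector<std::mt19937> generators( n_threads );
    for( int i = 0; i < n_threads; i++ )
    {
        std::seed_seq seq{ rng_seed, static_cast<unsigned int>( i ) };
        generators[i].seed( seq );
    }
    return generators;
}
```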

// TODO:
// Implement heat bath algorithm, see Y. Miyatake et al, J Phys C: Solid State Phys 19, 2539 (1986)
// void Method_MC::HeatBath(const vectorfield & spins_old, vectorfield & spins_new)