Skip to content

Commit

Permalink
Merge pull request #361 from brucefan1983/multiple_loss
Browse files Browse the repository at this point in the history
Multiple loss
  • Loading branch information
brucefan1983 authored Jan 15, 2023
2 parents 0afe890 + b3b57d2 commit 51a52b7
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 150 deletions.
2 changes: 1 addition & 1 deletion src/main_gpumd/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void print_welcome_information(void)
printf("***************************************************************\n");
printf("* Welcome to use GPUMD *\n");
printf("* (Graphics Processing Units Molecular Dynamics) *\n");
printf("* Version 3.5 *\n");
printf("* Version 3.6 *\n");
printf("* This is the gpumd executable *\n");
printf("***************************************************************\n");
printf("\n");
Expand Down
204 changes: 118 additions & 86 deletions src/main_nep/dataset.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ void Dataset::copy_structures(std::vector<Structure>& structures_input, int n1,
}
}

// Build a per-configuration presence table of atom types.
// Layout: has_type[t * Nc + n] is true when atom type t occurs in
// configuration n. The extra row t == para.num_types is a catch-all
// that is set for every configuration (used for the "all types" loss).
void Dataset::find_has_type(Parameters& para)
{
  const int num_rows = para.num_types + 1;
  has_type.resize(num_rows * Nc, false);
  for (int nc = 0; nc < Nc; ++nc) {
    // catch-all row: every configuration counts toward the global loss
    has_type[para.num_types * Nc + nc] = true;
    const auto& structure = structures[nc];
    for (int a = 0; a < structure.num_atom; ++a) {
      has_type[structure.type[a] * Nc + nc] = true;
    }
  }
}

void Dataset::find_Na(Parameters& para)
{
Na_cpu.resize(Nc);
Expand Down Expand Up @@ -275,6 +286,7 @@ void Dataset::construct(
{
CHECK(cudaSetDevice(device_id));
copy_structures(structures_input, n1, n2);
find_has_type(para);
error_cpu.resize(Nc);
error_gpu.resize(Nc);

Expand Down Expand Up @@ -344,7 +356,7 @@ static __global__ void gpu_sum_force_error(
}
}

float Dataset::get_rmse_force(Parameters& para, const bool use_weight, int device_id)
std::vector<float> Dataset::get_rmse_force(Parameters& para, const bool use_weight, int device_id)
{
CHECK(cudaSetDevice(device_id));
const int block_size = 256;
Expand All @@ -354,15 +366,25 @@ float Dataset::get_rmse_force(Parameters& para, const bool use_weight, int devic
force_ref_gpu.data() + N, force_ref_gpu.data() + N * 2, error_gpu.data());
int mem = sizeof(float) * Nc;
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
float error_sum = 0.0f;

std::vector<float> rmse_array(para.num_types + 1, 0.0f);
std::vector<int> count_array(para.num_types + 1, 0);
for (int n = 0; n < Nc; ++n) {
if (use_weight) {
error_sum += weight_cpu[n] * weight_cpu[n] * error_cpu[n];
} else {
error_sum += error_cpu[n];
float rmse_temp = use_weight ? weight_cpu[n] * weight_cpu[n] * error_cpu[n] : error_cpu[n];
for (int t = 0; t < para.num_types + 1; ++t) {
if (has_type[t * Nc + n]) {
rmse_array[t] += rmse_temp;
count_array[t] += Na_cpu[n];
}
}
}

for (int t = 0; t <= para.num_types; ++t) {
if (count_array[t] > 0) {
rmse_array[t] = sqrt(rmse_array[t] / (count_array[t] * 3));
}
}
return sqrt(error_sum / (N * 3));
return rmse_array;
}

static __global__ void
Expand Down Expand Up @@ -437,8 +459,12 @@ static __global__ void gpu_sum_pe_error(
}
}

float Dataset::get_rmse_energy(
float& energy_shift_per_structure, const bool use_weight, const bool do_shift, int device_id)
std::vector<float> Dataset::get_rmse_energy(
Parameters& para,
float& energy_shift_per_structure,
const bool use_weight,
const bool do_shift,
int device_id)
{
CHECK(cudaSetDevice(device_id));
energy_shift_per_structure = 0.0f;
Expand All @@ -460,106 +486,112 @@ float Dataset::get_rmse_energy(
energy_shift_per_structure, Na.data(), Na_sum.data(), energy.data(), energy_ref_gpu.data(),
error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
float error_ave = 0.0f;

std::vector<float> rmse_array(para.num_types + 1, 0.0f);
std::vector<int> count_array(para.num_types + 1, 0);
for (int n = 0; n < Nc; ++n) {
if (use_weight) {
error_ave += weight_cpu[n] * weight_cpu[n] * error_cpu[n];
} else {
error_ave += error_cpu[n];
float rmse_temp = use_weight ? weight_cpu[n] * weight_cpu[n] * error_cpu[n] : error_cpu[n];
for (int t = 0; t < para.num_types + 1; ++t) {
if (has_type[t * Nc + n]) {
rmse_array[t] += rmse_temp;
++count_array[t];
}
}
}
return sqrt(error_ave / Nc);
for (int t = 0; t <= para.num_types; ++t) {
if (count_array[t] > 0) {
rmse_array[t] = sqrt(rmse_array[t] / count_array[t]);
}
}
return rmse_array;
}

float Dataset::get_rmse_virial(Parameters& para, const bool use_weight, int device_id)
static __global__ void gpu_sum_virial_error(
const int N,
const float shear_weight,
int* g_Na,
int* g_Na_sum,
float* g_virial,
float* g_virial_ref,
float* error_gpu)
{
CHECK(cudaSetDevice(device_id));
int num_virial_configurations = 0;
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
++num_virial_configurations;
}
}
if (num_virial_configurations == 0) {
return 0.0f;
int tid = threadIdx.x;
int bid = blockIdx.x;
int Na = g_Na[bid];
int N1 = g_Na_sum[bid];
int N2 = N1 + Na;
extern __shared__ float s_virial[];
for (int d = 0; d < 6; ++d) {
s_virial[d * blockDim.x + tid] = 0.0f;
}

float error_ave = 0.0;
int mem = sizeof(float) * Nc;

const int block_size = 256;
for (int n = N1 + tid; n < N2; n += blockDim.x) {
for (int d = 0; d < 6; ++d) {
s_virial[d * blockDim.x + tid] += g_virial[d * N + n];
}
}
__syncthreads();

gpu_sum_pe_error<<<Nc, block_size, sizeof(float) * block_size>>>(
0.0f, Na.data(), Na_sum.data(), virial.data(), virial_ref_gpu.data(), error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
float total_weight = use_weight ? weight_cpu[n] * weight_cpu[n] : 1.0f;
error_ave += total_weight * error_cpu[n];
for (int offset = blockDim.x >> 1; offset > 32; offset >>= 1) {
if (tid < offset) {
for (int d = 0; d < 6; ++d) {
s_virial[d * blockDim.x + tid] += s_virial[d * blockDim.x + tid + offset];
}
}
__syncthreads();
}

gpu_sum_pe_error<<<Nc, block_size, sizeof(float) * block_size>>>(
0.0f, Na.data(), Na_sum.data(), virial.data() + N, virial_ref_gpu.data() + Nc,
error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
float total_weight = use_weight ? weight_cpu[n] * weight_cpu[n] : 1.0f;
error_ave += total_weight * error_cpu[n];
for (int offset = 32; offset > 0; offset >>= 1) {
if (tid < offset) {
for (int d = 0; d < 6; ++d) {
s_virial[d * blockDim.x + tid] += s_virial[d * blockDim.x + tid + offset];
}
}
__syncwarp();
}

gpu_sum_pe_error<<<Nc, block_size, sizeof(float) * block_size>>>(
0.0f, Na.data(), Na_sum.data(), virial.data() + N * 2, virial_ref_gpu.data() + Nc * 2,
error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
float total_weight = use_weight ? weight_cpu[n] * weight_cpu[n] : 1.0f;
error_ave += total_weight * error_cpu[n];
if (tid == 0) {
float error_sum = 0.0f;
for (int d = 0; d < 6; ++d) {
float diff = s_virial[d * blockDim.x + 0] / Na - g_virial_ref[d * gridDim.x + bid];
error_sum += (d >= 3) ? (shear_weight * diff * diff) : (diff * diff);
}
error_gpu[bid] = error_sum;
}
}

if (para.train_mode != 1) {
std::vector<float> Dataset::get_rmse_virial(Parameters& para, const bool use_weight, int device_id)
{
CHECK(cudaSetDevice(device_id));

gpu_sum_pe_error<<<Nc, block_size, sizeof(float) * block_size>>>(
0.0f, Na.data(), Na_sum.data(), virial.data() + N * 3, virial_ref_gpu.data() + Nc * 3,
error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
float total_weight =
use_weight ? weight_cpu[n] * weight_cpu[n] * para.lambda_shear * para.lambda_shear : 1.0f;
error_ave += total_weight * error_cpu[n];
}
}
std::vector<float> rmse_array(para.num_types + 1, 0.0f);
std::vector<int> count_array(para.num_types + 1, 0);

gpu_sum_pe_error<<<Nc, block_size, sizeof(float) * block_size>>>(
0.0f, Na.data(), Na_sum.data(), virial.data() + N * 4, virial_ref_gpu.data() + Nc * 4,
error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
float total_weight =
use_weight ? weight_cpu[n] * weight_cpu[n] * para.lambda_shear * para.lambda_shear : 1.0f;
error_ave += total_weight * error_cpu[n];
}
}
int mem = sizeof(float) * Nc;
const int block_size = 256;

gpu_sum_pe_error<<<Nc, block_size, sizeof(float) * block_size>>>(
0.0f, Na.data(), Na_sum.data(), virial.data() + N * 5, virial_ref_gpu.data() + Nc * 5,
error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
float total_weight =
use_weight ? weight_cpu[n] * weight_cpu[n] * para.lambda_shear * para.lambda_shear : 1.0f;
error_ave += total_weight * error_cpu[n];
float shear_weight =
(para.train_mode != 1) ? (use_weight ? para.lambda_shear * para.lambda_shear : 1.0f) : 0.0f;
gpu_sum_virial_error<<<Nc, block_size, sizeof(float) * block_size * 6>>>(
N, shear_weight, Na.data(), Na_sum.data(), virial.data(), virial_ref_gpu.data(),
error_gpu.data());
CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost));
for (int n = 0; n < Nc; ++n) {
if (structures[n].has_virial) {
float rmse_temp = use_weight ? weight_cpu[n] * weight_cpu[n] * error_cpu[n] : error_cpu[n];
for (int t = 0; t < para.num_types + 1; ++t) {
if (has_type[t * Nc + n]) {
rmse_array[t] += rmse_temp;
count_array[t] += (para.train_mode != 1) ? 6 : 3;
}
}
}
}

int num_components = (para.train_mode == 1) ? 3 : 6;
return sqrt(error_ave / (num_virial_configurations * num_components));
for (int t = 0; t <= para.num_types; ++t) {
if (count_array[t] > 0) {
rmse_array[t] = sqrt(rmse_array[t] / count_array[t]);
}
}
return rmse_array;
}
15 changes: 11 additions & 4 deletions src/main_nep/dataset.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,24 @@ public:
std::vector<float> error_cpu; // error in energy, virial, or force
GPU_Vector<float> error_gpu; // error in energy, virial, or force

std::vector<bool> has_type;

std::vector<Structure> structures;

void
construct(Parameters& para, std::vector<Structure>& structures, int n1, int n2, int device_id);
float get_rmse_force(Parameters& para, const bool use_weight, int device_id);
float get_rmse_energy(
float& energy_shift_per_structure, const bool use_weight, const bool do_shift, int device_id);
float get_rmse_virial(Parameters& para, const bool use_weight, int device_id);
std::vector<float> get_rmse_force(Parameters& para, const bool use_weight, int device_id);
std::vector<float> get_rmse_energy(
Parameters& para,
float& energy_shift_per_structure,
const bool use_weight,
const bool do_shift,
int device_id);
std::vector<float> get_rmse_virial(Parameters& para, const bool use_weight, int device_id);

private:
void copy_structures(std::vector<Structure>& structures_input, int n1, int n2);
void find_has_type(Parameters& para);
void find_Na(Parameters& para);
void initialize_gpu_data(Parameters& para);
void find_neighbor(Parameters& para);
Expand Down
43 changes: 28 additions & 15 deletions src/main_nep/fitness.cu
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,19 @@ void Fitness::compute(
potential->find_force(para, individual, train_set[batch_id], false, deviceCount);
for (int m = 0; m < deviceCount; ++m) {
float energy_shift_per_structure_not_used;
fitness[deviceCount * n + m + 0 * para.population_size] =
para.lambda_e * train_set[batch_id][m].get_rmse_energy(
energy_shift_per_structure_not_used, true, true, m);
fitness[deviceCount * n + m + 1 * para.population_size] =
para.lambda_f * train_set[batch_id][m].get_rmse_force(para, true, m);
fitness[deviceCount * n + m + 2 * para.population_size] =
para.lambda_v * train_set[batch_id][m].get_rmse_virial(para, true, m);
auto rmse_energy_array = train_set[batch_id][m].get_rmse_energy(
para, energy_shift_per_structure_not_used, true, true, m);
auto rmse_force_array = train_set[batch_id][m].get_rmse_force(para, true, m);
auto rmse_virial_array = train_set[batch_id][m].get_rmse_virial(para, true, m);

for (int t = 0; t <= para.num_types; ++t) {
fitness[deviceCount * n + m + (6 * t + 3) * para.population_size] =
para.lambda_e * rmse_energy_array[t];
fitness[deviceCount * n + m + (6 * t + 4) * para.population_size] =
para.lambda_f * rmse_force_array[t];
fitness[deviceCount * n + m + (6 * t + 5) * para.population_size] =
para.lambda_v * rmse_virial_array[t];
}
}
}
}
Expand Down Expand Up @@ -239,10 +245,14 @@ void Fitness::report_error(
int batch_id = generation % num_batches;
potential->find_force(para, elite, train_set[batch_id], false, 1);
float energy_shift_per_structure;
float rmse_energy_train =
train_set[batch_id][0].get_rmse_energy(energy_shift_per_structure, false, true, 0);
float rmse_force_train = train_set[batch_id][0].get_rmse_force(para, false, 0);
float rmse_virial_train = train_set[batch_id][0].get_rmse_virial(para, false, 0);
auto rmse_energy_train_array =
train_set[batch_id][0].get_rmse_energy(para, energy_shift_per_structure, false, true, 0);
auto rmse_force_train_array = train_set[batch_id][0].get_rmse_force(para, false, 0);
auto rmse_virial_train_array = train_set[batch_id][0].get_rmse_virial(para, false, 0);

float rmse_energy_train = rmse_energy_train_array.back();
float rmse_force_train = rmse_force_train_array.back();
float rmse_virial_train = rmse_virial_train_array.back();

// correct the last bias parameter in the NN
if (para.train_mode == 0) {
Expand All @@ -255,10 +265,13 @@ void Fitness::report_error(
if (has_test_set) {
potential->find_force(para, elite, test_set, false, 1);
float energy_shift_per_structure_not_used;
rmse_energy_test =
test_set[0].get_rmse_energy(energy_shift_per_structure_not_used, false, false, 0);
rmse_force_test = test_set[0].get_rmse_force(para, false, 0);
rmse_virial_test = test_set[0].get_rmse_virial(para, false, 0);
auto rmse_energy_test_array =
test_set[0].get_rmse_energy(para, energy_shift_per_structure_not_used, false, false, 0);
auto rmse_force_test_array = test_set[0].get_rmse_force(para, false, 0);
auto rmse_virial_test_array = test_set[0].get_rmse_virial(para, false, 0);
rmse_energy_test = rmse_energy_test_array.back();
rmse_force_test = rmse_force_test_array.back();
rmse_virial_test = rmse_virial_test_array.back();
}

FILE* fid_nep = my_fopen("nep.txt", "w");
Expand Down
2 changes: 1 addition & 1 deletion src/main_nep/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void print_welcome_information(void)
printf("***************************************************************\n");
printf("* Welcome to use GPUMD *\n");
printf("* (Graphics Processing Units Molecular Dynamics) *\n");
printf("* Version 3.5 *\n");
printf("* Version 3.6 *\n");
printf("* This is the nep executable *\n");
printf("***************************************************************\n");
printf("\n");
Expand Down
Loading

0 comments on commit 51a52b7

Please sign in to comment.