diff --git a/src/main_gpumd/main.cu b/src/main_gpumd/main.cu index 238bd79fa..83a1a968c 100644 --- a/src/main_gpumd/main.cu +++ b/src/main_gpumd/main.cu @@ -59,7 +59,7 @@ void print_welcome_information(void) printf("***************************************************************\n"); printf("* Welcome to use GPUMD *\n"); printf("* (Graphics Processing Units Molecular Dynamics) *\n"); - printf("* Version 3.5 *\n"); + printf("* Version 3.6 *\n"); printf("* This is the gpumd executable *\n"); printf("***************************************************************\n"); printf("\n"); diff --git a/src/main_nep/dataset.cu b/src/main_nep/dataset.cu index 1bf49759a..1b04b1c9e 100644 --- a/src/main_nep/dataset.cu +++ b/src/main_nep/dataset.cu @@ -63,6 +63,17 @@ void Dataset::copy_structures(std::vector& structures_input, int n1, } } +void Dataset::find_has_type(Parameters& para) +{ + has_type.resize((para.num_types + 1) * Nc, false); + for (int n = 0; n < Nc; ++n) { + has_type[para.num_types * Nc + n] = true; + for (int na = 0; na < structures[n].num_atom; ++na) { + has_type[structures[n].type[na] * Nc + n] = true; + } + } +} + void Dataset::find_Na(Parameters& para) { Na_cpu.resize(Nc); @@ -275,6 +286,7 @@ void Dataset::construct( { CHECK(cudaSetDevice(device_id)); copy_structures(structures_input, n1, n2); + find_has_type(para); error_cpu.resize(Nc); error_gpu.resize(Nc); @@ -344,7 +356,7 @@ static __global__ void gpu_sum_force_error( } } -float Dataset::get_rmse_force(Parameters& para, const bool use_weight, int device_id) +std::vector Dataset::get_rmse_force(Parameters& para, const bool use_weight, int device_id) { CHECK(cudaSetDevice(device_id)); const int block_size = 256; @@ -354,15 +366,25 @@ float Dataset::get_rmse_force(Parameters& para, const bool use_weight, int devic force_ref_gpu.data() + N, force_ref_gpu.data() + N * 2, error_gpu.data()); int mem = sizeof(float) * Nc; CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - float error_sum = 0.0f; + + std::vector rmse_array(para.num_types + 1, 0.0f); + std::vector count_array(para.num_types + 1, 0); for (int n = 0; n < Nc; ++n) { - if (use_weight) { - error_sum += weight_cpu[n] * weight_cpu[n] * error_cpu[n]; - } else { - error_sum += error_cpu[n]; + float rmse_temp = use_weight ? weight_cpu[n] * weight_cpu[n] * error_cpu[n] : error_cpu[n]; + for (int t = 0; t < para.num_types + 1; ++t) { + if (has_type[t * Nc + n]) { + rmse_array[t] += rmse_temp; + count_array[t] += Na_cpu[n]; + } + } + } + + for (int t = 0; t <= para.num_types; ++t) { + if (count_array[t] > 0) { + rmse_array[t] = sqrt(rmse_array[t] / (count_array[t] * 3)); } } - return sqrt(error_sum / (N * 3)); + return rmse_array; } static __global__ void @@ -437,8 +459,12 @@ static __global__ void gpu_sum_pe_error( } } -float Dataset::get_rmse_energy( - float& energy_shift_per_structure, const bool use_weight, const bool do_shift, int device_id) +std::vector Dataset::get_rmse_energy( + Parameters& para, + float& energy_shift_per_structure, + const bool use_weight, + const bool do_shift, + int device_id) { CHECK(cudaSetDevice(device_id)); energy_shift_per_structure = 0.0f; @@ -460,106 +486,112 @@ float Dataset::get_rmse_energy( energy_shift_per_structure, Na.data(), Na_sum.data(), energy.data(), energy_ref_gpu.data(), error_gpu.data()); CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - float error_ave = 0.0f; + + std::vector rmse_array(para.num_types + 1, 0.0f); + std::vector count_array(para.num_types + 1, 0); for (int n = 0; n < Nc; ++n) { - if (use_weight) { - error_ave += weight_cpu[n] * weight_cpu[n] * error_cpu[n]; - } else { - error_ave += error_cpu[n]; + float rmse_temp = use_weight ? weight_cpu[n] * weight_cpu[n] * error_cpu[n] : error_cpu[n]; + for (int t = 0; t < para.num_types + 1; ++t) { + if (has_type[t * Nc + n]) { + rmse_array[t] += rmse_temp; + ++count_array[t]; + } } } - return sqrt(error_ave / Nc); + for (int t = 0; t <= para.num_types; ++t) { + if (count_array[t] > 0) { + rmse_array[t] = sqrt(rmse_array[t] / count_array[t]); + } + } + return rmse_array; } -float Dataset::get_rmse_virial(Parameters& para, const bool use_weight, int device_id) +static __global__ void gpu_sum_virial_error( + const int N, + const float shear_weight, + int* g_Na, + int* g_Na_sum, + float* g_virial, + float* g_virial_ref, + float* error_gpu) { - CHECK(cudaSetDevice(device_id)); - int num_virial_configurations = 0; - for (int n = 0; n < Nc; ++n) { - if (structures[n].has_virial) { - ++num_virial_configurations; - } - } - if (num_virial_configurations == 0) { - return 0.0f; + int tid = threadIdx.x; + int bid = blockIdx.x; + int Na = g_Na[bid]; + int N1 = g_Na_sum[bid]; + int N2 = N1 + Na; + extern __shared__ float s_virial[]; + for (int d = 0; d < 6; ++d) { + s_virial[d * blockDim.x + tid] = 0.0f; } - float error_ave = 0.0; - int mem = sizeof(float) * Nc; - - const int block_size = 256; + for (int n = N1 + tid; n < N2; n += blockDim.x) { + for (int d = 0; d < 6; ++d) { + s_virial[d * blockDim.x + tid] += g_virial[d * N + n]; + } + } + __syncthreads(); - gpu_sum_pe_error<<>>( - 0.0f, Na.data(), Na_sum.data(), virial.data(), virial_ref_gpu.data(), error_gpu.data()); - CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - for (int n = 0; n < Nc; ++n) { - if (structures[n].has_virial) { - float total_weight = use_weight ? weight_cpu[n] * weight_cpu[n] : 1.0f; - error_ave += total_weight * error_cpu[n]; + for (int offset = blockDim.x >> 1; offset > 32; offset >>= 1) { + if (tid < offset) { + for (int d = 0; d < 6; ++d) { + s_virial[d * blockDim.x + tid] += s_virial[d * blockDim.x + tid + offset]; + } } + __syncthreads(); } - gpu_sum_pe_error<<>>( - 0.0f, Na.data(), Na_sum.data(), virial.data() + N, virial_ref_gpu.data() + Nc, - error_gpu.data()); - CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - for (int n = 0; n < Nc; ++n) { - if (structures[n].has_virial) { - float total_weight = use_weight ? weight_cpu[n] * weight_cpu[n] : 1.0f; - error_ave += total_weight * error_cpu[n]; + for (int offset = 32; offset > 0; offset >>= 1) { + if (tid < offset) { + for (int d = 0; d < 6; ++d) { + s_virial[d * blockDim.x + tid] += s_virial[d * blockDim.x + tid + offset]; + } } + __syncwarp(); } - gpu_sum_pe_error<<>>( - 0.0f, Na.data(), Na_sum.data(), virial.data() + N * 2, virial_ref_gpu.data() + Nc * 2, - error_gpu.data()); - CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - for (int n = 0; n < Nc; ++n) { - if (structures[n].has_virial) { - float total_weight = use_weight ? weight_cpu[n] * weight_cpu[n] : 1.0f; - error_ave += total_weight * error_cpu[n]; + if (tid == 0) { + float error_sum = 0.0f; + for (int d = 0; d < 6; ++d) { + float diff = s_virial[d * blockDim.x + 0] / Na - g_virial_ref[d * gridDim.x + bid]; + error_sum += (d >= 3) ? (shear_weight * diff * diff) : (diff * diff); } + error_gpu[bid] = error_sum; } +} - if (para.train_mode != 1) { +std::vector Dataset::get_rmse_virial(Parameters& para, const bool use_weight, int device_id) +{ + CHECK(cudaSetDevice(device_id)); - gpu_sum_pe_error<<>>( - 0.0f, Na.data(), Na_sum.data(), virial.data() + N * 3, virial_ref_gpu.data() + Nc * 3, - error_gpu.data()); - CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - for (int n = 0; n < Nc; ++n) { - if (structures[n].has_virial) { - float total_weight = - use_weight ? weight_cpu[n] * weight_cpu[n] * para.lambda_shear * para.lambda_shear : 1.0f; - error_ave += total_weight * error_cpu[n]; - } - } + std::vector rmse_array(para.num_types + 1, 0.0f); + std::vector count_array(para.num_types + 1, 0); - gpu_sum_pe_error<<>>( - 0.0f, Na.data(), Na_sum.data(), virial.data() + N * 4, virial_ref_gpu.data() + Nc * 4, - error_gpu.data()); - CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - for (int n = 0; n < Nc; ++n) { - if (structures[n].has_virial) { - float total_weight = - use_weight ? weight_cpu[n] * weight_cpu[n] * para.lambda_shear * para.lambda_shear : 1.0f; - error_ave += total_weight * error_cpu[n]; - } - } + int mem = sizeof(float) * Nc; + const int block_size = 256; - gpu_sum_pe_error<<>>( - 0.0f, Na.data(), Na_sum.data(), virial.data() + N * 5, virial_ref_gpu.data() + Nc * 5, - error_gpu.data()); - CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); - for (int n = 0; n < Nc; ++n) { - if (structures[n].has_virial) { - float total_weight = - use_weight ? weight_cpu[n] * weight_cpu[n] * para.lambda_shear * para.lambda_shear : 1.0f; - error_ave += total_weight * error_cpu[n]; + float shear_weight = + (para.train_mode != 1) ? (use_weight ? para.lambda_shear * para.lambda_shear : 1.0f) : 0.0f; + gpu_sum_virial_error<<>>( + N, shear_weight, Na.data(), Na_sum.data(), virial.data(), virial_ref_gpu.data(), + error_gpu.data()); + CHECK(cudaMemcpy(error_cpu.data(), error_gpu.data(), mem, cudaMemcpyDeviceToHost)); + for (int n = 0; n < Nc; ++n) { + if (structures[n].has_virial) { + float rmse_temp = use_weight ? weight_cpu[n] * weight_cpu[n] * error_cpu[n] : error_cpu[n]; + for (int t = 0; t < para.num_types + 1; ++t) { + if (has_type[t * Nc + n]) { + rmse_array[t] += rmse_temp; + count_array[t] += (para.train_mode != 1) ? 6 : 3; + } } } } - int num_components = (para.train_mode == 1) ? 3 : 6; - return sqrt(error_ave / (num_virial_configurations * num_components)); + for (int t = 0; t <= para.num_types; ++t) { + if (count_array[t] > 0) { + rmse_array[t] = sqrt(rmse_array[t] / count_array[t]); + } + } + return rmse_array; } diff --git a/src/main_nep/dataset.cuh b/src/main_nep/dataset.cuh index eea14a981..63ff7cb8e 100644 --- a/src/main_nep/dataset.cuh +++ b/src/main_nep/dataset.cuh @@ -59,17 +59,24 @@ public: std::vector error_cpu; // error in energy, virial, or force GPU_Vector error_gpu; // error in energy, virial, or force + std::vector has_type; + std::vector structures; void construct(Parameters& para, std::vector& structures, int n1, int n2, int device_id); - float get_rmse_force(Parameters& para, const bool use_weight, int device_id); - float get_rmse_energy( - float& energy_shift_per_structure, const bool use_weight, const bool do_shift, int device_id); - float get_rmse_virial(Parameters& para, const bool use_weight, int device_id); + std::vector get_rmse_force(Parameters& para, const bool use_weight, int device_id); + std::vector get_rmse_energy( + Parameters& para, + float& energy_shift_per_structure, + const bool use_weight, + const bool do_shift, + int device_id); + std::vector get_rmse_virial(Parameters& para, const bool use_weight, int device_id); private: void copy_structures(std::vector& structures_input, int n1, int n2); + void find_has_type(Parameters& para); void find_Na(Parameters& para); void initialize_gpu_data(Parameters& para); void find_neighbor(Parameters& para); diff --git a/src/main_nep/fitness.cu b/src/main_nep/fitness.cu index 2ce8758ed..0b17ef86a 100644 --- a/src/main_nep/fitness.cu +++ b/src/main_nep/fitness.cu @@ -152,13 +152,19 @@ void Fitness::compute( potential->find_force(para, individual, train_set[batch_id], false, deviceCount); for (int m = 0; m < deviceCount; ++m) { float energy_shift_per_structure_not_used; - fitness[deviceCount * n + m + 0 * para.population_size] = - para.lambda_e * train_set[batch_id][m].get_rmse_energy( - energy_shift_per_structure_not_used, true, true, m); - fitness[deviceCount * n + m + 1 * para.population_size] = - para.lambda_f * train_set[batch_id][m].get_rmse_force(para, true, m); - fitness[deviceCount * n + m + 2 * para.population_size] = - para.lambda_v * train_set[batch_id][m].get_rmse_virial(para, true, m); + auto rmse_energy_array = train_set[batch_id][m].get_rmse_energy( + para, energy_shift_per_structure_not_used, true, true, m); + auto rmse_force_array = train_set[batch_id][m].get_rmse_force(para, true, m); + auto rmse_virial_array = train_set[batch_id][m].get_rmse_virial(para, true, m); + + for (int t = 0; t <= para.num_types; ++t) { + fitness[deviceCount * n + m + (6 * t + 3) * para.population_size] = + para.lambda_e * rmse_energy_array[t]; + fitness[deviceCount * n + m + (6 * t + 4) * para.population_size] = + para.lambda_f * rmse_force_array[t]; + fitness[deviceCount * n + m + (6 * t + 5) * para.population_size] = + para.lambda_v * rmse_virial_array[t]; + } } } } @@ -239,10 +245,14 @@ void Fitness::report_error( int batch_id = generation % num_batches; potential->find_force(para, elite, train_set[batch_id], false, 1); float energy_shift_per_structure; - float rmse_energy_train = - train_set[batch_id][0].get_rmse_energy(energy_shift_per_structure, false, true, 0); - float rmse_force_train = train_set[batch_id][0].get_rmse_force(para, false, 0); - float rmse_virial_train = train_set[batch_id][0].get_rmse_virial(para, false, 0); + auto rmse_energy_train_array = + train_set[batch_id][0].get_rmse_energy(para, energy_shift_per_structure, false, true, 0); + auto rmse_force_train_array = train_set[batch_id][0].get_rmse_force(para, false, 0); + auto rmse_virial_train_array = train_set[batch_id][0].get_rmse_virial(para, false, 0); + + float rmse_energy_train = rmse_energy_train_array.back(); + float rmse_force_train = rmse_force_train_array.back(); + float rmse_virial_train = rmse_virial_train_array.back(); // correct the last bias parameter in the NN if (para.train_mode == 0) { @@ -255,10 +265,13 @@ void Fitness::report_error( if (has_test_set) { potential->find_force(para, elite, test_set, false, 1); float energy_shift_per_structure_not_used; - rmse_energy_test = - test_set[0].get_rmse_energy(energy_shift_per_structure_not_used, false, false, 0); - rmse_force_test = test_set[0].get_rmse_force(para, false, 0); - rmse_virial_test = test_set[0].get_rmse_virial(para, false, 0); + auto rmse_energy_test_array = + test_set[0].get_rmse_energy(para, energy_shift_per_structure_not_used, false, false, 0); + auto rmse_force_test_array = test_set[0].get_rmse_force(para, false, 0); + auto rmse_virial_test_array = test_set[0].get_rmse_virial(para, false, 0); + rmse_energy_test = rmse_energy_test_array.back(); + rmse_force_test = rmse_force_test_array.back(); + rmse_virial_test = rmse_virial_test_array.back(); } FILE* fid_nep = my_fopen("nep.txt", "w"); diff --git a/src/main_nep/main.cu b/src/main_nep/main.cu index 3a82966da..6018cb643 100644 --- a/src/main_nep/main.cu +++ b/src/main_nep/main.cu @@ -70,7 +70,7 @@ void print_welcome_information(void) printf("***************************************************************\n"); printf("* Welcome to use GPUMD *\n"); printf("* (Graphics Processing Units Molecular Dynamics) *\n"); - printf("* Version 3.5 *\n"); + printf("* Version 3.6 *\n"); printf("* This is the nep executable *\n"); printf("***************************************************************\n"); printf("\n"); diff --git a/src/main_nep/snes.cu b/src/main_nep/snes.cu index f84a0be92..8b176d555 100644 --- a/src/main_nep/snes.cu +++ b/src/main_nep/snes.cu @@ -38,19 +38,18 @@ SNES::SNES(Parameters& para, Fitness* fitness_function) population_size = para.population_size; eta_sigma = (3.0f + std::log(number_of_variables * 1.0f)) / (5.0f * sqrt(number_of_variables * 1.0f)) / 2.0f; - fitness.resize(population_size * 6); - fitness_copy.resize(population_size * 6); - index.resize(population_size); + fitness.resize(population_size * 6 * (para.num_types + 1)); + index.resize(population_size * (para.num_types + 1)); population.resize(population_size * number_of_variables); - population_copy.resize(population_size * number_of_variables); s.resize(population_size * number_of_variables); - s_copy.resize(population_size * number_of_variables); mu.resize(number_of_variables); sigma.resize(number_of_variables); utility.resize(population_size); + type_of_variable.resize(number_of_variables, para.num_types); initialize_rng(); initialize_mu_and_sigma(para); calculate_utility(); + find_type_of_variable(para); compute(para, fitness_function); } @@ -93,6 +92,75 @@ void SNES::calculate_utility() } } +void SNES::find_type_of_variable(Parameters& para) +{ + int offset = 0; + + // NN part + if (para.version == 4) { + int num_ann = (para.train_mode == 2) ? 2 : 1; + for (int ann = 0; ann < num_ann; ++ann) { + for (int t = 0; t < para.num_types; ++t) { + for (int n = 0; n < (para.dim + 2) * para.num_neurons1; ++n) { + type_of_variable[n + offset] = t; + } + offset += (para.dim + 2) * para.num_neurons1; + } + ++offset; // the bias + } + } else { + offset += (para.dim + 2) * para.num_neurons1 + 1; + } + + // descriptor part + if (para.version == 2) { + if (para.num_types > 1) { + for (int n = 0; n <= para.n_max_radial; ++n) { + for (int t1 = 0; t1 < para.num_types; ++t1) { + for (int t2 = 0; t2 < para.num_types; ++t2) { + int t12 = t1 * para.num_types + t2; + type_of_variable[n * para.num_types * para.num_types + t12 + offset] = t1; + } + } + } + offset += (para.n_max_radial + 1) * para.num_types * para.num_types; + for (int n = 0; n <= para.n_max_angular; ++n) { + for (int t1 = 0; t1 < para.num_types; ++t1) { + for (int t2 = 0; t2 < para.num_types; ++t2) { + int t12 = t1 * para.num_types + t2; + type_of_variable[n * para.num_types * para.num_types + t12 + offset] = t1; + } + } + } + } + } else { + for (int n = 0; n <= para.n_max_radial; ++n) { + for (int k = 0; k <= para.basis_size_radial; ++k) { + int nk = n * (para.basis_size_radial + 1) + k; + for (int t1 = 0; t1 < para.num_types; ++t1) { + for (int t2 = 0; t2 < para.num_types; ++t2) { + int t12 = t1 * para.num_types + t2; + type_of_variable[nk * para.num_types * para.num_types + t12 + offset] = t1; + } + } + } + } + offset += + (para.n_max_radial + 1) * (para.basis_size_radial + 1) * para.num_types * para.num_types; + for (int n = 0; n <= para.n_max_angular; ++n) { + for (int k = 0; k <= para.basis_size_angular; ++k) { + int nk = n * (para.basis_size_angular + 1) + k; + for (int t1 = 0; t1 < para.num_types; ++t1) { + for (int t2 = 0; t2 < para.num_types; ++t2) { + int t12 = t1 * para.num_types + t2; + type_of_variable[nk * para.num_types * para.num_types + t12 + offset] = t1; + } + } + } + } + } +} + void SNES::compute(Parameters& para, Fitness* fitness_function) { @@ -122,12 +190,19 @@ void SNES::compute(Parameters& para, Fitness* fitness_function) if (para.prediction == 0) { for (int n = 0; n < maximum_generation; ++n) { create_population(para); - fitness_function->compute(n, para, population.data(), fitness.data() + 3 * population_size); + fitness_function->compute(n, para, population.data(), fitness.data()); + regularize(para); - sort_population(); + sort_population(para); + + int best_index = index[para.num_types * population_size]; + float fitness_total = fitness[0 + (6 * para.num_types + 0) * population_size]; + float fitness_L1 = fitness[best_index + (6 * para.num_types + 1) * population_size]; + float fitness_L2 = fitness[best_index + (6 * para.num_types + 2) * population_size]; fitness_function->report_error( - para, n, fitness[0 + 0 * population_size], fitness[0 + 1 * population_size], - fitness[0 + 2 * population_size], population.data()); + para, n, fitness_total, fitness_L1, fitness_L2, + population.data() + number_of_variables * best_index); + update_mu_and_sigma(); if (0 == (n + 1) % 100) { output_mu_and_sigma(para); @@ -174,12 +249,14 @@ void SNES::regularize(Parameters& para) if (para.lambda_1 < 0.0f || para.lambda_2 < 0.0f) { float auto_reg = 1.0e30f; for (int p = 0; p < population_size; ++p) { - float temp = fitness[p + 3 * population_size] + fitness[p + 4 * population_size] + - fitness[p + 5 * population_size]; + float temp = fitness[p + (6 * para.num_types + 3) * population_size] + + fitness[p + (6 * para.num_types + 4) * population_size] + + fitness[p + (6 * para.num_types + 5) * population_size]; if (auto_reg > temp) { auto_reg = temp; } } + auto_reg *= 0.4f; if (para.lambda_1 < 0.0f) { lambda_1 = auto_reg; } @@ -195,12 +272,17 @@ void SNES::regularize(Parameters& para) cost_L1 += std::abs(population[pv]); cost_L2 += population[pv] * population[pv]; } + cost_L1 *= lambda_1 / number_of_variables; cost_L2 = lambda_2 * sqrt(cost_L2 / number_of_variables); - fitness[p] = cost_L1 + cost_L2 + fitness[p + 3 * population_size] + - fitness[p + 4 * population_size] + fitness[p + 5 * population_size]; - fitness[p + 1 * population_size] = cost_L1; - fitness[p + 2 * population_size] = cost_L2; + + for (int t = 0; t <= para.num_types; ++t) { + fitness[p + (6 * t + 0) * population_size] = + cost_L1 + cost_L2 + fitness[p + (6 * t + 3) * population_size] + + fitness[p + (6 * t + 4) * population_size] + fitness[p + (6 * t + 5) * population_size]; + fitness[p + (6 * t + 1) * population_size] = cost_L1; + fitness[p + (6 * t + 2) * population_size] = cost_L2; + } } } @@ -219,40 +301,26 @@ static void insertion_sort(float array[], int index[], int n) } } -void SNES::sort_population() +void SNES::sort_population(Parameters& para) { - for (int n = 0; n < population_size; ++n) { - index[n] = n; - } - insertion_sort(fitness.data(), index.data(), population_size); - for (int n = 0; n < population_size * number_of_variables; ++n) { - s_copy[n] = s[n]; - population_copy[n] = population[n]; - } - for (int n = 0; n < population_size; ++n) { - for (int k = 1; k < 6; ++k) { - fitness_copy[n + k * population_size] = fitness[n + k * population_size]; - } - } - for (int n = 0; n < population_size; ++n) { - int n1 = n * number_of_variables; - int n2 = index[n] * number_of_variables; - for (int m = 0; m < number_of_variables; ++m) { - s[n1 + m] = s_copy[n2 + m]; - population[n1 + m] = population_copy[n2 + m]; - } - for (int k = 1; k < 6; ++k) { - fitness[n + k * population_size] = fitness_copy[index[n] + k * population_size]; + for (int t = 0; t < para.num_types + 1; ++t) { + for (int n = 0; n < population_size; ++n) { + index[t * population_size + n] = n; } + + insertion_sort( + fitness.data() + t * population_size * 6, index.data() + t * population_size, + population_size); } } void SNES::update_mu_and_sigma() { for (int v = 0; v < number_of_variables; ++v) { + int type = type_of_variable[v]; float gradient_mu = 0.0f, gradient_sigma = 0.0f; for (int p = 0; p < population_size; ++p) { - int pv = p * number_of_variables + v; + int pv = index[type * population_size + p] * number_of_variables + v; gradient_mu += s[pv] * utility[p]; gradient_sigma += (s[pv] * s[pv] - 1.0f) * utility[p]; } diff --git a/src/main_nep/snes.cuh b/src/main_nep/snes.cuh index 66b6908a9..a711618c0 100644 --- a/src/main_nep/snes.cuh +++ b/src/main_nep/snes.cuh @@ -32,21 +32,20 @@ protected: float eta_sigma = 0.1f; std::vector index; std::vector fitness; - std::vector fitness_copy; std::vector population; - std::vector population_copy; std::vector mu; std::vector sigma; std::vector utility; std::vector s; - std::vector s_copy; + std::vector type_of_variable; void initialize_rng(); void initialize_mu_and_sigma(Parameters& para); void calculate_utility(); + void find_type_of_variable(Parameters& para); void compute(Parameters&, Fitness*); void create_population(Parameters&); void regularize(Parameters&); - void sort_population(); + void sort_population(Parameters& para); void update_mu_and_sigma(); void output_mu_and_sigma(Parameters& para); };