improve the fitting parallelization

ACEsuit · May 24, 2024 · ec5b7ce · ec5b7ce
1 parent c92ed92
commit ec5b7ce
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 4 deletions.
diff --git a/Project.toml b/Project.toml
@@ -33,10 +33,12 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+TensorCast = "02d47bb6-7ce6-556a-be16-bb1710789e2b"
 
 [compat]
 ACE = "= 0.12.22"

diff --git a/src/fitting.jl b/src/fitting.jl
@@ -124,6 +124,38 @@ end
 
 """
 """
+# function _assemble_ls(basis::SymmetricBasis, data::T, enable_mean::Bool=false) where T<:AbstractFittingDataSet
+#     # This will be rewritten once the other code has been refactored.
+
+#     # Should `A` not be constructed using `acquire_B!`?
+
+#     n₁, n₂, n₃ = size(data)
+#     # Currently the code desires "A" to be an X×Y matrix of Nᵢ×Nⱼ matrices, where X is
+#     # the number of sub-block samples, Y is equal to `size(bos.basis.A2Bmap)[1]`, and
+#     # Nᵢ×Nⱼ is the sub-block shape; i.e. 3×3 for pp interactions. This may be refactored
+#     # at a later date if this layout is not found to be strictly necessary.
+#     cfg = ACEConfig.(data.states)
+#     Aval = evaluate.(Ref(basis), cfg)
+#     A = permutedims(reduce(hcat, _evaluate_real.(Aval)), (2, 1))
+
+#     Y = [data.values[:, :, i] for i in 1:n₃]
+
+#     # Calculate the mean value x̄
+#     if enable_mean && n₁ ≡ n₂ && ison(data) 
+#         x̄ = mean(diag(mean(Y)))*I(n₁)
+#     else
+#         x̄ = zeros(n₁, n₂)
+#     end
+
+#     Y .-= Ref(x̄)
+#     return A, Y, x̄
+
+# end
+
+
+using SharedArrays
+using Distributed
+using TensorCast
 function _assemble_ls(basis::SymmetricBasis, data::T, enable_mean::Bool=false) where T<:AbstractFittingDataSet
     # This will be rewritten once the other code has been refactored.
 
@@ -134,10 +166,31 @@ function _assemble_ls(basis::SymmetricBasis, data::T, enable_mean::Bool=false) w
     # the number of sub-block samples, Y is equal to `size(bos.basis.A2Bmap)[1]`, and
     # Nᵢ×Nⱼ is the sub-block shape; i.e. 3×3 for pp interactions. This may be refactored
     # at a later date if this layout is not found to be strictly necessary.
-    cfg = ACEConfig.(data.states)
-    Aval = evaluate.(Ref(basis), cfg)
-    A = permutedims(reduce(hcat, _evaluate_real.(Aval)), (2, 1))
-
+
+    type = ACE.valtype(basis).parameters[5]
+    Avalr = SharedArray{real(type), 4}(n₃, length(basis), n₁, n₂)
+    np = length(procs(Avalr))
+    nstates = length(data.states)
+    nstates_pp = ceil(Int, nstates/np)
+    idx_begins = [nstates_pp*(idx-1)+1 for idx in 1:np]
+    idx_ends = [nstates_pp*(idx) for idx in 1:(np-1)]
+    push!(idx_ends, nstates)
+    @sync begin
+        for (i, id) in enumerate(procs(Avalr))
+            @async begin
+                @spawnat id begin
+                    cfg = ACEConfig.(data.states[idx_begins[i]:idx_ends[i]])
+                    Aval_ele = evaluate.(Ref(basis), cfg)
+                    Avalr_ele = _evaluate_real.(Aval_ele)
+                    Avalr_ele = permutedims(reduce(hcat, Avalr_ele), (2, 1))
+                    @cast M[i,j,k,l] := Avalr_ele[i,j][k,l]
+                    Avalr[idx_begins[i]: idx_ends[i], :, :, :] = M
+                end
+            end
+        end
+    end
+    @cast A[i,j][k,l] := Avalr[i,j,k,l]
+
     Y = [data.values[:, :, i] for i in 1:n₃]
 
     # Calculate the mean value x̄