# Usage

Distributed linear algebra frameworks are the backbone of efficient parallel
codes in data analytics, scientific computing, and machine learning. The central
idea is that vectors and matrices can be partitioned into potentially
overlapping chunks that are distributed across a set of workers, on which we
define the usual operations like products and norms.
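
As a toy illustration of this idea (plain Julia, not this package's API), a
vector can be split into two disjoint chunks and a reduction such as the dot
product can be computed from per-worker contributions:
```julia
# Toy illustration only: two "workers" each own a disjoint chunk of x and y.
x = [1.0, 2.0, 3.0, 4.0, 5.0]
y = [5.0, 4.0, 3.0, 2.0, 1.0]
chunks = [1:3, 4:5]                       # worker 1 owns 1:3, worker 2 owns 4:5

# Each worker computes its local contribution; a final reduction combines them.
local_dots = [sum(x[r] .* y[r]) for r in chunks]
global_dot = sum(local_dots)              # equals dot(x, y)
```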

## Basic example

In this section we take a look at solving the finite difference discretization
of a Laplace problem in 1D over the domain [0,1]. As a reminder, the Laplace
problem asks us to find a function u(x) such that Δu(x) = 0 for all x ∈ [0,1].
Without boundary conditions the problem is not well-posed, hence we introduce
the Dirichlet condition u(0) = 1.

Applying the finite difference method with grid spacing 0.25 we discretize the
problem into a linear system with 5 unknowns (u₁,...,u₅), which we call degrees
of freedom:
```math
\frac{1}{4}
\begin{pmatrix}
1 & 0 & 0 & 0 & 0 \\
0 & -2 & 1 & 0 & 0 \\
0 & 1 & -2 & 1 & 0 \\
0 & 0 & 1 & -2 & 1 \\
0 & 0 & 0 & 1 & -1
\end{pmatrix}
\begin{pmatrix}
u_1 \\
u_2 \\
u_3 \\
u_4 \\
u_5
\end{pmatrix}
=
\begin{pmatrix}
 1 \\
-1 \\
 0 \\
 0 \\
 0
\end{pmatrix}
```

A detailed derivation can be found in standard numerical analysis lecture notes
and books, e.g. [these](https://people.sc.fsu.edu/~jburkardt/classes/math2071_2020/poisson_steady_1d/poisson_steady_1d.pdf).
The linear system is then solved with the conjugate gradient method.
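
As a quick reminder (a standard result, not specific to this package), the
interior equations come from the second-order centered difference approximation
of the Laplacian with grid spacing h = 0.25:
```math
\Delta u(x_i) \approx \frac{u_{i-1} - 2 u_i + u_{i+1}}{h^2}
```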

### Commented Code

To distribute the problem across two workers we have to choose a partitioning.
Here we arbitrarily assign the first 3 rows and columns to worker 1 and the
remaining 2 rows and columns to worker 2.

First, include the packages that are used.
```julia
using PartitionedArrays, SparseArrays, IterativeSolvers
```

We want a partitioning into 2 pieces and choose the sequential backend to handle
the task sequentially, so that the code can be executed in a standard Julia REPL
(e.g., to simplify debugging).
```julia
np = 2
backend = SequentialBackend()
```

Most codes using `PartitionedArrays` start by creating a distributed object that
contains, for each part, its part id. We call it `parts`.
```julia
parts = get_part_ids(backend,np)
```

Now we generate a partitioning of the rows and columns. Note that the entry in
row 3, column 4 is visible to the first worker, although that column is owned
by the second one.
```julia
# For each part we return a tuple with the ids of the neighboring parts and an
# IndexSet for the rows and columns. An IndexSet stores the global indices
# visible to a part together with the id of the part owning each of them.
neighbors, row_partitioning, col_partitioning = map_parts(parts) do part
    if part == 1
        (
            Int32[2],
            IndexSet(part, [1,2,3], Int32[1,1,1]),
            IndexSet(part, [1,2,3,4], Int32[1,1,1,2])
        )
    else
        (
            Int32[1],
            IndexSet(part, [3,4,5], Int32[1,2,2]),
            IndexSet(part, [3,4,5], Int32[1,2,2])
        )
    end
end
```

We create information exchangers to manage the synchronization of the visible
shared portions of the sparse matrix, and build the distributed row and column
ranges from them.
```julia
global_number_of_dofs = 5
row_exchanger = Exchanger(row_partitioning,neighbors)
rows = PRange(global_number_of_dofs,row_partitioning,row_exchanger)

col_exchanger = Exchanger(col_partitioning,neighbors)
cols = PRange(global_number_of_dofs,col_partitioning,col_exchanger)
```

Next we create the sparse matrix entries in COO format in their worker-local
numbering. A note about the exact values of the local sparse matrices can be
found in the section on local matrices below.
```julia
I, J, V = map_parts(parts) do part
    if part == 1
        (
            [ 1, 1, 2, 2, 2, 3, 3, 3],
            [ 1, 2, 1, 2, 3, 2, 3, 4],
            0.25*Float64[1, 0, 0,-2, 1, 1,-1, 0]
        )
    else
        (
            [ 1, 1, 2, 2, 2, 3, 3],
            [ 1, 2, 1, 2, 3, 2, 3],
            0.25*Float64[-1, 1, 1,-2, 1, 1,-1]
        )
    end
end
A = PSparseMatrix(I, J, V, rows, cols, ids=:local)
```

Since the previous lines only created the local portions, we have to trigger the
synchronization between the workers.
```julia
assemble!(A)
```

Construct the right hand side. Note that the first entry of the rhs of worker 2
is shared with worker 1.
```julia
b = PVector{Float64}(undef, A.rows)
map_parts(parts,local_view(b, b.rows)) do part, b_local
    if part == 1
        b_local .= [1.0, -1.0, 0.0]
    else
        b_local .= [0.0, 0.0, 0.0]
    end
end
```

Now the sparse matrix and the right hand side of the linear system are assembled
globally and we can solve the problem with the conjugate gradient method.
```julia
u = IterativeSolvers.cg(A,b)
```
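
As a quick sanity check we can inspect the residual of the computed solution.
This is only a sketch: it assumes that the distributed matrix-vector product,
broadcasting, and `norm` mentioned in the introduction are available for
`PSparseMatrix` and `PVector`.
```julia
using LinearAlgebra  # for mul! and norm

r = similar(b)
mul!(r, A, u)     # r = A*u (distributed matrix-vector product)
r .= b .- r       # residual, assuming broadcasting is defined for PVector
println(norm(r))  # should be close to zero if cg converged
```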

### Parallel Code

By changing the backend to the MPI backend we can solve the problem in parallel.
This just requires changing the line
```julia
backend = SequentialBackend()
```
to
```julia
backend = MPIBackend()
```
and including and initializing MPI. Launching the script with MPI then makes the
run parallel.

```sh
$ mpirun -n 2 julia my-script.jl
```

The full MPI code is given in the next code box. Note that we use the `prun`
function, which automatically includes and initializes MPI for us, and that the
`end` in the last line closes the parallel environment.
```julia
using PartitionedArrays, SparseArrays, IterativeSolvers

np = 2
backend = MPIBackend()

prun(backend,np) do parts
    # Construct the partitioning
    neighbors, row_partitioning, col_partitioning = map_parts(parts) do part
        if part == 1
            (
                Int32[2],
                IndexSet(part, [1,2,3], Int32[1,1,1]),
                IndexSet(part, [1,2,3,4], Int32[1,1,1,2])
            )
        else
            (
                Int32[1],
                IndexSet(part, [3,4,5], Int32[1,2,2]),
                IndexSet(part, [3,4,5], Int32[1,2,2])
            )
        end
    end

    global_number_of_dofs = 5

    row_exchanger = Exchanger(row_partitioning,neighbors)
    rows = PRange(global_number_of_dofs,row_partitioning,row_exchanger)

    col_exchanger = Exchanger(col_partitioning,neighbors)
    cols = PRange(global_number_of_dofs,col_partitioning,col_exchanger)

    # Construct the sparse matrix
    I, J, V = map_parts(parts) do part
        if part == 1
            (
                [ 1, 1, 2, 2, 2, 3, 3, 3],
                [ 1, 2, 1, 2, 3, 2, 3, 4],
                0.25*Float64[1, 0, 0,-2, 1, 1,-1, 0]
            )
        else
            (
                [ 1, 1, 2, 2, 2, 3, 3],
                [ 1, 2, 1, 2, 3, 2, 3],
                0.25*Float64[-1, 1, 1,-2, 1, 1,-1]
            )
        end
    end
    A = PSparseMatrix(I, J, V, rows, cols, ids=:local)
    assemble!(A)

    # Construct the dense right hand side
    b = PVector{Float64}(undef, A.rows)
    map_parts(parts,local_view(b, b.rows)) do part, b_local
        if part == 1
            b_local .= [1.0, -1.0, 0.0]
        else
            b_local .= [0.0, 0.0, 0.0]
        end
    end

    # Solve the linear problem
    u = IterativeSolvers.cg(A,b)
end
```

### Note on Local Matrices

It should be noted that the local matrices are constructed as if they were
assembled locally on each process, without knowledge of the remaining processes.
Dropping the coefficient 0.25, the global and local matrices look as follows:

```
        Global Matrix
    P1  P1  P1  P2  P2
P1   1   0   0   0   0
P1   0  -2   1   0   0
P1   0   1  -2   1   0
P2   0   0   1  -2   1
P2   0   0   0   1  -1

                =

        Process 1 Portion
    P1  P1  P1  P2  P2
P1   1   0   0   0   0
P1   0  -2   1   0   0
P1   0   1  -1   0   0
P2   x   x   x   x   x
P2   x   x   x   x   x

                +

        Process 2 Portion
    P1  P1  P1  P2  P2
P1   x   x   x   x   x
P1   x   x   x   x   x
P1   0   0  -1   1   0
P2   0   0   1  -2   1
P2   0   0   0   1  -1
```
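
The following is a minimal, self-contained sketch of this sub-assembly idea
using plain `SparseArrays` (not the distributed API); the variable names are
ours and only serve the illustration. `sparse` sums duplicate entries, which
mimics for this toy case what `assemble!` achieves across processes for the
shared entry at position (3,3): -1 + (-1) = -2.
```julia
using SparseArrays

# Process 1 contribution, written in global indices (coefficient 0.25 dropped).
I1 = [1, 2, 2, 3, 3]
J1 = [1, 2, 3, 2, 3]
V1 = [1.0, -2.0, 1.0, 1.0, -1.0]

# Process 2 contribution, written in global indices.
I2 = [3, 3, 4, 4, 4, 5, 5]
J2 = [3, 4, 3, 4, 5, 4, 5]
V2 = [-1.0, 1.0, 1.0, -2.0, 1.0, 1.0, -1.0]

# Duplicate (i,j) pairs are summed, recovering the global matrix shown above.
A_global = sparse(vcat(I1, I2), vcat(J1, J2), vcat(V1, V2), 5, 5)
Matrix(A_global)  # dense view for inspection
```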

## Advanced example

A more complex example can be found in the package
[PartitionedPoisson.jl](https://github.com/fverdugo/PartitionedPoisson.jl),
which describes the assembly of the finite element discretization of a
Poisson problem in 3D.