Lu matrix decomposition (#1587)

TomasPegado · polvalente · web-flow · commit 27b4026a835b · 2025-03-05T13:07:01.000-03:00
Co-authored-by: Paulo Valente &lt;16843419+polvalente@users.noreply.github.com&gt;
diff --git a/exla/lib/exla/backend.ex b/exla/lib/exla/backend.ex
@@ -381,7 +381,6 @@ defmodule EXLA.Backend do
        [:tensor, :source, :init_value]},
       {:indexed_add, [:tensor, :indices, :updates, :opts], [:tensor, :indices, :updates]},
       {:indexed_put, [:tensor, :indices, :updates, :opts], [:tensor, :indices, :updates]},
-      {:lu, [:tensor, :opts], [:tensor]},
       {:triangular_solve, [:a, :b, :opts], [:a, :b]},
       {:fft, [:tensor, :opts], [:tensor]},
       {:ifft, [:tensor, :opts], [:tensor]}
diff --git a/nx/lib/nx/backend.ex b/nx/lib/nx/backend.ex
@@ -175,6 +175,7 @@ defmodule Nx.Backend do
     top_k: 3,
     fft2: 3,
     ifft2: 3,
+    lu: 3,
     qr: 3,
     cholesky: 2,
     eigh: 3,
diff --git a/nx/lib/nx/binary_backend.ex b/nx/lib/nx/binary_backend.ex
@@ -1258,26 +1258,6 @@ defmodule Nx.BinaryBackend do
     output_batch_groups |> Enum.with_index() |> Enum.map(fn {x, i} -> {x, rem(i, groups)} end)
   end
 
-  @impl true
-  def lu(
-        {%{type: p_type} = p_holder, %{type: l_type} = l_holder, %{type: u_type} = u_holder},
-        %{type: input_type, shape: input_shape} = tensor,
-        opts
-      ) do
-    bin = to_binary(tensor)
-    rank = tuple_size(input_shape)
-    n = elem(input_shape, rank - 1)
-
-    {p, l, u} =
-      bin_batch_reduce(bin, n * n, input_type, {<<>>, <<>>, <<>>}, fn matrix,
-                                                                      {p_acc, l_acc, u_acc} ->
-        {p, l, u} = B.Matrix.lu(matrix, input_type, {n, n}, p_type, l_type, u_type, opts)
-        {p_acc <> p, l_acc <> l, u_acc <> u}
-      end)
-
-    {from_binary(p_holder, p), from_binary(l_holder, l), from_binary(u_holder, u)}
-  end
-
   @impl true
   def triangular_solve(
         %{type: output_type} = out,
@@ -2414,17 +2394,6 @@ defmodule Nx.BinaryBackend do
     bin_zip_reduce_axis(rest1, rest2, s1, s2, bin, acc, fun)
   end
 
-  defp bin_batch_reduce(bin, batch_size, {_, size}, acc, fun) do
-    batch_bit_size = batch_size * size
-    batches = bit_size(bin) |> div(batch_bit_size)
-
-    for i <- 0..(batches - 1), reduce: acc do
-      acc ->
-        batch = bitstring_part(bin, i * batch_bit_size, batch_bit_size)
-        fun.(batch, acc)
-    end
-  end
-
   ## Conversion helpers
 
   defp bitstring_part(bitstring, skip, size) do
diff --git a/nx/lib/nx/binary_backend/matrix.ex b/nx/lib/nx/binary_backend/matrix.ex
@@ -1,8 +1,6 @@
 defmodule Nx.BinaryBackend.Matrix do
   @moduledoc false
   use Complex.Kernel
-  import Kernel, except: [abs: 1]
-  import Complex, only: [abs: 1]
 
   import Nx.Shared
 
@@ -116,107 +114,6 @@ defmodule Nx.BinaryBackend.Matrix do
 
   defp do_ts([], [], _idx, acc), do: acc
 
-  def lu(input_data, input_type, {n, n} = input_shape, p_type, l_type, u_type, opts) do
-    a = binary_to_matrix(input_data, input_type, input_shape)
-    eps = opts[:eps]
-
-    {p, a_prime} = lu_validate_and_pivot(a, n)
-
-    # We'll work with linear indices because of the way each matrix
-    # needs to be updated/accessed
-    zeros_matrix = List.duplicate(List.duplicate(0, n), n)
-
-    {l, u} =
-      for j <- 0..(n - 1), reduce: {zeros_matrix, zeros_matrix} do
-        {l, u} ->
-          l = replace_matrix_element(l, j, j, 1.0)
-
-          u =
-            for i <- 0..j, reduce: u do
-              u ->
-                u_slice = slice_matrix(u, [0, j], [i, 1])
-                l_slice = slice_matrix(l, [i, 0], [1, i])
-                sum = dot_matrix(u_slice, l_slice)
-                [a_ij] = get_matrix_elements(a_prime, [[i, j]])
-
-                value = a_ij - sum
-
-                if abs(value) < eps do
-                  replace_matrix_element(u, i, j, 0)
-                else
-                  replace_matrix_element(u, i, j, value)
-                end
-            end
-
-          l =
-            for i <- j..(n - 1), i != j, reduce: l do
-              l ->
-                u_slice = slice_matrix(u, [0, j], [i, 1])
-                l_slice = slice_matrix(l, [i, 0], [1, i])
-                sum = dot_matrix(u_slice, l_slice)
-
-                [a_ij] = get_matrix_elements(a_prime, [[i, j]])
-                [u_jj] = get_matrix_elements(u, [[j, j]])
-
-                value =
-                  cond do
-                    u_jj != 0 ->
-                      (a_ij - sum) / u_jj
-
-                    a_ij >= sum ->
-                      :infinity
-
-                    true ->
-                      :neg_infinity
-                  end
-
-                if abs(value) < eps do
-                  replace_matrix_element(l, i, j, 0)
-                else
-                  replace_matrix_element(l, i, j, value)
-                end
-            end
-
-          {l, u}
-      end
-
-    # Transpose because since P is orthogonal, inv(P) = tranpose(P)
-    # and we want to return P such that A = P.L.U
-    {p |> transpose_matrix() |> matrix_to_binary(p_type),
-     l |> approximate_zeros(eps) |> matrix_to_binary(l_type),
-     u |> approximate_zeros(eps) |> matrix_to_binary(u_type)}
-  end
-
-  defp lu_validate_and_pivot(a, n) do
-    # pivots a tensor so that the biggest elements of each column lie on the diagonal.
-    # if any of the diagonal elements ends up being 0, raises an ArgumentError
-
-    identity =
-      Enum.map(0..(n - 1), fn i -> Enum.map(0..(n - 1), fn j -> if i == j, do: 1, else: 0 end) end)
-
-    # For each row, find the max value by column.
-    # If its index (max_idx) is not in the diagonal (i.e. j != max_idx)
-    # we need to swap rows j and max_idx in both the permutation matrix
-    # and in the a matrix.
-    Enum.reduce(0..(n - 2), {identity, a}, fn j, {p, a} ->
-      [max_idx | _] =
-        Enum.sort_by(j..(n - 1), fn i -> a |> Enum.at(i) |> Enum.at(j) |> abs() end, &>=/2)
-
-      if max_idx == j do
-        {p, a}
-      else
-        p_row = Enum.at(p, max_idx)
-        p_j = Enum.at(p, j)
-        p = p |> List.replace_at(max_idx, p_j) |> List.replace_at(j, p_row)
-
-        a_row = Enum.at(a, max_idx)
-        a_j = Enum.at(a, j)
-        a = a |> List.replace_at(max_idx, a_j) |> List.replace_at(j, a_row)
-        {p, a}
-      end
-    end)
-  end
-
   ## Matrix (2-D array) manipulation
 
   defp dot_matrix([], _), do: 0
@@ -279,41 +176,9 @@ defmodule Nx.BinaryBackend.Matrix do
     |> Enum.chunk_every(num_cols)
   end
 
-  defp slice_matrix(a, [row_start, col_start], [row_length, col_length]) do
-    a
-    |> Enum.slice(row_start, row_length)
-    |> Enum.flat_map(&Enum.slice(&1, col_start, col_length))
-  end
-
   defp get_matrix_column(m, col) do
     Enum.map(m, fn row ->
       Enum.at(row, col)
     end)
   end
-
-  defp get_matrix_elements(m, row_col_pairs) do
-    Enum.map(row_col_pairs, fn [row, col] ->
-      m
-      |> Enum.at(row, [])
-      |> Enum.at(col)
-      |> case do
-        nil -> raise ArgumentError, "invalid index [#{row},#{col}] for matrix"
-        item -> item
-      end
-    end)
-  end
-
-  defp replace_matrix_element(m, row, col, value) do
-    updated = m |> Enum.at(row) |> List.replace_at(col, value)
-    List.replace_at(m, row, updated)
-  end
-
-  defp approximate_zeros(matrix, tol) do
-    do_round = fn x -> if Complex.abs(x) < tol, do: 0 * x, else: x end
-
-    Enum.map(matrix, fn
-      row when is_list(row) -> Enum.map(row, do_round)
-      e -> do_round.(e)
-    end)
-  end
 end
diff --git a/nx/lib/nx/defn/expr.ex b/nx/lib/nx/defn/expr.ex
@@ -1188,14 +1188,6 @@ defmodule Nx.Defn.Expr do
     expr(out, context, :triangular_solve, [a, b, opts])
   end
 
-  @impl true
-  def lu({p, l, u}, tensor, opts) do
-    tensor = to_expr(tensor)
-    context = tensor.data.context
-    out = %T{names: [], shape: {}, type: {:tuple, 3}}
-    tuple(expr(out, context, :lu, [{p, l, u}, tensor, opts]), [p, l, u])
-  end
-
   @impl true
   def sort(out, tensor, opts) do
     %{data: %{context: context}} = tensor = to_expr(tensor)
diff --git a/nx/lib/nx/defn/grad.ex b/nx/lib/nx/defn/grad.ex
@@ -716,29 +716,6 @@ defmodule Nx.Defn.Grad do
     pairs
   end
 
-  defp grad(:lu, [{p, l, u}, input, _opts], ans, [_dp, dl, du]) do
-    # Definition taken from: https://sethaxen.com/blog/2021/02/differentiating-the-lu-decomposition/
-    # Where dF = tril_strict(L^* . dL) + triu(dU . U^*)
-    # dA = P^t . (L^*)^-1 . dF . (U^*)^-1
-
-    {p, l, u} = Nx.Defn.Expr.tuple(ans, [p, l, u])
-
-    u_h = Nx.LinAlg.adjoint(u)
-    l_h = Nx.LinAlg.adjoint(l)
-    p_t = Nx.LinAlg.adjoint(p)
-
-    lh_dl = Nx.dot(l_h, dl)
-    du_uh = Nx.dot(du, u_h)
-
-    lt_inv = Nx.LinAlg.invert(l_h)
-    ut_inv = Nx.LinAlg.invert(u_h)
-
-    df = lh_dl |> Nx.tril(k: -1) |> Nx.add(Nx.triu(du_uh))
-    da = p_t |> Nx.dot(lt_inv) |> Nx.dot(df) |> Nx.dot(ut_inv)
-
-    [{input, da}]
-  end
-
   defp grad(:sort, [t, opts], _ans, g) do
     idx = Nx.argsort(t, opts)
     take_along_opts = Keyword.take(opts, [:axis])
diff --git a/nx/lib/nx/lin_alg.ex b/nx/lib/nx/lin_alg.ex
@@ -1523,15 +1523,15 @@ defmodule Nx.LinAlg do
         [
           [1.0, 0.0, 0.0],
           [0.5714285969734192, 1.0, 0.0],
-          [0.1428571492433548, 2.0, 1.0]
+          [0.1428571492433548, 2.0000009536743164, 1.0]
         ]
       >
       iex> u
       #Nx.Tensor<
         f32[3][3]
         [
           [7.0, 8.0, 9.0],
-          [0.0, 0.4285714328289032, 0.8571428656578064],
+          [0.0, 0.4285712242126465, 0.857142448425293],
           [0.0, 0.0, 0.0]
         ]
       >
@@ -1607,7 +1607,7 @@ defmodule Nx.LinAlg do
           [
             [1.0, 0.0, 0.0],
             [0.6666666865348816, 1.0, 0.0],
-            [0.3333333432674408, 2.0, 1.0]
+            [0.3333333432674408, 1.9999992847442627, 1.0]
           ],
           [
             [1.0, 0.0, 0.0],
@@ -1622,8 +1622,8 @@ defmodule Nx.LinAlg do
         [
           [
             [9.0, 8.0, 7.0],
-            [0.0, -0.3333333432674408, -0.6666666865348816],
-            [0.0, 0.0, 0.0]
+            [0.0, -0.33333349227905273, -0.6666669845581055],
+            [0.0, 0.0, 5.960464477539063e-8]
           ],
           [
             [-1.0, 0.0, -1.0],
@@ -1638,7 +1638,7 @@ defmodule Nx.LinAlg do
         [
           [
             [9.0, 8.0, 7.0],
-            [6.0, 5.0, 4.0],
+            [6.0, 5.0, 3.999999761581421],
             [3.0, 2.0, 1.0]
           ],
           [
@@ -1676,7 +1676,7 @@ defmodule Nx.LinAlg do
           [
             [1.0, 0.0, 0.0],
             [0.6666666865348816, 1.0, 0.0],
-            [0.3333333432674408, 2.0, 1.0]
+            [0.3333333432674408, 1.9999992847442627, 1.0]
           ],
           [
             [1.0, 0.0, 0.0],
@@ -1692,8 +1692,8 @@ defmodule Nx.LinAlg do
         [
           [
             [9.0, 8.0, 7.0],
-            [0.0, -0.3333333432674408, -0.6666666865348816],
-            [0.0, 0.0, 0.0]
+            [0.0, -0.33333349227905273, -0.6666669845581055],
+            [0.0, 0.0, 5.960464477539063e-8]
           ],
           [
             [-1.0, 0.0, -1.0],
@@ -1709,22 +1709,22 @@ defmodule Nx.LinAlg do
       ** (ArgumentError) tensor must be a square matrix or a batch of square matrices, got shape: {3, 4}
   """
   def lu(tensor, opts \\ []) do
-    apply_vectorized(tensor, fn tensor ->
-      opts = keyword!(opts, eps: 1.0e-10)
-      %T{type: type, shape: shape} = tensor
-
-      output_type = Nx.Type.to_floating(type)
-      {p_shape, l_shape, u_shape} = Nx.Shape.lu(shape)
-      names = List.duplicate(nil, tuple_size(shape))
-
-      impl!(tensor).lu(
-        {%{tensor | type: type, shape: p_shape, names: names},
-         %{tensor | type: output_type, shape: l_shape, names: names},
-         %{tensor | type: output_type, shape: u_shape, names: names}},
-        tensor,
-        opts
-      )
-    end)
+    opts = keyword!(opts, eps: 1.0e-10)
+    %T{vectorized_axes: vectorized_axes} = tensor = Nx.to_tensor(tensor)
+    %T{type: type, shape: shape} = tensor = Nx.devectorize(tensor)
+
+    output_type = Nx.Type.to_floating(type)
+    {p_shape, l_shape, u_shape} = Nx.Shape.lu(shape)
+    names = List.duplicate(nil, tuple_size(shape))
+
+    output =
+      {%{tensor | type: type, shape: p_shape, names: names},
+       %{tensor | type: output_type, shape: l_shape, names: names},
+       %{tensor | type: output_type, shape: u_shape, names: names}}
+
+    :lu
+    |> Nx.Shared.optional([tensor, opts], output, &Nx.LinAlg.LU.lu/2)
+    |> Nx.vectorize(vectorized_axes)
   end
 
   @doc """
@@ -1892,7 +1892,7 @@ defmodule Nx.LinAlg do
       ...> ]))
       #Nx.Tensor<
         f32
-        48.0
+        47.999996185302734
       >
 
       iex> Nx.LinAlg.determinant(Nx.tensor([
@@ -1904,7 +1904,7 @@ defmodule Nx.LinAlg do
       ...> ]))
       #Nx.Tensor<
         f32
-        48.0
+        47.999996185302734
       >
 
       iex> Nx.LinAlg.determinant(Nx.tensor([
diff --git a/nx/lib/nx/lin_alg/lu.ex b/nx/lib/nx/lin_alg/lu.ex
diff --git a/nx/lib/nx/lin_alg/qr.ex b/nx/lib/nx/lin_alg/qr.ex
diff --git a/nx/test/nx/lin_alg_test.exs b/nx/test/nx/lin_alg_test.exs