Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement polar decomposition #1697

Open
wants to merge 47 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
850bb62
added first drafts regarding PD etc.
Oct 28, 2024
a253ac7
added file for tests
Oct 28, 2024
32d8f32
first draft of condition estimation, split=1 still with bug in solve_…
Oct 29, 2024
13607a8
Merge branch 'main' into features/1696-Implement_Polar_Decomposition
mrfh92 Oct 29, 2024
cd3c007
added condest to linalg.basics, including tests
Oct 31, 2024
84bed5e
Merge branch 'features/1696-Implement_Polar_Decomposition' of github.…
Oct 31, 2024
fd7dbd4
Merge branch 'main' into features/1696-Implement_Polar_Decomposition
mrfh92 Oct 31, 2024
884d179
removed bug
Oct 31, 2024
c051391
test coverage for uncovered line
Oct 31, 2024
ac37f76
Batched QR, update of unit tests is missing so far
Oct 31, 2024
b879471
Merge branch 'main' into features/1707-Batched_QR
Nov 14, 2024
4f67a93
removed old tests that threw errors for batched inputs
Nov 14, 2024
cec049c
final debugging of tests
Nov 14, 2024
4fffb39
added changes to docs
Nov 14, 2024
7d37a40
dummy change for benchmarking run
Nov 15, 2024
6b2a18f
Merge branch 'main' into features/1696-Implement_Polar_Decomposition
Nov 15, 2024
5b4b801
Merge branch 'features/1707-Batched_QR' into features/1696-Implement_…
Nov 15, 2024
d918d2c
started with ZoloPD
Nov 15, 2024
286104e
implementation of ZoloPD + tests
Nov 15, 2024
31db459
added seeds for random in the tests to ensure reproducibility
Nov 18, 2024
efa65b7
removed file "ausprobieren.py"
Nov 18, 2024
a674e2c
final clean up
Nov 18, 2024
8107cca
...
Dec 3, 2024
704d8f0
created branch for QR in case split=0 and non-tall-skinny matrices
Dec 3, 2024
7580552
...
Dec 4, 2024
b087c28
Merge branch 'main' into features/1696-Implement_Polar_Decomposition
mrfh92 Dec 9, 2024
7dbfcbe
Merge branch 'features/1696-Implement_Polar_Decomposition' of github.…
Dec 9, 2024
6430afd
QR for split=0 and non tall-skinny data
Dec 9, 2024
7b26be1
Merge branch 'main' into features/1736-QR_for_non-tall-skinny_matrice…
mrfh92 Dec 9, 2024
3a77070
debugging
Dec 10, 2024
b6bd730
Merge branch 'features/1736-QR_for_non-tall-skinny_matrices_and_split…
Dec 10, 2024
3be5913
Merge branch 'features/1736-QR_for_non-tall-skinny_matrices_and_split…
Dec 10, 2024
159cb18
took new qr for split=0 into account
Dec 10, 2024
505fd4b
Merge branch 'main' into features/1696-Implement_Polar_Decomposition
mrfh92 Dec 10, 2024
d308bdf
added random seed
Dec 10, 2024
241ef2f
further worked on Zolo-PD
Jan 10, 2025
104e5b3
forget to comment in tests again
Jan 13, 2025
5e19ccd
added vmapped single-process qr
Jan 13, 2025
105a48d
implementation of zolo pd
Jan 20, 2025
e04d4ea
bug fix
Jan 20, 2025
d0ac81a
reduce memory for split=0
Jan 21, 2025
f49ffd1
bug fix in communication in QR
Jan 21, 2025
ed843a4
trick to send large data
JuanPedroGHM Jan 22, 2025
45a82ff
Merge branch 'fix/mpi_int_limit_trick' into features/1696-Implement_P…
Jan 27, 2025
b6b1b20
Merge branch 'main' into features/1696-Implement_Polar_Decomposition
mrfh92 Feb 10, 2025
bce2172
added benchmarks for ZoloPD
Feb 14, 2025
6b08970
Merge branch 'main' into features/1696-Implement_Polar_Decomposition
mrfh92 Feb 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions benchmarks/cb/linalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ def lanczos(B):
V, T = ht.lanczos(B, m=B.shape[0])


@monitor()
def zolopd_split0(A):
    """Benchmark the polar decomposition (ZoloPD) on a matrix distributed with split=0."""
    u, h = ht.linalg.pd(A)


@monitor()
def zolopd_split1(A):
    """Benchmark the polar decomposition (ZoloPD) on a matrix distributed with split=1."""
    u, h = ht.linalg.pd(A)


def run_linalg_benchmarks():
n = 3000
a = ht.random.random((n, n), split=0)
Expand Down Expand Up @@ -74,3 +84,12 @@ def run_linalg_benchmarks():
hierachical_svd_rank(data, 10)
hierachical_svd_tol(data, 1e-2)
del data

n = 1000
A = ht.random.random((n, n), split=0)
zolopd_split0(A)
del A

A = ht.random.random((n, n), split=1)
zolopd_split1(A)
del A
30 changes: 29 additions & 1 deletion heat/core/communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class MPICommunication(Communication):
Handle for the mpi4py Communicator
"""

COUNT_LIMIT = torch.iinfo(torch.int32).max

__mpi_type_mappings = {
torch.bool: MPI.BOOL,
torch.uint8: MPI.UNSIGNED_CHAR,
Expand Down Expand Up @@ -288,7 +290,33 @@ def mpi_type_and_elements_of(

if is_contiguous:
if counts is None:
return mpi_type, elements
if elements > cls.COUNT_LIMIT:
# Uses vector type to get around the MAX_INT limit on certain MPI implementations
# This is at the moment only applied when sending contiguous data, as the construction of data types to get around non-contiguous data naturally aliviates the problem to a certain extent.
# Thanks to: J. R. Hammond, A. Schäfer and R. Latham, "To INT_MAX... and Beyond! Exploring Large-Count Support in MPI," 2014 Workshop on Exascale MPI at Supercomputing Conference, New Orleans, LA, USA, 2014, pp. 1-8, doi: 10.1109/ExaMPI.2014.5. keywords: {Vectors;Standards;Libraries;Optimization;Context;Memory management;Open area test sites},

new_count = elements // cls.COUNT_LIMIT
left_over = elements % cls.COUNT_LIMIT

if new_count > cls.COUNT_LIMIT:
raise ValueError("Tensor is too large")
vector_type = mpi_type.Create_vector(
new_count, cls.COUNT_LIMIT, cls.COUNT_LIMIT
)
if left_over > 0:
left_over_mpi_type = mpi_type.Create_contiguous(left_over).Commit()
_, old_type_extent = mpi_type.Get_extent()
disp = cls.COUNT_LIMIT * new_count * old_type_extent
struct_type = mpi_type.Create_struct(
[1, 1], [0, disp], [vector_type, left_over_mpi_type]
).Commit()
vector_type.Free()
left_over_mpi_type.Free()
return struct_type, 1
else:
return vector_type, 1
else:
return mpi_type, elements
factor = np.prod(obj.shape[1:], dtype=np.int32)
return (
mpi_type,
Expand Down
1 change: 1 addition & 0 deletions heat/core/linalg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .qr import *
from .svdtools import *
from .svd import *
from .pd import *
114 changes: 114 additions & 0 deletions heat/core/linalg/basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@
from .. import statistics
from .. import stride_tricks
from .. import types
from ..random import randn
from .qr import qr
from .solver import solve_triangular

__all__ = [
"condest",
"cross",
"det",
"dot",
Expand All @@ -45,6 +49,116 @@
]


def _estimate_largest_singularvalue(A: DNDarray, algorithm: str = "fro") -> DNDarray:
    """
    Returns an upper estimate for the largest singular value of the 2D input ``A``.

    Parameters
    ----------
    A : DNDarray
        The matrix, i.e., a 2D DNDarray, whose largest singular value is to be estimated.
    algorithm : str
        The estimation algorithm; only "fro" (default) is available so far.
        With "fro", the Frobenius norm of the matrix serves as the upper estimate.
    """
    # validate the algorithm argument first, then dispatch via guard clauses
    if not isinstance(algorithm, str):
        raise TypeError(
            f"Parameter 'algorithm' needs to be a string, but is {algorithm} with data type {type(algorithm)}."
        )
    if algorithm != "fro":
        raise NotImplementedError("So far only algorithm='fro' implemented.")
    # ||A||_2 <= ||A||_F, so the Frobenius norm is a valid upper bound
    return matrix_norm(A, ord="fro").squeeze()


def condest(
    A: DNDarray, p: Union[int, str] = None, algorithm: str = "randomized", params: dict = None
) -> DNDarray:
    """
    Computes a (possibly randomized) upper estimate of the l2-condition number of the input 2D DNDarray.

    Parameters
    ----------
    A : DNDarray
        The matrix, i.e., a 2D DNDarray, for which the condition number shall be estimated.
    p : int or str (optional)
        The norm to use for the condition number computation. If None, the l2-norm (default, p=2) is used.
        So far, only p=2 is implemented.
    algorithm : str
        The algorithm to use for the estimation. Currently, only "randomized" (default) is implemented.
    params : dict (optional)
        A dictionary of parameters required for the chosen algorithm; if not provided, default values for the respective algorithm are chosen.
        If `algorithm="randomized"` the number of random samples to use can be specified under the key "nsamples"; default is 10.

    Raises
    ------
    ValueError
        If `p` is not 2 (after defaulting), or if the provided number of samples is not a positive integer.
    TypeError
        If `algorithm` is not a string, or if `params` is neither None nor a dictionary with key "nsamples".
    NotImplementedError
        If an algorithm other than "randomized" is requested.

    Notes
    ----------
    The "randomized" algorithm follows the approach described in [1]; note that in the paper actually the condition number w.r.t. the Frobenius norm is estimated.
    However, this yields an upper bound for the condition number w.r.t. the l2-norm as well.

    References
    ----------
    [1] T. Gudmundsson, C. S. Kenney, and A. J. Laub. Small-Sample Statistical Estimates for Matrix Norms. SIAM Journal on Matrix Analysis and Applications 1995 16:3, 776-792.
    """
    if p is None:
        p = 2  # default: condition number w.r.t. the euclidean norm
    if p != 2:
        raise ValueError(
            f"Only the case p=2 (condition number w.r.t. the euclidean norm) is implemented so far, but input was p={p} (type: {type(p)})."
        )
    if not isinstance(algorithm, str):
        raise TypeError(
            f"Parameter 'algorithm' needs to be a string, but is {algorithm} with data type {type(algorithm)}."
        )
    if algorithm == "randomized":
        if params is None:
            nsamples = 10  # set default value
        else:
            # note: 'params' must be a dict (the annotation previously said 'list',
            # which contradicted this runtime check — fixed to dict)
            if not isinstance(params, dict) or "nsamples" not in params:
                raise TypeError(
                    "If not None, 'params' needs to be a dictionary containing the number of samples under the key 'nsamples'."
                )
            if not isinstance(params["nsamples"], int) or params["nsamples"] <= 0:
                raise ValueError(
                    f"The number of samples needs to be a positive integer, but is {params['nsamples']} with data type {type(params['nsamples'])}."
                )
            nsamples = params["nsamples"]

        m = A.shape[0]
        n = A.shape[1]

        if n > m:
            # the algorithm only works for m >= n, but fortunately, the condition number (w.r.t. l2-norm) is invariant under transposition
            return condest(A.T, p=p, algorithm=algorithm, params=params)

        _, R = qr(A, mode="r")  # only R factor is computed in QR

        # random samples from unit sphere
        # regarding the split: if A.split == 1, then n is probably large and we should split along an axis of size n; otherwise, both n and nsamples should be small
        Q, R_not_used = qr(
            randn(
                n,
                nsamples,
                dtype=A.dtype,
                split=0 if A.split == 1 else None,
                device=A.device,
                comm=A.comm,
            )
        )
        del R_not_used  # only the orthonormal factor Q is needed; free memory early

        # the singular values of R coincide with those of A (reduced QR), so the
        # estimate from [1] is evaluated on R instead of A; sqrt(m / nsamples) is
        # the small-sample scaling factor from the paper
        est = (
            matrix_norm(R @ Q)
            * A.dtype((m / nsamples) ** 0.5, comm=A.comm)
            * matrix_norm(solve_triangular(R, Q))
        )

        return est.squeeze()
    else:
        raise NotImplementedError(
            "So far only algorithm='randomized' is implemented. Please open an issue on GitHub if you would like to suggest implementing another algorithm."
        )


def cross(
a: DNDarray, b: DNDarray, axisa: int = -1, axisb: int = -1, axisc: int = -1, axis: int = -1
) -> DNDarray:
Expand Down
Loading
Loading