mmp2 · jrsassen · Jan 9, 2017 · Jan 9, 2017
diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@ steps and indices to allow for fast re-computation with new parameters.
 
 Package documentation can be found at http://mmp2.github.io/megaman/
 
-If you use our software please cite the following JMLR paper: 
+If you use our software please cite the following JMLR paper:
 
 McQueen, Meila, VanderPlas, & Zhang, "Megaman: Scalable Manifold Learning in Python",
 Journal of Machine Learning Research, Vol 17 no. 14, 2016.
@@ -64,10 +64,11 @@ To install megaman from source requires the following:
 Optional requirements include
 
 - [pyamg](http://pyamg.org/), which allows for faster decompositions of large matrices
+- [pysamg](http://scai.fraunhofer.de/samg/), which allows for even faster decompositions of large matrices. pysamg requires the commercial software Fraunhofer SAMG and is distributed as part of it. For licensing (including test or educational licenses) contact [email protected]
 - [pyflann](http://www.cs.ubc.ca/research/flann/) which offers another method of computing distance matrices (this is bundled with the FLANN source code)
 - [nose](https://nose.readthedocs.org/) for running the unit tests
 
-These requirements can be installed on Linux and MacOSX using the following conda command:
+These requirements (except for SAMG) can be installed on Linux and MacOSX using the following conda command:
 
 ```
 $ conda install --channel=conda-forge pip nose coverage gcc cython numpy scipy scikit-learn pyflann pyamg
@@ -91,7 +92,7 @@ to run the unit tests. ``megaman`` is tested on Python versions 2.7, 3.4, and 3.
 - [Zhongyue Zhang](https://github.com/Jerryzcn)
 - [Jake VanderPlas](http://www.vanderplas.com)
 
-## Other Contributors 
+## Other Contributors
 
 - Xiao Wang: lazy rmetric, Nystrom Extension
 

diff --git a/doc/installation.rst b/doc/installation.rst
@@ -30,10 +30,11 @@ To install ``megaman`` from source requires the following:
 Optional requirements include:
 
 - pyamg_, which provides fast decompositions of large sparse matrices
+- pysamg_, which provides fast decompositions of large sparse matrices. pysamg requires the commercial software Fraunhofer SAMG and is distributed as part of it.
 - pyflann_, which offers an alternative FLANN interface for computing distance matrices (this is bundled with the FLANN source code)
 - nose_ for running the unit tests
 
-These requirements can be installed on Linux and MacOSX using the following conda command::
+These requirements (except SAMG) can be installed on Linux and MacOSX using the following conda command::
 
     $ conda install --channel=jakevdp pip nose coverage gcc cython numpy scipy scikit-learn pyflann pyamg
 
@@ -60,6 +61,7 @@ or, outside the source directory once ``megaman`` is installed::
 .. _scikit-learn: http://scikit-learn.org
 .. _FLANN: http://www.cs.ubc.ca/research/flann/
 .. _pyamg: http://pyamg.org/
+.. _pysamg: http://scai.fraunhofer.de/samg/
 .. _pyflann: http://www.cs.ubc.ca/research/flann/
 .. _nose: https://nose.readthedocs.org/
 .. _cython: http://cython.org/
diff --git a/megaman/embedding/isomap.py b/megaman/embedding/isomap.py
@@ -34,7 +34,7 @@ def isomap(geom, n_components=8, eigen_solver='auto',
     geom : a Geometry object from megaman.geometry.geometry
     n_components : integer, optional
         The dimension of the projection subspace.
-    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
+    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', 'amg' or 'samg'}
         'auto' :
             algorithm will attempt to choose the best method for input data
         'dense' :
@@ -52,9 +52,17 @@ def isomap(geom, n_components=8, eigen_solver='auto',
         'amg' :
             AMG requires pyamg to be installed. It can be faster on very large,
             sparse problems, but may also lead to instabilities.
+        'samg' :
+            Algebraic Multigrid solver from Fraunhofer SCAI (requires
+            ``Fraunhofer SAMG`` and ``pysamg`` to be installed). It can be
+            significantly faster on very large, sparse problems. Note that SAMG
+            is a commercial product and one needs a license to use it. For
+            licensing (including test or educational licenses)
+            contact [email protected]
     random_state : int seed, RandomState instance, or None (default)
         A pseudo random number generator used for the initialization of the
-        lobpcg eigen vectors decomposition when eigen_solver == 'amg'.
+        lobpcg eigen vectors decomposition when eigen_solver == 'amg' or
+        eigen_solver == 'samg'.
         By default, arpack is used.
     path_method : string, method for computing graph shortest path. One of :
         'auto', 'D', 'FW', 'BF', 'J'. See scipy.sparse.csgraph.shortest_path
@@ -125,13 +133,12 @@ class Isomap(BaseEmbedding):
         specification of geometry parameters: keys are
         ["adjacency_method", "adjacency_kwds", "affinity_method",
          "affinity_kwds", "laplacian_method", "laplacian_kwds"]
-    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
+    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', 'amg' or 'samg'}
         'auto' :
             algorithm will attempt to choose the best method for input data
         'dense' :
-            use standard dense matrix operations for the eigenvalue
-            decomposition. Uses a dense data array, and thus should be avoided
-            for large problems.
+            use standard dense matrix operations for the eigenvalue decomposition.
+            For this method, M must be an array or matrix type.  This method should be avoided for large problems.
         'arpack' :
             use arnoldi iteration in shift-invert mode. For this method,
             M may be a dense matrix, sparse matrix, or general linear operator.
@@ -144,6 +151,13 @@ class Isomap(BaseEmbedding):
         'amg' :
             AMG requires pyamg to be installed. It can be faster on very large,
             sparse problems, but may also lead to instabilities.
+        'samg' :
+            Algebraic Multigrid solver from Fraunhofer SCAI (requires
+            ``Fraunhofer SAMG`` and ``pysamg`` to be installed). It can be
+            significantly faster on very large, sparse problems. Note that SAMG
+            is a commercial product and one needs a license to use it. For
+            licensing (including test or educational licenses)
+            contact [email protected]
     random_state : numpy.RandomState or int, optional
         The generator or seed used to determine the starting vector for arpack
         iterations.  Defaults to numpy.random.RandomState
@@ -192,17 +206,38 @@ def fit(self, X, y=None, input_type='data'):
             Interpret X as precomputed distance or adjacency graph
             computed from samples.
 
-        eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
-            The eigenvalue decomposition strategy to use. AMG requires pyamg
-            to be installed. It can be faster on very large, sparse problems,
-            but may also lead to instabilities.
+        eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', 'amg' or 'samg'}
+            'auto' :
+                algorithm will attempt to choose the best method for input data
+            'dense' :
+                use standard dense matrix operations for the eigenvalue decomposition.
+                For this method, M must be an array or matrix type.  This method should be avoided for large problems.
+            'arpack' :
+                use arnoldi iteration in shift-invert mode. For this method,
+                M may be a dense matrix, sparse matrix, or general linear operator.
+                Warning: ARPACK can be unstable for some problems.  It is best to
+                try several random seeds in order to check results.
+            'lobpcg' :
+                Locally Optimal Block Preconditioned Conjugate Gradient Method.
+                A preconditioned eigensolver for large symmetric positive definite
+                (SPD) generalized eigenproblems.
+            'amg' :
+                AMG requires pyamg to be installed. It can be faster on very large,
+                sparse problems, but may also lead to instabilities.
+            'samg' :
+                Algebraic Multigrid solver from Fraunhofer SCAI (requires
+                ``Fraunhofer SAMG`` and ``pysamg`` to be installed). It can be
+                significantly faster on very large, sparse problems. Note that SAMG
+                is a commercial product and one needs a license to use it. For
+                licensing (including test or educational licenses)
+                contact [email protected]
 
         Returns
         -------
         self : object
             Returns the instance itself.
         """
-        
+
         X = self._validate_input(X, input_type)
         self.fit_geometry(X, input_type)
 

diff --git a/megaman/embedding/locally_linear.py b/megaman/embedding/locally_linear.py
@@ -70,7 +70,7 @@ def locally_linear_embedding(geom, n_components, reg=1e-3,
     reg : float
         regularization constant, multiplies the trace of the local covariance
         matrix of the distances.
-    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
+    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', 'amg' or 'samg'}
         'auto' :
             algorithm will attempt to choose the best method for input data
         'dense' :
@@ -88,6 +88,13 @@ def locally_linear_embedding(geom, n_components, reg=1e-3,
         'amg' :
             AMG requires pyamg to be installed. It can be faster on very large,
             sparse problems, but may also lead to instabilities.
+        'samg' :
+            Algebraic Multigrid solver from Fraunhofer SCAI (requires
+            ``Fraunhofer SAMG`` and ``pysamg`` to be installed). It can be
+            significantly faster on very large, sparse problems. Note that SAMG
+            is a commercial product and one needs a license to use it. For
+            licensing (including test or educational licenses)
+            contact [email protected]
     random_state : numpy.RandomState or int, optional
         The generator or seed used to determine the starting vector for arpack
         iterations.  Defaults to numpy.random.
@@ -143,13 +150,12 @@ class LocallyLinearEmbedding(BaseEmbedding):
         specification of geometry parameters: keys are
         ["adjacency_method", "adjacency_kwds", "affinity_method",
          "affinity_kwds", "laplacian_method", "laplacian_kwds"]
-    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
+    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', 'amg' or 'samg'}
         'auto' :
             algorithm will attempt to choose the best method for input data
         'dense' :
-            use standard dense matrix operations for the eigenvalue
-            decomposition. Uses a dense data array, and thus should be avoided
-            for large problems.
+            use standard dense matrix operations for the eigenvalue decomposition.
+            For this method, M must be an array or matrix type.  This method should be avoided for large problems.
         'arpack' :
             use arnoldi iteration in shift-invert mode. For this method,
             M may be a dense matrix, sparse matrix, or general linear operator.
@@ -162,6 +168,13 @@ class LocallyLinearEmbedding(BaseEmbedding):
         'amg' :
             AMG requires pyamg to be installed. It can be faster on very large,
             sparse problems, but may also lead to instabilities.
+        'samg' :
+            Algebraic Multigrid solver from Fraunhofer SCAI (requires
+            ``Fraunhofer SAMG`` and ``pysamg`` to be installed). It can be
+            significantly faster on very large, sparse problems. Note that SAMG
+            is a commercial product and one needs a license to use it. For
+            licensing (including test or educational licenses)
+            contact [email protected]
     random_state : numpy.RandomState or int, optional
         The generator or seed used to determine the starting vector for arpack
         iterations.  Defaults to numpy.random.RandomState

diff --git a/megaman/embedding/ltsa.py b/megaman/embedding/ltsa.py
@@ -30,7 +30,7 @@ def ltsa(geom, n_components, eigen_solver='auto',
     geom : a Geometry object from megaman.geometry.geometry
     n_components : integer
         number of coordinates for the manifold.
-    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
+    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', 'amg' or 'samg'}
         'auto' :
             algorithm will attempt to choose the best method for input data
         'dense' :
@@ -48,6 +48,13 @@ def ltsa(geom, n_components, eigen_solver='auto',
         'amg' :
             AMG requires pyamg to be installed. It can be faster on very large,
             sparse problems, but may also lead to instabilities.
+        'samg' :
+            Algebraic Multigrid solver from Fraunhofer SCAI (requires
+            ``Fraunhofer SAMG`` and ``pysamg`` to be installed). It can be
+            significantly faster on very large, sparse problems. Note that SAMG
+            is a commercial product and one needs a license to use it. For
+            licensing (including test or educational licenses)
+            contact [email protected]
     random_state : numpy.RandomState or int, optional
         The generator or seed used to determine the starting vector for arpack
         iterations.  Defaults to numpy.random.
@@ -125,13 +132,12 @@ class LTSA(BaseEmbedding):
         specification of geometry parameters: keys are
         ["adjacency_method", "adjacency_kwds", "affinity_method",
          "affinity_kwds", "laplacian_method", "laplacian_kwds"]
-    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
+    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', 'amg' or 'samg'}
         'auto' :
             algorithm will attempt to choose the best method for input data
         'dense' :
-            use standard dense matrix operations for the eigenvalue
-            decomposition. Uses a dense data array, and thus should be avoided
-            for large problems.
+            use standard dense matrix operations for the eigenvalue decomposition.
+            For this method, M must be an array or matrix type.  This method should be avoided for large problems.
         'arpack' :
             use arnoldi iteration in shift-invert mode. For this method,
             M may be a dense matrix, sparse matrix, or general linear operator.
@@ -144,6 +150,13 @@ class LTSA(BaseEmbedding):
         'amg' :
             AMG requires pyamg to be installed. It can be faster on very large,
             sparse problems, but may also lead to instabilities.
+        'samg' :
+            Algebraic Multigrid solver from Fraunhofer SCAI (requires
+            ``Fraunhofer SAMG`` and ``pysamg`` to be installed). It can be
+            significantly faster on very large, sparse problems. Note that SAMG
+            is a commercial product and one needs a license to use it. For
+            licensing (including test or educational licenses)
+            contact [email protected]
     random_state : numpy.RandomState or int, optional
         The generator or seed used to determine the starting vector for arpack
         iterations.  Defaults to numpy.random.RandomState