@@ -865,10 +865,31 @@ function practical_polynomial_composition(f::StandardBasisPolynomial, g::Standar
865
865
end
866
866
867
867
# # issue #519 polynomial multiplication via FFT
868
+ # #
869
+ # # This implements [Cooley-Tukey](https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm) radix-2
868
870
# # cf. http://www.cs.toronto.edu/~denisp/csc373/docs/tutorial3-adv-writeup.pdf
869
- # # Compute recursive_fft
870
- # # assumes length(as) = 2^k for some k
871
- # # ωₙ is exp(-2pi*im/n) or Cyclotomics.E(n), the latter slower but non-lossy
871
+ # #
872
+ # # This is **much slower** than that of FFTW.jl (and more restrictive). However it does allow for exact computation
873
+ # # using `Cyclotomics.jl`, or with `Mods.jl`.
874
+ # # This assumes length(as) = 2^k for some k
875
+ # # ωₙ is an nth root of unity, for example `exp(-2pi*im/n)` (also available with `sincos(2pi/n)`) for floating point
876
+ # # or Cyclotomics.E(n), the latter much slower but non-lossy.
877
+ # #
878
+ # # Should implement NTT https://www.nayuki.io/page/number-theoretic-transform-integer-dft to close #519
879
+
880
+ struct Pad{T} <: AbstractVector{T}
881
+ a:: Vector{T}
882
+ n:: Int
883
+ end
884
+ Base. length (a:: Pad ) = a. n
885
+ Base. size (a:: Pad ) = (a. n,)
886
+ function Base. getindex (a:: Pad , i)
887
+ u = length (a. a)
888
+ i ≤ u && return a. a[i]
889
+ return zero (first (a. a))
890
+ end
891
+
892
+
872
893
function recursive_fft (as, ωₙ = nothing )
873
894
n = length (as)
874
895
N = 2 ^ ceil (Int, log2 (n))
@@ -886,7 +907,7 @@ function inverse_fft(as, ωₙ=nothing)
886
907
recursive_fft (as, conj (ω)) / n
887
908
end
888
909
889
- # note: can write version for big coefficients, but still allocates a bit
910
+ # note: could write version for big coefficients, but still allocates a bit
890
911
function recursive_fft! (ys, as, ωₙ)
891
912
892
913
n = length (as)
@@ -915,22 +936,19 @@ end
915
936
916
937
# This *should* be faster -- (O(nlog(n)), but this version is definitely not so.
917
938
# when `ωₙ = Cyclotomics.E` and T,S are integer, this can be exact
939
+ # using `FFTW.jl` over `Float64` types is much better and is
940
+ # implemented in an extension
918
941
function poly_multiplication_fft (p:: P , q:: Q , ωₙ= nothing ) where {T,P<: StandardBasisPolynomial{T} ,
919
942
S,Q<: StandardBasisPolynomial{S} }
920
943
as, bs = coeffs0 (p), coeffs0 (q)
921
944
n = maximum (length, (as, bs))
922
945
N = 2 ^ ceil (Int, log2 (n))
923
-
924
- as′ = zeros (promote_type (T,S), 2 N)
925
- copy! (view (as′, 1 : length (as)), as)
946
+ as′ = Pad (as, 2 N)
947
+ bs′ = Pad (bs, 2 N)
926
948
927
949
ω = something (ωₙ, n -> exp (- 2im * pi / n))(2 N)
928
950
âs = recursive_fft (as′, ω)
929
-
930
- as′ .= 0
931
- copy! (view (as′, 1 : length (bs)), bs)
932
- b̂s = recursive_fft (as′, ω)
933
-
951
+ b̂s = recursive_fft (bs′, ω)
934
952
âb̂s = âs .* b̂s
935
953
936
954
PP = promote_type (P,Q)
0 commit comments