@@ -15,29 +15,28 @@ x = Float32.(rand(N))
1515@time solve_residuals!(deepcopy(x), fes; double_precision = false )
1616# 0.654833 seconds (1.99 k allocations: 390.841 MiB, 3.71% gc time)
1717@time solve_residuals!(deepcopy(x), fes; double_precision = false , method = :Metal)
18- # 0.298326 seconds (129.08 k allocations: 79.208 MiB)
18+ # 1.335206 seconds (3.28 M allocations: 402.660 MiB, 1.80% gc time, 123.64% compilation time: <1% of which was recompilation)
1919@time solve_residuals!([x x x x], fes)
20- # 1.604061 seconds (1.25 M allocations: 416.364 MiB, 4.21% gc time, 30.57% compilation time)
20+ # 1.886616 seconds (3.60 M allocations: 731.777 MiB, 1.34% gc time, 122.12% compilation time: 5% of which was recompilation)
2121@time solve_residuals!([x x x x], fes; method = :Metal)
22- # 0.790909 seconds (531.78 k allocations: 204.363 MiB, 3.19% compilation time)
22+ # 1.421205 seconds (2.78 M allocations: 497.846 MiB, 1.64% gc time, 110.87% compilation time: <1% of which was recompilation)
2323
2424
2525
2626# More complicated problem
27- N = 800000 # number of observations
28- M = 400000 # number of workers
29- O = 50000 # number of firms
27+ N = 8000000 # number of observations
28+ M = 4000000 # number of workers
29+ O = 500000 # number of firms
3030Random. seed!(1234 )
3131pid = rand(1 : M, N)
3232fid = [rand(max(1 , div(x, 8 )- 10 ): min(O, div(x, 8 )+ 10 )) for x in pid]
3333x = rand(N)
3434fes = [FixedEffect(pid), FixedEffect(fid)]
3535
3636
37- @time solve_residuals!([x x x x], fes; double_precision = false )
38- # 8.294446 seconds (225.13 k allocations: 67.777 MiB, 0.11% gc time)
39-
40- @time solve_residuals!([x x x x], fes; double_precision = false , method = :Metal)
41- # 1.605953 seconds (3.25 M allocations: 103.342 MiB, 1.82% gc time)
37+ @time solve_residuals!([x x x x], fes; double_precision = false , maxiter = 100 )
38+ # 36.554763 seconds (98.71 M allocations: 5.253 GiB, 1.11% gc time, 114.45% compilation time: 7% of which was recompilation)
39+ @time solve_residuals!([x x x x], fes; double_precision = false , method = :Metal, maxiter = 100 )
40+ # 20.652590 seconds (79.33 M allocations: 4.114 GiB, 0.75% gc time, 162.10% compilation time: <1% of which was recompilation)
4241
4342
0 commit comments