Tensor to NumpyArray and back requires copy #8

@arkanoid87

I've been doing some memory profiling with valgrind.

It seems that every conversion between Tensor and NumpyArray, in either direction, requires a full copy of the data, which makes NumpyArray costly to use.

Is there a solution for this?

config.nims

--gc: "arc"
--opt: "speed"
--d: "danger"
--d: "useMalloc"

The results under each line are from a separate compilation and run.
They show that every transform from Tensor to NumpyArray and vice versa requires an allocation the size of the original Tensor's buffer: 1000 × 1000 × 8 bytes = 8,000,000 bytes, matching the ~8 MB delta reported for each conversion.

import std/[random, hashes]
import arraymancer
import nimpy
import scinim/numpyarrays

# RNG seeded from the build timestamp, so every run of the same binary sees
# the same random data (note that a {.compileTime.} variable would not be
# readable from the run-time procs below)
var rng = initRand(hash(CompileDate & CompileTime) and 0x28037091)


proc rndStr(len: int): string =
  ## Random string of `len` characters drawn from ASCII 'A'..'z'.
  result = newStringOfCap(len)
  for _ in 0..<len:
    result.add char(rng.rand(int('A') .. int('z')))


type Randomizable = float | string | int

proc makeTensor[T: Randomizable](shape: varargs[int]): Tensor[T] =
  ## Fills a 2-D tensor of the given shape with random values of type T.
  result = newTensor[T](shape)
  for i in 0..<shape[0]:
    for j in 0..<shape[1]:
      result[i, j] = block:
        when T is float: rng.rand(0.0 .. 10.0)
        elif T is string: rndStr(5)
        elif T is int: rng.rand(0 .. 10)
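
# Sanity check (illustrative addition, not part of the original run): the
# payload of a 1000x1000 Tensor[int] is exactly the ~8 MB that valgrind
# attributes to each conversion below.
static: doAssert 1000 * 1000 * sizeof(int) == 8_000_000  # 64-bit int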

proc sequentialAlloc =
  var randomT = makeTensor[int](1000, 1000)
    # total heap usage: 2 allocs, 2 frees, 8,000,095 bytes allocated

  var randomNd = randomT.toNdArray
    # +53,195,577 bytes (Python initialization overhead)
    # total heap usage: 17,523 allocs, 13,443 frees, 61,195,672 bytes allocated

  var randomT2 = randomNd.toTensor
    # +16,000,190 bytes
    # total heap usage: 17,525 allocs, 13,445 frees, 69,195,767 bytes allocated

  var randomNd2 = randomT2.toNdArray
    # +8,000,518 bytes
    # total heap usage: 17,539 allocs, 13,458 frees, 77,196,285 bytes allocated

  var randomT3 = randomNd2.toTensor
    # +8,000,095 bytes
    # total heap usage: 17,541 allocs, 13,460 frees, 85,196,380 bytes allocated

  var randomNd3 = randomT3.toNdArray
    # +8,000,518 bytes
    # total heap usage: 17,555 allocs, 13,473 frees, 93,196,898 bytes allocated

  var randomT4 = randomNd3.toTensor
    # +8,000,095 bytes
    # total heap usage: 17,557 allocs, 13,475 frees, 101,196,993 bytes allocated

  var randomNd4 = randomT4.toNdArray
    # +8,000,518 bytes
    # total heap usage: 17,571 allocs, 13,488 frees, 109,197,511 bytes allocated

  #[
  ==541685== HEAP SUMMARY:
  ==541685==     in use at exit: 5,576,332 bytes in 4,083 blocks
  ==541685==   total heap usage: 17,571 allocs, 13,488 frees, 109,197,511 bytes allocated
  ==541685==
  ==541685== LEAK SUMMARY:
  ==541685==    definitely lost: 64 bytes in 1 blocks
  ==541685==    indirectly lost: 0 bytes in 0 blocks
  ==541685==      possibly lost: 185,881 bytes in 153 blocks
  ==541685==    still reachable: 5,390,387 bytes in 3,929 blocks
  ==541685==         suppressed: 0 bytes in 0 blocks
  ==541685== Rerun with --leak-check=full to see details of leaked memory
  ==541685==
  ==541685== Use --track-origins=yes to see where uninitialised values come from
  ==541685== For lists of detected and suppressed errors, rerun with: -s
  ==541685== ERROR SUMMARY: 9641 errors from 152 contexts (suppressed: 0 from 0)
  ]#



proc sequentialRewriteAlloc =
  var randomT = makeTensor[int](1000, 1000)
    # total heap usage: 2 allocs, 2 frees, 8,000,095 bytes allocated

  var randomNd = randomT.toNdArray
    # +53,195,577 bytes (Python initialization overhead)
    # total heap usage: 17,523 allocs, 13,443 frees, 61,195,672 bytes allocated

  randomT = randomNd.toTensor
    # +16,000,190 bytes
    # total heap usage: 17,525 allocs, 13,445 frees, 69,195,767 bytes allocated

  randomNd = randomT.toNdArray
    # +8,000,518 bytes
    # total heap usage: 17,539 allocs, 13,458 frees, 77,196,285 bytes allocated

  randomT = randomNd.toTensor
    # +8,000,095 bytes
    # total heap usage: 17,541 allocs, 13,460 frees, 85,196,380 bytes allocated

  randomNd = randomT.toNdArray
    # +8,000,518 bytes
    # total heap usage: 17,555 allocs, 13,473 frees, 93,196,898 bytes allocated

  randomT = randomNd.toTensor
    # +8,000,095 bytes
    # total heap usage: 17,557 allocs, 13,475 frees, 101,196,993 bytes allocated

  randomNd = randomT.toNdArray
    # +8,000,518 bytes
    # total heap usage: 17,571 allocs, 13,488 frees, 109,197,511 bytes allocated

    #[
    ==544560== HEAP SUMMARY:
    ==544560==     in use at exit: 5,576,268 bytes in 4,081 blocks
    ==544560==   total heap usage: 17,569 allocs, 13,488 frees, 109,197,447 bytes allocated
    ==544560==
    ==544560== LEAK SUMMARY:
    ==544560==    definitely lost: 64 bytes in 1 blocks
    ==544560==    indirectly lost: 0 bytes in 0 blocks
    ==544560==      possibly lost: 185,881 bytes in 153 blocks
    ==544560==    still reachable: 5,390,323 bytes in 3,927 blocks
    ==544560==         suppressed: 0 bytes in 0 blocks
    ==544560== Rerun with --leak-check=full to see details of leaked memory
    ==544560==
    ==544560== Use --track-origins=yes to see where uninitialised values come from
    ==544560== For lists of detected and suppressed errors, rerun with: -s
    ==544560== ERROR SUMMARY: 9641 errors from 152 contexts (suppressed: 0 from 0)
    ]#



proc chainAlloc() =
  # discard makeTensor[int](1000, 1000)
  # total heap usage: 2 allocs, 2 frees, 8,000,095 bytes allocated

  # discard makeTensor[int](1000, 1000).toNdArray
  # total heap usage: 17,523 allocs, 13,443 frees, 61,195,672 bytes allocated

  # discard makeTensor[int](1000, 1000).toNdArray.toTensor
  # total heap usage: 17,525 allocs, 13,445 frees, 69,195,767 bytes allocated

  # discard makeTensor[int](1000, 1000).toNdArray.toTensor.toNdArray
  # total heap usage: 17,539 allocs, 13,458 frees, 77,196,285 bytes allocated

  # discard makeTensor[int](1000, 1000).toNdArray.toTensor.toNdArray.toTensor
  # total heap usage: 17,541 allocs, 13,460 frees, 85,196,380 bytes allocated

  # discard makeTensor[int](1000, 1000).toNdArray.toTensor.toNdArray.toTensor.toNdArray
  # total heap usage: 17,555 allocs, 13,473 frees, 93,196,898 bytes allocated

  # discard makeTensor[int](1000, 1000).toNdArray.toTensor.toNdArray.toTensor.toNdArray.toTensor
  # total heap usage: 17,557 allocs, 13,475 frees, 101,196,993 bytes allocated

  discard makeTensor[int](1000, 1000).toNdArray.toTensor.toNdArray.toTensor.toNdArray.toTensor.toNdArray
  # total heap usage: 17,571 allocs, 13,488 frees, 109,197,511 bytes allocated

  #[
  ==546925== HEAP SUMMARY:
  ==546925==     in use at exit: 5,576,332 bytes in 4,083 blocks
  ==546925==   total heap usage: 17,571 allocs, 13,488 frees, 109,197,511 bytes allocated
  ==546925== 
  ==546925== LEAK SUMMARY:
  ==546925==    definitely lost: 64 bytes in 1 blocks
  ==546925==    indirectly lost: 0 bytes in 0 blocks
  ==546925==      possibly lost: 185,881 bytes in 153 blocks
  ==546925==    still reachable: 5,390,387 bytes in 3,929 blocks
  ==546925==         suppressed: 0 bytes in 0 blocks
  ==546925== Rerun with --leak-check=full to see details of leaked memory
  ==546925== 
  ==546925== Use --track-origins=yes to see where uninitialised values come from
  ==546925== For lists of detected and suppressed errors, rerun with: -s
  ==546925== ERROR SUMMARY: 9641 errors from 152 contexts (suppressed: 0 from 0)
  ]#

chainAlloc()  # only chainAlloc runs here; the other procs were profiled in separate builds (see above)
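
For the NumPy-to-Tensor direction, something like the sketch below might avoid the copy. This is only a sketch, under assumptions to verify against the installed versions: that nimpy's raw_buffers module exposes RawPyBuffer, getBuffer, release and the PyBUF_WRITABLE flag, and that Arraymancer's fromBuffer can wrap foreign memory without copying. viewAsTensor is a hypothetical helper, not part of scinim.

import arraymancer
import nimpy
import nimpy/raw_buffers

proc viewAsTensor[T](arr: PyObject, shape: varargs[int]): Tensor[T] =
  ## Hypothetical zero-copy view: wraps the NumPy array's buffer in a Tensor.
  ## The caller must keep `arr` alive for as long as the view is used,
  ## otherwise the Tensor dangles once Python frees the array.
  var buf: RawPyBuffer
  arr.getBuffer(buf, PyBUF_WRITABLE)  # borrow the array's buffer
  result = fromBuffer[T](cast[ptr UncheckedArray[T]](buf.buf), shape)  # wrap, don't copy
  buf.release()  # end the borrow; the array itself still owns the memory

The opposite direction (exposing a Tensor's buffer to Python without a copy) is harder, since the resulting NumPy array would have to keep the Nim buffer alive; that may be why toNdArray copies.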
