Skip to content

perf: rewrite normalize_latent_structure() in Rust#269

Open
jolars wants to merge 1 commit intofrederikfabriciusbjerre:mainfrom
jolars:optimize-normalize-latents
Open

perf: rewrite normalize_latent_structure() in Rust#269
jolars wants to merge 1 commit intofrederikfabriciusbjerre:mainfrom
jolars:optimize-normalize-latents

Conversation

@jolars
Copy link
Copy Markdown
Collaborator

@jolars jolars commented Apr 22, 2026

Reimplement normalize_latent_structure() in Rust to improve
performance. This change is stacked on top of #268, which needs to be merged first.

library(caugi)

# Old implementation
#' Reference (pure R) implementation of latent-structure normalization
#'
#' Kept for benchmarking against the Rust-backed
#' `normalize_latent_structure()`.
#'
#' Steps performed:
#' 1. De-duplicate `latents`; with no latents the graph is returned as is.
#' 2. Call `exogenize()` on the latent nodes.
#' 3. Repeat until nothing changes:
#'    - remove every remaining latent that has at most one child;
#'    - otherwise remove the first latent (in pair order) whose sorted,
#'      unique child set is strictly contained in another latent's child set.
#'
#' @param cg A graph object accepted by `exogenize()`, `nodes()`,
#'   `children()` and `remove_nodes()` (presumably a caugi graph).
#' @param latents Vector of latent node names; duplicates are ignored.
#' @return The graph after the removals described above.
normalize_latent_structure_reference_r <- function(cg, latents) {
  latents <- unique(latents)
  if (length(latents) == 0L) {
    return(cg)
  }

  cg <- exogenize(cg, nodes = latents)

  # TRUE when `small` has strictly fewer elements than `big` and every
  # element of `small` also occurs in `big`.
  strictly_inside <- function(small, big) {
    length(small) < length(big) && all(small %in% big)
  }

  repeat {
    # Latents that are still present in the (possibly pruned) graph.
    remaining <- intersect(latents, nodes(cg)$name)
    if (length(remaining) == 0L) {
      break
    }

    # Pass 1: drop all latents with zero or one child in a single step.
    n_children <- vapply(
      remaining,
      function(node) {
        kids <- children(cg, node)
        if (is.null(kids)) 0L else length(kids)
      },
      integer(1)
    )
    too_few <- remaining[n_children <= 1L]
    if (length(too_few) > 0L) {
      cg <- remove_nodes(cg, name = too_few)
      next
    }

    # Pass 2 needs at least two latents to compare.
    remaining <- intersect(latents, nodes(cg)$name)
    if (length(remaining) < 2L) {
      break
    }

    kid_sets <- lapply(
      remaining,
      function(node) {
        kids <- children(cg, node)
        if (is.null(kids)) character(0) else sort(unique(kids))
      }
    )

    # Scan pairs (a, b) with a < b and stop at the first pair in which one
    # child set is a strict subset of the other; the smaller one is removed.
    redundant <- NULL
    n_remaining <- length(remaining)
    a <- 1L
    while (is.null(redundant) && a < n_remaining) {
      b <- a + 1L
      while (is.null(redundant) && b <= n_remaining) {
        if (strictly_inside(kid_sets[[a]], kid_sets[[b]])) {
          redundant <- remaining[a]
        } else if (strictly_inside(kid_sets[[b]], kid_sets[[a]])) {
          redundant <- remaining[b]
        }
        b <- b + 1L
      }
      a <- a + 1L
    }

    # No dominated latent found: the structure is stable.
    if (is.null(redundant)) {
      break
    }
    cg <- remove_nodes(cg, name = redundant)
  }

  cg
}

# Benchmark the Rust-backed normalize_latent_structure() against the R
# reference implementation over a small grid of graph sizes (n) and
# density factors (p). The braced block is evaluated for each (n, p)
# combination and the timings are plotted.
bench::press(
  n = c(100, 200),
  p = c(0.5, 0.9),
  {
    # Edge probability passed to generate_graph(): 10 * log10(n) / n,
    # scaled by p (presumably to keep graphs sparse as n grows -- confirm).
    p_mod <- 10 * log10(n) / n * p
    cg <- caugi::generate_graph(n = n, p = p_mod, class = "DAG")
    # Treat about 10% of the nodes as latent, but never fewer than two.
    k <- max(2L, as.integer(round(0.1 * n)))
    # NOTE(review): no seed is set, so graph and latent choice vary by run.
    latents <- sample(caugi::nodes(cg)$name, size = k)

    # Both expressions receive the same graph and latent set.
    bench::mark(
      rust = caugi::normalize_latent_structure(cg, latents = latents),
      reference_r = normalize_latent_structure_reference_r(
        cg,
        latents = latents
      )
    )
  }
) |>
  plot()
#> Running with:
#>       n     p
#> 1   100   0.5
#> 2   200   0.5
#> 3   100   0.9
#> 4   200   0.9

Created on 2026-04-22 with reprex v2.1.1

Reimplement `normalize_latent_structure()` in Rust to improve
performance. This change is stacked on top of frederikfabriciusbjerre#268.
@jolars jolars force-pushed the optimize-normalize-latents branch from f840e27 to c876736 Compare April 22, 2026 09:26
@jolars
Copy link
Copy Markdown
Collaborator Author

jolars commented Apr 22, 2026

Okay, this one is unblocked and rebased. Ready for review!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant