WIP calculations

pearsonca · pearsonca · commit 43a1b3888d85 · 2024-11-25T15:51:02.000-05:00
diff --git a/R/vfisher.R b/R/vfisher.R
@@ -1,41 +1,25 @@
 
-support_spans <- function(m, n, k) {
-  return(mapply(
-    function(lo, hi) { lo:hi },
-    lo = pmax(0L, k - n), hi = pmin(k, m),
-    SIMPLIFY = FALSE
-  ))
-}
-
-logdc <- function(m, n, k, support) {
+logdc_calc <- function(m, n, k, support) { # list: one log-dhyper vector per input
   return(mapply(
     function(support, m, n, k) dhyper(support, m, n, k, log = TRUE),
     support = support, m = m, n = n, k = k, SIMPLIFY = FALSE
   ))
 }
 
 dnhyper <- function(ncp, logdc, support) {
-  return(mapply(
-    function(ncp, logdc, support) {
-      d <- logdc + log(ncp) * support
-      d <- exp(d - max(d))
-      return(d/sum(d))
-    },
-    ncp = ncp, logdc = logdc, support = support, SIMPLIFY = FALSE
-  ))
+    d <- logdc + log(ncp) * support # log-density tilted by log(ncp) per support point
+    d <- exp(d - max(d)) # shift by the max before exp() so the largest term is exp(0)
+    return(d/sum(d)) # rescale so the returned weights sum to 1
 }
 
 mnhyper <- function(ncp, lo, hi, logdc, support) {
-  lims <- which(ncp == 0 | is.infinite(ncp))
-  res <- integer(length(lo))
-  res[lims] <- ifelse(ncp[lims] == 0, lo[lims], hi[lims])
-  res[-lims] <- mapply(
-    function(ncp, logdc, support) {
-      sum(support * dnhyper(ncp, logdc, support))
-    },
-    ncp = ncp[-lims], logdc = logdc[-lims], support = support[-lims]
-  )
-  return(res)
+  if (ncp == 0) { # `if` needs a length-1 ncp (the old body was vectorised)
+    return(lo) # ncp == 0 short-circuits to lo; dnhyper() would take log(0)
+  } else if (is.infinite(ncp)) {
+    return(hi) # infinite ncp short-circuits to hi
+  } else {
+    return(sum(support * dnhyper(ncp, logdc, support))) # support weighted by dnhyper()
+  }
 }
 
 pnhyper <- function(
@@ -69,6 +53,24 @@ pnhyper <- function(
   return(res)
 }
 
+# ncp.U == ncp_ci(..., lower = FALSE); short-circuits to Inf when x == hi
+# ncp.L == ncp_ci(..., lower = TRUE); short-circuits to 0 when x == lo
+ncp_ci <- function(x, alpha, m, n, k, lo, hi, support, logdc, lower = FALSE) {
+  if (x == (if (lower) lo else hi)) { # edge checked depends on which bound is asked for
+    return(if (lower) 0 else Inf)
+  } else {
+    p <- pnhyper(x, 1, upper.tail = lower, m, n, k, lo, hi, support, logdc)
+    ple <- p < alpha
+    if (p == alpha) {
+      return(1)
+    } else if (ple != lower) { # search for the root directly on c(0, 1)
+      return(uniroot(function(t) pnhyper(x, t, upper.tail = lower, m, n, k, lo, hi, support, logdc) - alpha, c(0, 1))$root)
+    } else { # ple == lower: search in 1/t on c(eps, 1), then invert the root
+      return(1/uniroot(function(t) pnhyper(x, 1/t, upper.tail = lower, m, n, k, lo, hi, support, logdc) - alpha, c(.Machine$double.eps, 1))$root)
+    }
+  }
+}
+
 #' @title Vectorized fisher.test
 #'
 #' @description
@@ -89,15 +91,19 @@ pnhyper <- function(
 #' arguments to `fisher.test` associated with other tests (e.g. `hybrid`) do not
 #' appear.
 #'
-#' @return a data.frame, columns `a`, `b`, `c`, `d`, `or`, `estimate`,
-#'   `p.value`. If `conf.int == TRUE` (the default), will also include column(s)
-#'   for the confidence interval (two if `alternative == "two.sided"` (default)
-#'   or one otherwise.). The column names are `ci.lo` and/or `ci.hi`.
+#' @return A `data.table` with columns `a`, `b`, `c`, `d`, `or`, `p.value`, `estimate`.
+#'
+#' If `conf.int == TRUE` (the default), the result also includes the confidence
+#' interval columns `ci.lo` and `ci.hi`. If `alternative == "less"`, `ci.lo` is
+#' always `0`; if `alternative == "greater"`, `ci.hi` is always `Inf`. Otherwise
+#' (`"two.sided"`, the default) the interval is equal-tailed, with
+#' `(1 - conf.level) / 2` in each tail, and both ends lie between 0 and `Inf`.
 #'
 #' @export
+#' @import data.table
 vfisher.test <- function(
-    a, b, c, d, conf.int = TRUE, conf.level = 0.95, or = rep(1, length(a)),
-    alternative = c("two.sided", "less", "greater")
+  a, b, c, d, conf.int = TRUE, conf.level = 0.95, or = rep(1, length(a)),
+  alternative = c("two.sided", "less", "greater")
 ) {
 
   if (
@@ -112,80 +118,77 @@ vfisher.test <- function(
   if (any(c(a, b, c, d) < 0) || anyNA(c(a, b, c, d)))
     stop("all entries of 'a', 'b', 'c', 'd' must be nonnegative and finite")
 
-  con <- list(mult = 30)
-  con[names(control)] <- control
-  if ((mult <- as.integer(con$mult)) < 2)
-    stop("'mult' must be integer >= 2, typically = 30")
-
-  alternative <- match.arg(alternative)
+  if (any(!is.numeric(or) | is.na(or) | or < 0))
+    stop("'or' must be a non-NA number between 0 and Inf")
 
-  if (!((length(conf.level) == 1L) && is.finite(conf.level) &&
-        (conf.level > 0) && (conf.level < 1)))
-    stop("'conf.level' must be a single number between 0 and 1")
-
-  if (any(is.na(or) | or < 0))
-    stop("'or' must be a single number between 0 and Inf")
+  if (length(or) != length(a)) {
+    warning("`length(or) != length(a)`; using `rep(or, length.out = length(a))` to extend.")
+    or <- rep(or, length.out = length(a))
+  }
 
   # matrix =
   # a, b
   # c, d
 
-  m <- a + c
-  n <- b + d
-  k <- a + b
-  x <- a
-
-  lo <- pmax(0L, k - n)
-  hi <- pmin(k, m)
-
-  mle <- numeric(length(lo))
-  mle[x == lo] <- 0
-  mle[x == hi] <- Inf
-  nothilo <- !((x == lo) | (x == hi))
-
-  mle[nothilo] <- {
-    mu <- mnhyper(1, lo[nothilo], hi[nothilo], logdc[nothilo], support[nothilo])
-    lemu <- mi < x[nothilo]
-    res <- numeric(length(x[nothilo]))
-
-    res[lemu] <- mapply(function(lo, hi, logdc) {
-      1/uniroot(function(t) mnhyper(1/t, lo, hi, logdc, support) - x, c(.Machine$double.eps, 1))$root
-    }, lo = lo[lemu], hi = hi[lemu], logdc = logdc[lemu])
-    res[-lemu] <- mapply(function(lo, hi, logdc) {
-      uniroot(function(t) mnhyper(t, lo, hi, logdc) - x, c(0, 1))$root
-    }, lo = lo[-lemu], hi = hi[-lemu], logdc = logdc[-lemu])
-    res
-  }
+  result_dt <- data.table(a = a, b = b, c = c, d = d, or = or)
+  result_dt[,
+    m := a + c
+  ][,
+    n := b + d
+  ][,
+    k := a + b
+  ][,
+    lo := pmax(0L, k - n)
+  ][,
+    hi := pmin(k, m)
+  ]
+
+  result_dt[, rowid := 1:.N]
+
+  result_dt[,
+    support := .(list(lo:hi)), by = rowid
+  ][,
+    logdc :=  .(list(dhyper(support[[1]], m, n, k, log = TRUE))), by = rowid
+  ]
+
+  # x == a
+
+  result_dt[a == lo, estimate := 0]
+  result_dt[a == hi, estimate := Inf]
+  result_dt[!(a == lo | a == hi), mnhyper1 := mnhyper(1, lo, hi, logdc[[1]], support[[1]]), by = rowid]
+  result_dt[mnhyper1 == a, estimate := 1]
+  result_dt[mnhyper1 < a, estimate := 1/uniroot(function(t) mnhyper(1/t, lo, hi, logdc[[1]], support[[1]]) - a, c(.Machine$double.eps, 1))$root, by = rowid]
+  result_dt[mnhyper1 > a, estimate := uniroot(function(t) mnhyper(t, lo, hi, logdc[[1]], support[[1]]) - a, c(0, 1))$root, by = rowid ]
+
+  sdcols <- c("a", "b", "c", "d", "or", "p.value", "estimate")
 
   if (conf.int) {
-    ncp.U <- function(x, alpha) {
-      if (x == hi) return(Inf)
-      p <- pnhyper(x, 1)
-      if (p < alpha)
-        uniroot(function(t) pnhyper(x, t) - alpha,
-                c(0, 1))$root
-      else if (p > alpha)
-        1/uniroot(function(t) pnhyper(x, 1/t) - alpha,
-                  c(.Machine$double.eps, 1))$root
-      else 1
-    }
-    ncp.L <- function(x, alpha) {
-      if (x == lo) return(0)
-      p <- pnhyper(x, 1, upper.tail = TRUE)
-      if (p > alpha)
-        uniroot(function(t) pnhyper(x, t, upper.tail = TRUE) -
-                  alpha, c(0, 1))$root
-      else if (p < alpha)
-        1/uniroot(function(t) pnhyper(x, 1/t, upper.tail = TRUE) -
-                    alpha, c(.Machine$double.eps, 1))$root
-      else 1
+
+    alternative <- match.arg(alternative)
+
+    if (!((length(conf.level) == 1L) && is.finite(conf.level) &&
+          (conf.level > 0) && (conf.level < 1)))
+      stop("'conf.level' must be a single number between 0 and 1")
+
+    sdcols <- c(sdcols, c("ci.lo", "ci.hi"))
+
+    setattr(result_dt, "conf.level", conf.level)
+
+    if (alternative == "less") {
+      result_dt[, ci.lo := 0]
+      result_dt[, ci.hi := ncp_ci(a, 1 - conf.level, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = FALSE), by = rowid]
+    } else if (alternative == "greater") {
+      result_dt[, ci.lo := ncp_ci(a, 1 - conf.level, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = TRUE), by = rowid]
+      result_dt[, ci.hi := Inf]
+    } else {
+      alpha <- (1 - conf.level)/2
+      result_dt[, ci.lo := ncp_ci(a, alpha, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = TRUE), by = rowid]
+      result_dt[, ci.hi := ncp_ci(a, alpha, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = FALSE), by = rowid]
     }
-    CINT <- switch(alternative, less = c(0, ncp.U(x,
-                                                  1 - conf.level)), greater = c(ncp.L(x, 1 - conf.level),
-                                                                                Inf), two.sided = {
-                                                                                  alpha <- (1 - conf.level)/2
-                                                                                  c(ncp.L(x, alpha), ncp.U(x, alpha))
-                                                                                })
+
+  }
+
+  result_dt[, .SD, .SDcols = sdcols]
 
 }
 
@@ -209,19 +212,7 @@ function (x, y = NULL, workspace = 2e+05, hybrid = FALSE, hybridPars = c(expect
                                                                }
                    })
   }
-  mle <- function(x) {
-    if (x == lo)
-      return(0)
-    if (x == hi)
-      return(Inf)
-    mu <- mnhyper(1)
-    if (mu > x)
-      uniroot(function(t) mnhyper(t) - x, c(0, 1))$root
-    else if (mu < x)
-      1/uniroot(function(t) mnhyper(1/t) - x, c(.Machine$double.eps,
-                                                1))$root
-    else 1
-  }
+
   ESTIMATE <- c(`odds ratio` = mle(x))
   if (conf.int) {
     ncp.U <- function(x, alpha) {