Skip to content

Commit 43a1b38

Browse files
committed
WIP calculations
1 parent 2766321 commit 43a1b38

File tree

1 file changed

+102
-111
lines changed

1 file changed

+102
-111
lines changed

R/vfisher.R

Lines changed: 102 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,25 @@
11

2-
support_spans <- function(m, n, k) {
3-
return(mapply(
4-
function(lo, hi) { lo:hi },
5-
lo = pmax(0L, k - n), hi = pmin(k, m),
6-
SIMPLIFY = FALSE
7-
))
8-
}
9-
10-
logdc <- function(m, n, k, support) {
2+
logdc_calc <- function(m, n, k, support) {
113
return(mapply(
124
function(support, m, n, k) dhyper(support, m, n, k, log = TRUE),
135
support = support, m = m, n = n, k = k, SIMPLIFY = FALSE
146
))
157
}
168

179
dnhyper <- function(ncp, logdc, support) {
18-
return(mapply(
19-
function(ncp, logdc, support) {
20-
d <- logdc + log(ncp) * support
21-
d <- exp(d - max(d))
22-
return(d/sum(d))
23-
},
24-
ncp = ncp, logdc = logdc, support = support, SIMPLIFY = FALSE
25-
))
10+
d <- logdc + log(ncp) * support
11+
d <- exp(d - max(d))
12+
return(d/sum(d))
2613
}
2714

2815
mnhyper <- function(ncp, lo, hi, logdc, support) {
29-
lims <- which(ncp == 0 | is.infinite(ncp))
30-
res <- integer(length(lo))
31-
res[lims] <- ifelse(ncp[lims] == 0, lo[lims], hi[lims])
32-
res[-lims] <- mapply(
33-
function(ncp, logdc, support) {
34-
sum(support * dnhyper(ncp, logdc, support))
35-
},
36-
ncp = ncp[-lims], logdc = logdc[-lims], support = support[-lims]
37-
)
38-
return(res)
16+
if (ncp == 0) {
17+
return(lo)
18+
} else if (is.infinite(ncp)) {
19+
return(hi)
20+
} else {
21+
return(sum(support * dnhyper(ncp, logdc, support)))
22+
}
3923
}
4024

4125
pnhyper <- function(
@@ -69,6 +53,24 @@ pnhyper <- function(
6953
return(res)
7054
}
7155

56+
# ncp.U == ncp_ci(..., lower = FALSE)
57+
# ncp.L == ncp_ci(..., lower = TRUE)
58+
ncp_ci <- function(x, alpha, m, n, k, lo, hi, support, logdc, lower = FALSE) {
59+
if (x == hi) {
60+
return(Inf)
61+
} else {
62+
p <- pnhyper(x, 1, upper.tail = lower, m, n, k, lo, hi, support, logdc)
63+
ple <- p < alpha
64+
if (p == alpha) {
65+
return(1)
66+
} else if (ple != lower) {
67+
return(uniroot(function(t) pnhyper(x, t, upper.tail = lower, m, n, k, lo, hi, support, logdc) - alpha, c(0, 1))$root)
68+
} else { # ple == lower
69+
return(1/uniroot(function(t) pnhyper(x, 1/t, upper.tail = lower, m, n, k, lo, hi, support, logdc) - alpha, c(.Machine$double.eps, 1))$root)
70+
}
71+
}
72+
}
73+
7274
#' @title Vectorized fisher.test
7375
#'
7476
#' @description
@@ -89,15 +91,19 @@ pnhyper <- function(
8991
#' arguments to `fisher.test` associated with other tests (e.g. `hybrid`) do not
9092
#' appear.
9193
#'
92-
#' @return a data.frame, columns `a`, `b`, `c`, `d`, `or`, `estimate`,
93-
#' `p.value`. If `conf.int == TRUE` (the default), will also include column(s)
94-
#' for the confidence interval (two if `alternative == "two.sided"` (default)
95-
#' or one otherwise.). The column names are `ci.lo` and/or `ci.hi`.
94+
#' @return data.table, columns `a`, `b`, `c`, `d`, `or`, `p.value`, `estimate`.
95+
#'
96+
#' If `conf.int == TRUE` (the default), will also include columns for the
97+
#' confidence interval, `ci.lo` and `ci.hi`. If `alternative == "less"`, `ci.lo`
98+
#' will be `0`, and similarly for `alternative == "greater"`, `ci.hi` will be
99+
#' `Inf`. Otherwise (the default), the CI will be centered and both low and high
100+
#' ends will take on values between 0 and Inf.
96101
#'
97102
#' @export
103+
#' @import data.table
98104
vfisher.test <- function(
99-
a, b, c, d, conf.int = TRUE, conf.level = 0.95, or = rep(1, length(a)),
100-
alternative = c("two.sided", "less", "greater")
105+
a, b, c, d, conf.int = TRUE, conf.level = 0.95, or = rep(1, length(a)),
106+
alternative = c("two.sided", "less", "greater")
101107
) {
102108

103109
if (
@@ -112,80 +118,77 @@ vfisher.test <- function(
112118
if (any(c(a, b, c, d) < 0) || anyNA(c(a, b, c, d)))
113119
stop("all entries of 'a', 'b', 'c', 'd' must be nonnegative and finite")
114120

115-
con <- list(mult = 30)
116-
con[names(control)] <- control
117-
if ((mult <- as.integer(con$mult)) < 2)
118-
stop("'mult' must be integer >= 2, typically = 30")
119-
120-
alternative <- match.arg(alternative)
121+
if (any(!is.numeric(or) | is.na(or) | or < 0))
122+
stop("'or' must be a non-NA number between 0 and Inf")
121123

122-
if (!((length(conf.level) == 1L) && is.finite(conf.level) &&
123-
(conf.level > 0) && (conf.level < 1)))
124-
stop("'conf.level' must be a single number between 0 and 1")
125-
126-
if (any(is.na(or) | or < 0))
127-
stop("'or' must be a single number between 0 and Inf")
124+
if (length(or) != length(a)) {
125+
warning("`length(or) != length(a)`; using `rep(or, length.out = length(a))` to extend.")
126+
or <- rep(or, length.out = length(a))
127+
}
128128

129129
# matrix =
130130
# a, b
131131
# c, d
132132

133-
m <- a + c
134-
n <- b + d
135-
k <- a + b
136-
x <- a
137-
138-
lo <- pmax(0L, k - n)
139-
hi <- pmin(k, m)
140-
141-
mle <- numeric(length(lo))
142-
mle[x == lo] <- 0
143-
mle[x == hi] <- Inf
144-
nothilo <- !((x == lo) | (x == hi))
145-
146-
mle[nothilo] <- {
147-
mu <- mnhyper(1, lo[nothilo], hi[nothilo], logdc[nothilo], support[nothilo])
148-
lemu <- mi < x[nothilo]
149-
res <- numeric(length(x[nothilo]))
150-
151-
res[lemu] <- mapply(function(lo, hi, logdc) {
152-
1/uniroot(function(t) mnhyper(1/t, lo, hi, logdc, support) - x, c(.Machine$double.eps, 1))$root
153-
}, lo = lo[lemu], hi = hi[lemu], logdc = logdc[lemu])
154-
res[-lemu] <- mapply(function(lo, hi, logdc) {
155-
uniroot(function(t) mnhyper(t, lo, hi, logdc) - x, c(0, 1))$root
156-
}, lo = lo[-lemu], hi = hi[-lemu], logdc = logdc[-lemu])
157-
res
158-
}
133+
result_dt <- data.table(a = a, b = b, c = c, d = d, or = or)
134+
result_dt[,
135+
m := a + c
136+
][,
137+
n := b + d
138+
][,
139+
k := a + b
140+
][,
141+
lo := pmax(0L, k - n)
142+
][,
143+
hi := pmin(k, m)
144+
]
145+
146+
result_dt[, rowid := 1:.N]
147+
148+
result_dt[,
149+
support := .(list(lo:hi)), by = rowid
150+
][,
151+
logdc := .(list(dhyper(support[[1]], m, n, k, log = TRUE))), by = rowid
152+
]
153+
154+
# x == a
155+
156+
result_dt[a == lo, estimate := 0]
157+
result_dt[a == hi, estimate := Inf]
158+
result_dt[!(a == lo | a == hi), mnhyper1 := mnhyper(1, lo, hi, logdc[[1]], support[[1]]), by = rowid]
159+
result_dt[mnhyper1 == a, estimate := 1]
160+
result_dt[mnhyper1 < a, estimate := 1/uniroot(function(t) mnhyper(1/t, lo, hi, logdc[[1]], support[[1]]) - a, c(.Machine$double.eps, 1))$root, by = rowid]
161+
result_dt[mnhyper1 > a, estimate := uniroot(function(t) mnhyper(t, lo, hi, logdc[[1]], support[[1]]) - a, c(0, 1))$root, by = rowid ]
162+
163+
sdcols <- c("a", "b", "c", "d", "or", "p.value", "estimate")
159164

160165
if (conf.int) {
161-
ncp.U <- function(x, alpha) {
162-
if (x == hi) return(Inf)
163-
p <- pnhyper(x, 1)
164-
if (p < alpha)
165-
uniroot(function(t) pnhyper(x, t) - alpha,
166-
c(0, 1))$root
167-
else if (p > alpha)
168-
1/uniroot(function(t) pnhyper(x, 1/t) - alpha,
169-
c(.Machine$double.eps, 1))$root
170-
else 1
171-
}
172-
ncp.L <- function(x, alpha) {
173-
if (x == lo) return(0)
174-
p <- pnhyper(x, 1, upper.tail = TRUE)
175-
if (p > alpha)
176-
uniroot(function(t) pnhyper(x, t, upper.tail = TRUE) -
177-
alpha, c(0, 1))$root
178-
else if (p < alpha)
179-
1/uniroot(function(t) pnhyper(x, 1/t, upper.tail = TRUE) -
180-
alpha, c(.Machine$double.eps, 1))$root
181-
else 1
166+
167+
alternative <- match.arg(alternative)
168+
169+
if (!((length(conf.level) == 1L) && is.finite(conf.level) &&
170+
(conf.level > 0) && (conf.level < 1)))
171+
stop("'conf.level' must be a single number between 0 and 1")
172+
173+
sdcols <- c(sdcols, c("ci.lo", "ci.hi"))
174+
175+
setattr(result_dt, "conf.level", conf.level)
176+
177+
if (alternative == "less") {
178+
result_dt[, ci.lo := 0]
179+
result_dt[, ci.hi := ncp_ci(a, 1 - conf.level, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = FALSE), by = rowid]
180+
} else if (alternative == "greater") {
181+
result_dt[, ci.lo := ncp_ci(a, 1 - conf.level, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = TRUE), by = rowid]
182+
result_dt[, ci.hi := Inf]
183+
} else {
184+
alpha <- (1 - conf.level)/2
185+
result_dt[, ci.lo := ncp_ci(a, alpha, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = TRUE), by = rowid]
186+
result_dt[, ci.hi := ncp_ci(a, alpha, m, n, k, lo, hi, support[[1]], logdc[[1]], lower = FALSE), by = rowid]
182187
}
183-
CINT <- switch(alternative, less = c(0, ncp.U(x,
184-
1 - conf.level)), greater = c(ncp.L(x, 1 - conf.level),
185-
Inf), two.sided = {
186-
alpha <- (1 - conf.level)/2
187-
c(ncp.L(x, alpha), ncp.U(x, alpha))
188-
})
188+
189+
}
190+
191+
result_dt[, .SD, .SDcols = sdcols]
189192

190193
}
191194

@@ -209,19 +212,7 @@ function (x, y = NULL, workspace = 2e+05, hybrid = FALSE, hybridPars = c(expect
209212
}
210213
})
211214
}
212-
mle <- function(x) {
213-
if (x == lo)
214-
return(0)
215-
if (x == hi)
216-
return(Inf)
217-
mu <- mnhyper(1)
218-
if (mu > x)
219-
uniroot(function(t) mnhyper(t) - x, c(0, 1))$root
220-
else if (mu < x)
221-
1/uniroot(function(t) mnhyper(1/t) - x, c(.Machine$double.eps,
222-
1))$root
223-
else 1
224-
}
215+
225216
ESTIMATE <- c(`odds ratio` = mle(x))
226217
if (conf.int) {
227218
ncp.U <- function(x, alpha) {

0 commit comments

Comments
 (0)