# Function to estimate alpha and beta of a Beta distribution
# from a given HPI [L, U] and mean m
estimate_beta_params_from_HPI <- function(L, U, m, conf = 0.95, verbose = TRUE) {
  # Initial guesses for alpha and beta
  alpha0 <- m * 10
  beta0 <- (1 - m) * 10
  # Objective function: minimize error between estimated HPI/mean and given values
  objective_fn <- function(par) {
    a <- par[1]
    b <- par[2]
    if (a <= 0 || b <= 0) return(Inf)
    hpi <- HDInterval::hdi(qbeta, shape1 = a, shape2 = b, credMass = conf)
    est_mean <- a / (a + b)
    hpi_error <- (hpi[1] - L)^2 + (hpi[2] - U)^2
    mean_error <- (est_mean - m)^2
    return(hpi_error + mean_error * 10)  # Penalize deviation in mean
  }

  # Minimize the objective from the initial guesses and return the fitted parameters
  fit <- optim(c(alpha0, beta0), objective_fn)
  if (verbose) print(fit)
  c(alpha = fit$par[1], beta = fit$par[2])
}
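For instance, a quick sanity check might look like this (the target values here are made up for illustration, not taken from the text):

# Illustrative call: Beta parameters whose 95% HPI is roughly [0.2, 0.6]
# with mean 0.4 (hypothetical target values)
estimate_beta_params_from_HPI(L = 0.2, U = 0.6, m = 0.4)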
options(warn = -1)
alpha = 0.05
sim = \(N = 100) {
  A = sample(1:(N - 2), 1)  # Ensure A leaves enough room for B and C
  remaining = N - A
  if (remaining > 1) {
    B = sample(1:(remaining - 1), 1)  # Ensure B is at least 1
    C = N - A - B
    ABC = c(A, B, C)
    # Handle the case where a group size might be 0
    # (unlikely with the A and B sampling above)
    abc = sapply(ABC, \(x) if (x > 0) sample(1:x, 1) else 0)
    x = abc
    n = ABC
    contig = rbind(s = x, f = n - x)
    pg = chisq.test(contig, correct = FALSE)$p.value
    minpp = min(as.vector(
      pairwise.prop.test(x, n, correct = FALSE, p.adj = "bon")$p.value
    ), na.rm = TRUE)
    list(pg = pg, minpp = minpp, x = x, n = n)
  } else {
    # If A is too large, return NULL to flag an invalid simulation
    return(NULL)
  }
}

res = sim()
while (is.null(res) || res$pg < alpha || res$minpp > alpha) {
  res = sim()
}
res
res = sim()
while (is.null(res) || res$pg > alpha || res$minpp < alpha) {
  res = sim()
}
res
# Function to simulate data for three groups and perform Fisher's exact test.
# N: Total sample size.
sim_fisher <- function(N = 100) {

  # Determine sample sizes for each of the three groups.
  A <- sample(1:(N - 2), 1)  # Ensure A leaves enough room for B and C.
  remaining <- N - A
  if (remaining > 1) {
    B <- sample(1:(remaining - 1), 1)  # Ensure B is at least 1.
    C <- N - A - B
    ABC <- c(A, B, C)  # Vector of group sizes.

    # Randomly generate the number of successes for each group (cannot exceed group size).
    abc <- sapply(ABC, function(x) if (x > 0) sample(0:x, 1) else 0)
    x <- abc  # Vector of number of successes per group.
    n <- ABC  # Vector of total samples per group.
    # Create a data frame for the overall Fisher's exact test.
    data_all <- data.frame(
      group = factor(rep(c("A", "B", "C"), times = n)),
      success = unlist(sapply(1:3, function(i) c(rep(1, x[i]), rep(0, n[i] - x[i]))))
    )
    table_all <- table(data_all$group, data_all$success)
    fisher_pg <- fisher.test(table_all)$p.value  # P-value of the overall Fisher's exact test.

    # Pairwise Fisher's exact tests with Bonferroni correction
    # (fmsb::pairwise.fisher.test, as used later in this post).
    pairwise_p <- as.vector(
      fmsb::pairwise.fisher.test(x, n, p.adjust.method = "bonferroni")$p.value
    )
    min_pairwise_p_bonf <- min(pairwise_p, na.rm = TRUE)

    list(fisher_pg = fisher_pg, min_pairwise_p_bonf = min_pairwise_p_bonf, x = x, n = n)
  } else {
    return(NULL)  # Return NULL if the group sizes are invalid.
  }
}
# Find data where the overall Fisher's exact test is not significant,
# but at least one pairwise Fisher's exact test (with Bonferroni correction) is significant.
res_no_overall_sig_pairwise_sig <- NULL
while (is.null(res_no_overall_sig_pairwise_sig) ||
       res_no_overall_sig_pairwise_sig$fisher_pg < alpha ||
       res_no_overall_sig_pairwise_sig$min_pairwise_p_bonf > alpha) {
  res_no_overall_sig_pairwise_sig <- sim_fisher()
}
cat("Data where overall Fisher's test is not significant, but pairwise is:\n")
print(res_no_overall_sig_pairwise_sig)
cat("\n")
# Find data where the overall Fisher's exact test is significant,
# but all pairwise Fisher's exact tests (with Bonferroni correction) are not significant.
res_overall_sig_no_pairwise_sig <- NULL
while (is.null(res_overall_sig_no_pairwise_sig) ||
       res_overall_sig_no_pairwise_sig$fisher_pg > alpha ||
       res_overall_sig_no_pairwise_sig$min_pairwise_p_bonf < alpha) {
  res_overall_sig_no_pairwise_sig <- sim_fisher()
}
cat("Data where overall Fisher's test is significant, but pairwise is not:\n")
print(res_overall_sig_no_pairwise_sig)
# Function to simulate data for three groups and perform a Chi-squared test
# (without Yates' correction).
# N: Total sample size.
sim_chisq <- function(N = 100) {

  # Determine sample sizes for each of the three groups.
  A <- sample(1:(N - 2), 1)  # Randomly select a size for group A, ensuring space for B and C.
  remaining <- N - A
  if (remaining > 1) {
    B <- sample(1:(remaining - 1), 1)  # Randomly select a size for group B, ensuring space for C.
    C <- N - A - B  # Calculate the size for group C.
    ABC <- c(A, B, C)  # Vector containing the sample sizes of the three groups.

    # Randomly generate the number of successes for each group (between 0 and the group size).
    abc <- sapply(ABC, function(x) if (x > 0) sample(0:x, 1) else 0)
    x <- abc  # Vector containing the number of successes for each group.
    n <- ABC  # Vector containing the total number of trials for each group.

    # Create a contingency table for the overall Chi-squared test.
    contig_all <- rbind(s = x, f = n - x)  # Rows: successes (s), failures (f); columns: groups.
    chisq_pg <- chisq.test(contig_all, correct = FALSE)$p.value  # Chi-squared test (no correction).

    # Perform pairwise proportion tests (no continuity correction) with Bonferroni adjustment.
    pairwise_prop_p_values <- as.vector(
      pairwise.prop.test(x, n, correct = FALSE, p.adj = "bon")$p.value
    )
    min_pairwise_p_bonf <- min(pairwise_prop_p_values, na.rm = TRUE)  # Minimum adjusted p-value.

    # Return the overall p-value, the minimum Bonferroni-corrected pairwise p-value,
    # the successes, and the total trials.
    list(chisq_pg = chisq_pg, min_pairwise_p_bonf = min_pairwise_p_bonf, x = x, n = n)
  } else {
    return(NULL)  # Return NULL if the group sizes are invalid.
  }
}
# Find data where the overall Chi-squared test is not significant (p > alpha),
# but at least one pairwise proportion test (with Bonferroni correction) is significant (p <= alpha).
res_no_overall_sig_pairwise_sig <- NULL
while (is.null(res_no_overall_sig_pairwise_sig) ||
       res_no_overall_sig_pairwise_sig$chisq_pg < alpha ||
       res_no_overall_sig_pairwise_sig$min_pairwise_p_bonf > alpha) {
  res_no_overall_sig_pairwise_sig <- sim_chisq()  # Keep simulating until the condition is met.
}
cat("Data where overall Chi-squared test is not significant, but pairwise proportion test is:\n")
print(res_no_overall_sig_pairwise_sig)
cat("\n")
# Find data where the overall Chi-squared test is significant (p <= alpha),
# but all pairwise proportion tests (with Bonferroni correction) are not significant (p > alpha).
res_overall_sig_no_pairwise_sig <- NULL
while (is.null(res_overall_sig_no_pairwise_sig) ||
       res_overall_sig_no_pairwise_sig$chisq_pg > alpha ||
       res_overall_sig_no_pairwise_sig$min_pairwise_p_bonf < alpha) {
  res_overall_sig_no_pairwise_sig <- sim_chisq()  # Keep simulating until the condition is met.
}
cat("Data where overall Chi-squared test is significant, but pairwise proportion test is not:\n")
print(res_overall_sig_no_pairwise_sig)
# Plotting (example: posterior distribution of probabilities for each outcome)
cat("\nPosterior Distribution Plots for Each Outcome:\n")
par(mfrow = c(2, 3))
for (i in 1:6) {
  plot(prob_simplex_posterior_jags[, i], type = "l",
       main = paste("Prob[", i, "]"),
       xlab = "Iteration", ylab = "Probability")
  abline(h = 1/6, col = "red", lty = 2)
}

dice_prob_mean <- prob_simplex_posterior_jags
colors <- c("skyblue", "lightcoral", "lightgreen", "gold", "lightsalmon", "lightcyan")
for (i in 1:ncol(dice_prob_mean)) {
  BEST::plotPost(dice_prob_mean[, i], compVal = 1/6,
                 xlab = paste("pip", i), xlim = c(0, 0.4),
                 main = "", col = colors[i], border = "black")
}
# Function to perform a single simulation
sim_single <- function(N = 100) {
  A <- sample(1:N, 1)
  while (A > N - 2) A <- sample(1:N, 1)
  B <- sample(N - A - 1, 1)
  C <- N - A - B
  ABC <- c(A, B, C)
  abc <- sapply(ABC, function(x) sample(x, 1))
  x <- abc
  n <- ABC
  m <- rbind(s = x, f = n - x)
  # Pairwise Fisher's exact tests (fmsb::pairwise.fisher.test) under several
  # p-value adjustment methods
  bonf_res <- pairwise.fisher.test(x, n, p.adj = 'bonf')
  holm_res <- pairwise.fisher.test(x, n, p.adj = 'holm')
  fdr_res  <- pairwise.fisher.test(x, n, p.adj = 'fdr')
  none_res <- pairwise.fisher.test(x, n, p.adj = 'none')

  # Return the minimum adjusted p-value under each method; the search criteria
  # below compare the Bonferroni and Holm minima against alpha
  list(bonf = min(bonf_res$p.value, na.rm = TRUE),
       holm = min(holm_res$p.value, na.rm = TRUE),
       fdr  = min(fdr_res$p.value,  na.rm = TRUE),
       none = min(none_res$p.value, na.rm = TRUE),
       x = x, n = n, m = m)
}
library(parallel)

# Function to find a result that meets the criteria using parallel processing
find_result_parallel_loop <- function(alpha = 0.05, num_cores = detectCores() - 1) {
  # Create a cluster of worker processes
  cl <- makeCluster(num_cores)
  # Ensure the cluster is stopped when the function exits
  on.exit(stopCluster(cl))
  # Export the sim_single function to the worker processes
  clusterExport(cl, "sim_single")

  # Load the fmsb library on the worker processes
  clusterEvalQ(cl, library(fmsb))

  cat("Searching for a result that meets the criteria...\n")
  while (TRUE) {
    # Run one simulation per core in parallel
    results <- parLapply(cl, 1:num_cores, function(i) sim_single())

    # Check the results for the desired condition:
    # not significant after Bonferroni, but significant after Holm
    for (res in results) {
      if (res$bonf > alpha && res$holm < alpha) {
        cat("Result meeting the criteria found:\n")
        return(res)
      }
    }
    # If no result was found in this batch, continue the loop
  }
}
# Search in parallel until a qualifying result is found
res_parallel_loop <- find_result_parallel_loop(alpha = alpha)

# Output the result
print(res_parallel_loop)
This function helps visualize posterior distribution samples from Bayesian inference and displays various informative elements. It can show the mean, mode, median, a credible interval (either HDI or quantiles), the Region of Practical Equivalence (ROPE), and a comparison value.
Arguments:
* `posterior_samples`: A numeric vector of posterior distribution samples.
* `credMass`: The width of the credible interval (a numeric value between 0 and 1; default is 0.95).
* `ROPE`: A numeric vector specifying the ROPE range (e.g., `c(lower_bound, upper_bound)`).
* `compVal`: A numeric value for comparison.
* `showMean`: A logical value indicating whether to display the mean.
* `showMode`: A logical value indicating whether to display the mode.
* `showMedian`: A logical value indicating whether to display the median.
* `showCurve`: A logical value indicating whether to display a density plot (if `FALSE`, a histogram is shown).
* `showQuantile`: A logical value indicating whether to display the credible interval as quantiles instead of the HDI.
* `xlab`: The label for the x-axis.
* `main`: The title of the plot.
* `hist_color`: The color of the histogram.
* `textSize`: The size of the text elements.
* `yaxt`: The y-axis style (default is to show only numbers).
* `...`: Additional arguments passed to the plotting function.
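For illustration, a call might look like the following (a minimal sketch: it assumes `plotpost()` has been loaded, e.g. via `source('plotpost.R')`, and the posterior draws are made up):

set.seed(1)
posterior_samples <- rbeta(1e4, 12, 28)  # made-up posterior draws
plotpost(posterior_samples,
         credMass = 0.95,           # 95% credible interval
         compVal  = 1/3,            # comparison value
         ROPE     = c(0.28, 0.32),  # hypothetical ROPE
         showMode = TRUE,
         showCurve = FALSE,         # histogram instead of a density curve
         xlab = expression(theta), main = "Posterior of theta")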
# riskratio.boot calculates the risk ratio and its Highest Density Interval (HDI)
# using a bootstrap method. It takes the number of events and the total number
# of observations for two groups as input.
riskratio.boot <- function(r1, r2, n1, n2, nboot = 5000, conf.level = 0.95, verbose = FALSE) {
  # Combine the number of events and total observations for both groups.
  r <- c(r1, r2)
  n <- c(n1, n2)
  # 2x2 matrix representing the contingency table (events and non-events).
  m <- cbind(r, n - r)

  # Bootstrap resampling to estimate the risk-ratio distribution.
  rr <- replicate(nboot, {
    # Simulate the number of events in the first group by sampling with replacement
    # from a population with the observed event rate.
    sample(c(rep(1, r1), rep(0, n1 - r1)), n1, replace = TRUE) |> sum() -> R1
    # Simulate the number of events in the second group similarly.
    sample(c(rep(1, r2), rep(0, n2 - r2)), n2, replace = TRUE) |> sum() -> R2
    # Calculate the risk ratio from the bootstrapped event counts.
    (R1 / n1) / (R2 / n2)
  })

  # If verbose is TRUE, plot the bootstrap distribution of the risk ratio.
  if (verbose) {
    source('plotpost.R')  # 'plotpost.R' provides the plotting helper used below.
    plotpost(rr, compVal = 1, showCurve = 1, lwd = 4)  # Comparison value of 1.
  }

  # Mean of the bootstrapped risk ratios.
  b_mean <- mean(rr)
  # Highest Density Interval (HDI) of the bootstrapped risk ratios.
  b_ci <- HDInterval::hdi(rr, credMass = conf.level)

  # Return the bootstrap mean and the HDI.
  list(b_mean = b_mean, b_ci = b_ci)
}
# Example usage of the riskratio.boot function.
riskratio.boot(244, 282, 2345, 2333)
The `riskratio.boot` function in R estimates the risk ratio between two groups and provides its Highest Density Interval (HDI) using a bootstrap resampling approach. It takes the counts of events and the total number of observations for each of the two groups as input.
* `r1`: The number of events in the first group.
* `r2`: The number of events in the second group.
* `n1`: The total number of observations in the first group.
* `n2`: The total number of observations in the second group.
* `nboot`: The number of bootstrap replicates to perform (default is 5000). A larger number of replicates generally provides more stable estimates.
* `conf.level`: The credibility level for the Highest Density Interval (HDI) (default is 0.95, corresponding to a 95% HDI).
* `verbose`: A logical value indicating whether to display a plot of the bootstrap distribution of the risk ratio (default is `FALSE`). If `TRUE`, it assumes a script named `plotpost.R` is available in the working directory for plotting.
The function works by simulating the event outcomes in each group through bootstrap resampling. For each group, it draws `n1` (or `n2`) samples with replacement from a hypothetical population that has the observed proportion of events (`r1/n1` or `r2/n2`). The number of events in each resampled set (`R1` and `R2`) is then used to calculate a bootstrapped risk ratio `(R1/n1) / (R2/n2)`. This process is repeated `nboot` times to generate a distribution of risk ratios. The function then calculates the mean of this distribution and its Highest Density Interval (HDI), which represents the most credible range for the true risk ratio given the data and the bootstrap procedure.
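Since resampling `n1` indicators with replacement from a set with proportion `r1/n1` is distributionally the same as a single Binomial(`n1`, `r1/n1`) draw, the bootstrap core can be sketched more compactly. The following is for intuition only; `rr_binom` is a hypothetical helper, not part of `riskratio.boot`:

rr_binom <- function(r1, r2, n1, n2, nboot = 5000) {
  R1 <- rbinom(nboot, n1, r1 / n1)  # bootstrapped event counts, group 1
  R2 <- rbinom(nboot, n2, r2 / n2)  # bootstrapped event counts, group 2
  (R1 / n1) / (R2 / n2)             # vector of bootstrapped risk ratios
}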
If `verbose` is set to `TRUE`, the function attempts to plot the distribution of the bootstrapped risk ratios using a script named `plotpost.R`. This requires that the `plotpost.R` script exists in the current working directory and is capable of handling the vector of bootstrapped risk ratios.
Value:
The function returns a list with the following components:
* `b_mean`: The mean of the bootstrapped risk ratios.
* `b_ci`: The Highest Density Interval (HDI) of the bootstrapped risk ratios, as a vector with the lower and upper bounds.
Note:
The `verbose = TRUE` option depends on an external script `plotpost.R`. If you intend to use this option, ensure that the script is available and correctly implemented for plotting posterior-like distributions. The HDI is calculated with `HDInterval::hdi`, so the `HDInterval` package must be installed (`install.packages("HDInterval")`); because the call is namespace-qualified, attaching it with `library(HDInterval)` is not required.
ド底辺医大の十二箇条 (The Twelve Laws of Do-Teihen Medical School)
第1法則 ド底辺シリツ医大が悪いのではない、本人の頭が悪いんだ。 It is not the bottom medical school that is despicable but its enrollee, who deserves to be called a bona fide moron beyond redemption.
第2法則 ド底辺シリツ医大卒は恥ずかしくて、学校名を皆さま言いません。 The graduates of Do-Teihen are so ashamed that none of them dare mention their own alma mater.
第3法則 ド底辺特殊シリツ医大卒は裏口入学の負い目から裏口馬鹿を暴く人間を偽医者扱いしたがる。 Burdened by the guilt of having bought their way into the exclusively bottom-leveled medical school, Do-Teihen graduates tend to brand as a charlatan any genuine doctor who exposes their imbecility.
第4法則 ド底辺医大卒は、偏差値や出身校を気にするなと言いながら、自分の子どもには絶対にそんな大学へは行かせたくないと思っている。 While claiming that academic ranking or alma mater does not matter, a Do-Teihen graduate would never let their own child attend such a university.
第5法則 ド底辺医大卒は、裏口入学を否定しない。否定できない。なぜなら、実際に自分がそうだからである。 A Do-Teihen graduate never denies the existence of backdoor admissions—because deep down, they know they were one of them.
第6法則 ド底辺医大は、「差別するな」と叫びながら、偏差値・財力・コネがない者を最もあからさまに差別する。 While crying out against discrimination, Do-Teihen medical schools are the very institutions that blatantly discriminate against those without test scores, wealth, or connections.
第7法則 ド底辺医大卒は「実力で入った」と言うが、その“実力”の定義を決して口にしない。 A Do-Teihen graduate may claim, “I got in on merit,” but they will never define what that 'merit' actually was.
第8法則 ド底辺医大卒の最大の敵は、同級生ではなく、偏差値という現実である。 The greatest enemy of a Do-Teihen graduate is not their classmates—but the cold, numerical reality of standardized test scores.
第9法則 ド底辺医大では、人格者は浮く。媚びる者とカネ持ちが残る。 In Do-Teihen med schools, the virtuous are outcasts; only flatterers and the wealthy thrive.
第10法則 ド底辺医大に入る者は、嘘をついて入学し、嘘をつき続けて卒業する。 Those who enter Do-Teihen medical schools do so with lies—and graduate by continuing to lie.
第11法則 ド底辺医大卒は、知性を持つ批判者を最も憎む。それは、自分が決してなれない姿だからだ。 Graduates of Do-Teihen reserve their deepest hatred for intelligent critics—because those critics reflect everything they can never become.
第12法則 ド底辺医大は、医者を育てる場ではない。医師免許を与える「通行証発行所」である。 Do-Teihen is not a school to train doctors. It is a toll booth that issues medical licenses for a price.
# " Construct a Monte Carlo study that investigates how the probability of coverage depends on the sample size and true proportion value. In the study, let n be 10, 25, 50, and 100 and let p be .05, .25, and .50. Write an R function that has three inputs, n, p, and the number of Monte Carlo simulations m,and will output the estimate of the exact coverage probability. Implement your function using each combination of n and p and m = 1000 simulations. Describe how the actual probability of coverage of the traditional interval depends on the sample size and true proportion value. "
f = \(n, p, m = 1000) {
  y = rbinom(m, n, p)
  phat = y / n
  se = sqrt(phat * (1 - phat) / n)
  lo = phat - qnorm(0.975) * se
  up = phat + qnorm(0.975) * se
  mean(lo < p & p < up)
}
f = Vectorize(f)
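The exercise's grid can then be evaluated in one call; since `f` has been vectorized, `outer()` applies it to every (n, p) combination:

set.seed(1)
ns <- c(10, 25, 50, 100)
ps <- c(0.05, 0.25, 0.50)
cover <- outer(ns, ps, f)  # estimated coverage for each (n, p) pair
dimnames(cover) <- list(n = ns, p = ps)
round(cover, 3)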
library(rjags)

# Fit a Bayesian logistic regression model using JAGS and return predictions
# and posterior summaries
fit_bayesian_logistic_jags <- function(data, formula, newdata,
                                       n.chains = 3, n.iter = 5000, n.burnin = 1000) {
  # Extract the response variable name from the formula
  response_var <- all.vars(formula)[1]
  y <- data[[response_var]]
  # Convert a factor response to binary numeric (0/1)
  if (is.factor(y)) y <- as.numeric(y) - 1
  y <- as.numeric(y)

  # Construct design matrices for the training and new data
  X <- model.matrix(formula, data)
  new_X <- model.matrix(delete.response(terms(formula)), newdata)
  # Prepare the data list for JAGS
  jags_data <- list(
    y = y,
    X = X,
    n = nrow(X),
    p = ncol(X),
    new_X = new_X,
    m = nrow(new_X),                # number of new observations to predict
    scale_beta = rep(2.5, ncol(X))  # Prior scale for each coefficient
  )
  # Define the JAGS model
  model_string <- "
  model {
    for (j in 1:p) {
      beta[j] ~ dt(0, 1 / pow(scale_beta[j], 2), 1)
    }
    for (i in 1:n) {
      logit_p[i] <- inprod(X[i,], beta[])
      y[i] ~ dbern(1 / (1 + exp(-logit_p[i])))
    }

    # Predicted success probabilities for the new observations
    for (k in 1:m) {
      p_new[k] <- 1 / (1 + exp(-inprod(new_X[k,], beta[])))
    }
  }
  "
  # Initialize and run the JAGS model
  model <- jags.model(textConnection(model_string), data = jags_data,
                      n.chains = n.chains, quiet = TRUE)
  update(model, n.burnin)

  # Draw posterior samples of the coefficients and the predicted probabilities
  samples <- coda.samples(model, variable.names = c("beta", "p_new"), n.iter = n.iter)
  list(samples = samples, summary = summary(samples))
}
# New observation to predict
newdata <- data.frame(
  donation = 2500,
  score = 40,
  parent = 1
)

# Fit the model and obtain results
set.seed(123)
result <- fit_bayesian_logistic_jags(
  data = data,
  formula = admission ~ donation + score + parent,
  newdata = newdata
)
# Helper function to convert decimal numbers to fraction strings using MASS::fractions
fractionStr <- function(x) {
  as.character(MASS::fractions(x))
}
# Estimate the parameters of a Beta distribution whose interval [lower, upper]
# carries a given probability mass (credMass).
# (Function header assumed; the body below uses lower, upper, credMass, and verbose.)
beta_params_from_interval <- function(lower, upper, credMass = 0.95, verbose = TRUE) {

  # Compute beta as a function of alpha from the HDI condition that the density
  # is equal at the two endpoints: equating dbeta(lower) and dbeta(upper) gives
  # (alpha - 1) * log(upper / lower) = (beta - 1) * log((1 - lower) / (1 - upper)).
  f <- function(alpha) {
    1 + ((alpha - 1) * log(upper / lower)) / log((1 - lower) / (1 - upper))
  }
  # Root-finding target: difference between the desired credible mass and the
  # Beta probability mass between lower and upper with parameters (alpha, f(alpha))
  g <- function(alpha) {
    pbeta(upper, alpha, f(alpha)) - pbeta(lower, alpha, f(alpha)) - credMass
  }

  # Find the root of g(alpha) = 0 over [1, 1e5], i.e. the alpha value
  # that satisfies the credible mass condition
  re <- uniroot(g, c(1, 1e5))
  alpha <- re$root
  beta <- f(alpha)

  # Mean of the Beta distribution
  mean <- alpha / (alpha + beta)

  # Mode, if defined (alpha > 1 and beta > 1); otherwise NA
  mode <- if (alpha > 1 && beta > 1) {
    (alpha - 1) / (alpha + beta - 2)
  } else {
    NA
  }
  # If the verbose flag is TRUE, plot the Beta distribution and annotate the results
  if (verbose) {
    # x values from 0 to 1 for plotting the density
    x <- seq(0, 1, length.out = 1000)

    # Beta density values at x
    y <- dbeta(x, alpha, beta)

    # Color bars within the credible interval [lower, upper] "lightcoral",
    # others light gray ("gray70")
    col <- ifelse(x >= lower & x <= upper, "lightcoral", "gray70")

    # Histogram-like vertical lines representing the Beta density
    plot(x, y, type = "h", col = col, lwd = 2,
         main = sprintf("Beta(%.2f, %.2f) Distribution\n[%s, %s] with %.0f%% Probability Mass",
                        alpha, beta, fractionStr(lower), fractionStr(upper), credMass * 100),
         xlab = "x", ylab = "Density", bty = "n")

    # Vertical dashed line for the mean (skyblue)
    abline(v = mean, col = "skyblue", lwd = 1, lty = 2)

    # Vertical dashed line for the mode, if defined (dark green)
    if (!is.na(mode)) {
      abline(v = mode, col = "darkgreen", lwd = 1, lty = 2)
    }
    # Legend labels and corresponding colors (mode entries included only when defined)
    labels <- c("Mean",
                if (!is.na(mode)) "Mode" else NULL,
                sprintf("%.0f%% interval", credMass * 100))
    colors <- c("skyblue",
                if (!is.na(mode)) "darkgreen" else NULL,
                "lightcoral")
    # Line types for legend items (dashed for mean and mode, none for the interval)
    ltys <- c(2, if (!is.na(mode)) 2 else NULL, NA)

    # Plot characters for the legend (none for lines, solid square for the interval)
    pchs <- c(NA, if (!is.na(mode)) NA else NULL, 15)

    # Legend at the top of the plot, no box, scaled text size
    legend("top", legend = labels, col = colors, bty = "n", cex = 0.9,
           lty = ltys, pch = pchs)
  }
  # Return a named vector of the calculated parameters and statistics
  c(alpha = alpha, beta = beta, mean = mean, mode = mode)
}
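As a quick check, the function can be called with a symmetric interval (recall that the name and signature above are a reconstruction):

# Illustrative call: a Beta distribution with 95% of its mass on [1/4, 3/4]
beta_params_from_interval(lower = 1/4, upper = 3/4, credMass = 0.95)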
p_post_null <- function(p0, prior, alpha, beta, success, failure) {
  # Total number of trials from successes and failures.
  total = success + failure

  # Likelihood of the data under the null hypothesis (H0):
  # a binomial distribution with success probability p0.
  m0 = dbinom(success, total, p0)

  # Marginal likelihood of the data under the alternative hypothesis (H1).
  # Under H1 the success probability follows a Beta(alpha, beta) distribution;
  # extraDistr::dbbinom computes the marginal likelihood by integrating over
  # all possible success probabilities.
  m1 = extraDistr::dbbinom(success, total, alpha, beta)

  # Bayes Factor (BF01): ratio of the likelihood under H0 (m0)
  # to the marginal likelihood under H1 (m1).
  BF01 = m0 / m1

  # Posterior probability of the null hypothesis: the prior belief (prior)
  # updated by the evidence from the data (BF01).
  p_post = prior * BF01 / (prior * BF01 + 1 - prior)

  # Return both the Bayes Factor and the posterior probability as a named vector.
  c(BF01 = BF01, p_post = p_post)
}
# Optimize the 'alpha' parameter of the H1 prior, subject to the constraint
# beta = 5 * alpha - 4, which fixes the prior mode at
# (alpha - 1) / (alpha + beta - 2) = 1/6.
# Note that optimize() minimizes by default, so this finds the alpha in (0, 1e6)
# that makes the null hypothesis least plausible, i.e. a lower bound on
# p(H0 | data) over this family of priors.
# p_post_null(...)[2] extracts the 'p_post' component of the output.
optimize(function(alpha) p_post_null(p0 = 1/6, prior = 1/2,
                                     alpha, 5 * alpha - 4,  # beta as a function of alpha
                                     success = 4, failure = 50 - 4)[2],
         c(0, 1e6))  # Search range for alpha
# Bayes Factor and posterior probability at specific alpha and beta values
# (alpha = 50/9, beta = 5 * 50/9 - 4), presumably the optimum found above
# or pre-determined values of interest.
p_post_null(1/6, 1/2, 50/9, 5 * 50/9 - 4, 4, 50 - 4)
> stancode(fit)
// generated with brms 2.22.0
functions {
  /* compute monotonic effects
   * Args:
   *   scale: a simplex parameter
   *   i: index to sum over the simplex
   * Returns:
   *   a scalar between 0 and rows(scale)
   */
  real mo(vector scale, int i) {
    if (i == 0) {
      return 0;
    } else {
      return rows(scale) * sum(scale[1:i]);
    }
  }
}
data {
  int<lower=1> N;  // total number of observations
  array[N] int Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int<lower=1> Kc;  // number of population-level effects after centering
  int<lower=1> Ksp;  // number of special effects terms
  int<lower=1> Imo;  // number of monotonic variables
  array[Imo] int<lower=1> Jmo;  // length of simplexes
  array[N] int Xmo_1;  // monotonic variable
  vector[Jmo[1]] con_simo_1;  // prior concentration of monotonic simplex
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
}
parameters {
  vector[Kc] b;  // regression coefficients
  real Intercept;  // temporary intercept for centered predictors
  simplex[Jmo[1]] simo_1;  // monotonic simplex
  vector[Ksp] bsp;  // special effects coefficients
}
transformed parameters {
  real lprior = 0;  // prior contributions to the log posterior
  lprior += normal_lpdf(b[1] | 0.15, 0.3);
  lprior += normal_lpdf(b[2] | 0.08, 0.3);
  lprior += normal_lpdf(b[3] | 0.8, 0.3);
  lprior += normal_lpdf(b[4] | 0.5, 0.3);
  lprior += normal_lpdf(Intercept | -4, 2);
  lprior += dirichlet_lpdf(simo_1 | con_simo_1);
  lprior += normal_lpdf(bsp[1] | -0.5, 0.3);
}
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N] mu = rep_vector(0.0, N);
    mu += Intercept;
    for (n in 1:N) {
      // add more terms to the linear predictor
      mu[n] += (bsp[1]) * mo(simo_1, Xmo_1[n]);
    }
    target += bernoulli_logit_glm_lpmf(Y | Xc, mu, b);
  }
  // priors including constants
  target += lprior;
}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
}
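Stan code of this shape comes from a brms model with four ordinary predictors and one monotonic ordinal predictor. A hypothetical call of that kind (variable names, data, and prior assignments are illustrative, not recovered from `fit`) might look like:

library(brms)
# Sketch only: binary outcome, four numeric predictors, and an ordered factor
# entering as a monotonic effect via mo(); priors mirror the lprior block above.
fit <- brm(
  y ~ x1 + x2 + x3 + x4 + mo(grade),
  data = d, family = bernoulli(),
  prior = c(
    set_prior("normal(0.15, 0.3)", class = "b", coef = "x1"),
    set_prior("normal(-4, 2)", class = "Intercept"),
    set_prior("normal(-0.5, 0.3)", class = "b", coef = "mograde")
  )
)
stancode(fit)  # prints the generated Stan program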
There was some R code in Bayesian Computation with R that I could not understand. I thought it was a bug, but it is not listed in the errata at https://bayesball.github.io/bcwr/corrections.2nd.edition.txt, so I asked an AI.
>> I think the log-Jacobian term is wrong.
# theta = c(log(eta/(1-eta)), log(K))
> LearnBayes::betabinexch
function (theta, data)
{
    eta = exp(theta[1])/(1 + exp(theta[1]))
    K = exp(theta[2])
    y = data[, 1]
    n = data[, 2]
    N = length(y)
    logf = function(y, n, K, eta) lbeta(K * eta + y, K * (1 - eta) + n - y) -
        lbeta(K * eta, K * (1 - eta))
    val = sum(logf(y, n, K, eta))
    val = val + theta[2] - 2 * log(1 + exp(theta[2]))
    return(val)
}
<bytecode: 0x000001a5a980e758>
<environment: namespace:LearnBayes>
Isn't this the correct version?

betabinexch <- function (theta, data) {
  eta = exp(theta[1])/(1 + exp(theta[1]))
  K = exp(theta[2])
  y = data[, 1]
  n = data[, 2]
  N = length(y)
  logf = function(y, n, K, eta) lbeta(K * eta + y, K * (1 - eta) + n - y) -
    lbeta(K * eta, K * (1 - eta))
  val = sum(logf(y, n, K, eta))
  val = val + theta[1] + theta[2] - 2 * log(1 + exp(theta[1]))  # log Jacobian term
  return(val)
} <<
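To see how the two versions differ numerically, a quick check like the following can be run (a sketch assuming the LearnBayes package and its cancermortality dataset, which the book pairs with betabinexch):

library(LearnBayes)
data(cancermortality)
theta <- c(-7, 6)  # an arbitrary point (logit eta, log K)
LearnBayes::betabinexch(theta, cancermortality)  # shipped version
betabinexch(theta, cancermortality)              # version proposed above
# The two log posteriors differ by
# theta[1] - 2 * log(1 + exp(theta[1])) + 2 * log(1 + exp(theta[2])).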