\(\newcommand{\mathbbm}[1]{\mathbb{#1}}\)
library(tidyverse)
library(ggplot2)
The normal distribution has density:
\[ f(x) = \frac{1}{\sqrt{2\pi\sigma^{2}}} exp\left(-\frac{(x-\mu)^2}{2\sigma^2}\right) \]
help("Normal")
Plotting the standard normal distribution
# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
#
# ui <- fluidPage(
# column(4,
# wellPanel(
# sliderInput('Mu','Mu', min = - 3, max = 3, value = 0, step = 1),
# sliderInput('Sigma','Sigma', min = 1, max = 5, value = 1, step = 1)
# )
# ),
# column(8,
# plotOutput('plotDensity')
# )
# )
#
# server <- function(input,output) {
#
# selectedData <- reactive({
# data_frame(x = c(input$Mu - 4*input$Sigma, input$Mu + 4*input$Sigma))
# })
#
# output$plotDensity <- renderPlot({
# ggplot(selectedData(), aes(x=x)) +
# geom_vline(xintercept = input$Mu, linetype = "dashed", alpha = 0.4) +
# stat_function(fun = dnorm, args = c(input$Mu,input$Sigma),
# color = "#84CA72", size = 1) +
# labs(caption = paste0("Normal Distribution N (", input$Mu," , ", input$Sigma^2, ")")) +
# xlab("x") +
# ylab("dnorm(x)") +
# coord_cartesian(ylim = c(0,0.5)) +
# theme(
# plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
# )
#
# })
#
# }
#
# shinyApp(ui = ui, server = server)
mu <- 0
sigma <- 1
mean <- mu
var <- sigma^2
sd <- sigma
q9999 <- qnorm(p = 0.9999, mean = mu, sd = sigma, lower.tail = TRUE)
p <- data_frame(x = c(-q9999, q9999)) %>%
ggplot(aes(x=x))
p + geom_vline(xintercept = mu, linetype = "dashed", alpha = 0.4) +
stat_function(fun = dnorm, args = c(mu,sigma),
color = "#84CA72", size = 1) +
ggtitle(paste0("Normal Distribution N (", mu," , ", sigma^2, ")")) +
xlab("x") +
ylab("dnorm(x)") +
theme(plot.title = element_text(hjust = 0.5))
paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0; True Variance: 1"
ggplot(data.frame(x = -5:5), aes(x)) +
stat_function(fun = dnorm, args = c(0,1), geom = 'area', xlim = c(1,5), fill = 4) +
stat_function(fun = dnorm, args = c(0,1)) +
labs(x = "", y = "") +
ggtitle("Normal distribution") +
theme(plot.title = element_text(hjust = 0.5))
# Built-in R function
paste0("Built-in R function:")
## [1] "Built-in R function:"
# rnrom(n, mean=0, sd=1)
mu <- 0
sigma <- 1
set.seed(123)
x <- rnorm(n = 1e5, mean = mu, sd = sigma)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) dnorm(x, mean = mu, sd = sigma), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.000976748821684996; Variance: 0.999466876209305"
# Built-in Rcpp function
paste0("Built-in Rcpp function:")
## [1] "Built-in Rcpp function:"
library("Rcpp")
# evalCpp("Rcpp::rnorm(n,0,1)")
mu <- 0
sigma <- 1
set.seed(123)
x <- evalCpp("Rcpp::rnorm(1e5, 0, 1)")
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) dnorm(x, mean = mu, sd = sigma), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.000976748821684996; Variance: 0.999466876209305"
library("Rcpp")
set.seed(123)
rnorm(1,0,1)
set.seed(123)
evalCpp("R::rnorm(0,1)")
set.seed(123)
rnorm(3,0,1)
set.seed(123)
evalCpp("Rcpp::rnorm(3,0,1)")
The t distribution with df
= \(\nu\) degrees of freedom has density
\[ f(x) = \]
help("TDist")
Plotting the Central t distribution
# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
#
# ui <- fluidPage(
# column(4,
# wellPanel(
# sliderInput('df','df', min = 1, max = 12, value = 6, step = 1),
# sliderInput('ncp','ncp', min = 1, max = 5, value = 1, step = 1)
# )
# ),
# column(8,
# plotOutput('plotDensity')
# )
# )
#
# server <- function(input,output) {
#
# mean <- 0
# sd <- reactive({sqrt(input$df/(input$df-2))})
# selectedData <- reactive({
# data_frame(x = c(mean - 4*sd(), mean + 4*sd()))
# })
#
# output$plotDensity <- renderPlot({
# ggplot(selectedData(), aes(x=x)) +
# geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
# stat_function(fun = dt, args = c(input$df),
# color = "#84CA72", size = 1) +
# labs(caption = paste0("t Distribution (df = ", input$df,")")) +
# xlab("x") +
# ylab(paste0("dt(x, df = ", input$df,")")) +
# coord_cartesian(ylim = c(0,0.5)) +
# theme(
# plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
# )
#
# })
#
# }
#
# shinyApp(ui = ui, server = server)
df <- 6
ncp <- NA # non-centrality parameter
mean <- 0
var <- df/(df-2)
sd <- sqrt(df/(df-2))
q9999 <- qt(p = 0.9999, df = df, lower.tail = TRUE)
p <- data_frame(x = c(-q9999, q9999)) %>%
ggplot(aes(x=x))
p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
stat_function(fun = dt, args = c(df),
color = "#84CA72", size = 1) +
labs(caption = paste0("t Distribution (df = ", df,")")) +
xlab("x") +
ylab(paste0("dt(x, df = ", df,")")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0; True Variance: 1.5"
# Built-in R function
paste0("Built-in R function:")
## [1] "Built-in R function:"
# rt(n, df, ncp)
df <- 6
ncp <- NA # non-centrality parameter
set.seed(123)
x <- rt(n = 1e5, df = df)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) dt(x, df = df), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: -0.00191542325893477; Variance: 1.51823043058928"
The Gamma distribution with parameters shape
= \(c\) and rate
= \(d\) has density
\[ f(x \mid c,d) = \frac{d^c}{\Gamma(c)} x^{c - 1} e^{(-dx)} \] for \(x \geq 0, c > 0\) and \(d > 0\).
The mean and variance are
\[ E(X) = \frac{c}{d}; \quad Var(X) = \frac{c}{d^2} \]
The Gamma distribution with parameters shape
= \(c\) and scale
= \(\delta\) has density
\[ f(x \mid c, \delta) = \frac{1}{\delta^c \Gamma(c)} x^{c - 1} e^{(-x/\delta)} \] for \(x \geq 0, c > 0\) and \(\delta > 0\).
The mean and variance are
\[ E(X) = c \delta; \quad Var(X) = c \delta^2 \]
help("GammaDist")
Plotting the Gamma distribution
# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
#
# ui <- fluidPage(
# column(4,
# wellPanel(
# sliderInput('alpha','alpha (shape)', min = 1, max = 12, value = 1, step = 1),
# sliderInput('sigma','sigma (scale)', min = 0, max = 3, value = 0.5, step = 0.1)
# )
# ),
# column(8,
# plotOutput('plotDensity')
# )
# )
#
# server <- function(input,output) {
#
# mean <- reactive({input$alpha*input$sigma})
# sd <- reactive({sqrt(input$alpha*input$sigma^2)})
#
# selectedData <- reactive({
# data_frame(x = c(0, qgamma(p = 0.9999, shape = input$alpha, scale = input$sigma)))
# })
#
# output$plotDensity <- renderPlot({
# ggplot(selectedData(), aes(x=x)) +
# geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
# stat_function(fun = dgamma, args = c(input$alpha,input$sigma),
# color = "#84CA72", size = 1) +
# labs(caption = paste0("Gamma (alpha = ", input$alpha, ", sigma = ", input$sigma,") Distribution")) +
# xlab("x") +
# ylab(paste0("dgamma(x, alpha = ", input$alpha, ", sigma = ", input$sigma,")")) +
# coord_cartesian(ylim = c(0,0.5)) +
# theme(
# plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
# )
#
# })
#
# }
#
# shinyApp(ui = ui, server = server)
# dgamma(x, shape, rate = 1, scale = 1/rate, log = FALSE)
c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate
mean <- c/d
var <- c/d^2
sd <- sqrt(c/d^2)
q9999 <- qgamma(p = 0.9999, shape = c, rate = d)
p <- data_frame(x = c(0, q9999)) %>%
ggplot(aes(x=x))
p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
stat_function(fun = dgamma, args = c(shape = c, rate = d),
color = "#84CA72", size = 1) +
ggtitle(paste0("Gamma (c = ", c, ", d = ", d,") Distribution")) +
xlab("x") +
ylab(paste0("dgamma")) +
theme(plot.title = element_text(hjust = 0.5))
paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 6; True Variance: 12"
c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate
# Built-in function
paste0("Built-in R function:")
## [1] "Built-in R function:"
set.seed(123)
# x <- rgamma(1e5, c, d)
x <- rgamma(n = 1e5, shape = c, rate = d)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 6.00987751542745; Variance: 11.9980901747901"
c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate
# Built-in function
paste0("Built-in Rcpp function:")
## [1] "Built-in Rcpp function:"
set.seed(123)
x <- Rcpp::evalCpp("randg(1e5, arma::distr_param(3,2))", depends = "RcppArmadillo") # shape and scale parameters
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 5.99775522536012; Variance: 11.9708700496215"
library(Rcpp)
library(RcppArmadillo)
c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate
Draw_Gamma <- cppFunction(depends = "RcppArmadillo",
'
arma::Col<double> rrgamma(int n, double c, double d) {
return(rgamma(n, c, 1/d)); // shape and scale parameters
}
'
)
# My Rcpp function
paste0("My Rcpp function:")
## [1] "My Rcpp function:"
set.seed(123)
x <- Draw_Gamma(1e5, c, d)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 6.00987751542745; Variance: 11.9980901747901"
The Inverse-Gamma distribution with parameters shape
=
\(a\) and rate
= \(b\) has density
\[ f(x \mid a, b) = \frac{1}{b^a\Gamma(a)} x^{-a-1} e^{\left(-\frac{1}{bx}\right)} \] for \(x \geq 0, a > 0\) and \(b > 0\).
The mean and variance are
\[ E(X) = \frac{1}{b(a-1)} \quad (a>1) ; \quad Var(X) = \frac{1}{b^2(a-1)^2(a-2)} \quad (a>2) \]
The Inverse-Gamma distribution with parameters shape
=
\(a\) and scale
= \(\beta\) has density
\[ f(x \mid a, \beta) = \frac{\beta^a}{\Gamma(a)} x^{-a-1} e^{(-\beta/x)} \] for \(x \geq 0, a > 0\) and \(\beta > 0\).
The mean and variance are
\[ E(X) = \frac{\beta}{a-1} \quad (a>1) ; \quad Var(X) = \frac{\beta^2}{(a-1)^2(a-2)} \quad (a>2) \]
Plotting the Inverse-Gamma distribution
# # My Shiny App
#
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# library(invgamma)
#
# ui <- fluidPage(
# column(4,
# wellPanel(
# sliderInput('alpha','alpha (shape)', min = 1, max = 12, value = 1, step = 1),
# sliderInput('beta','beta (scale)', min = 0, max = 3, value = 0.5, step = 0.5)
# )
# ),
# column(8,
# plotOutput('plotDensity')
# )
# )
#
# server <- function(input,output) {
#
# mode <- reactive({input$beta/(input$alpha+1)})
#
# selectedData <- reactive({
# data_frame(x = c(0, qinvgamma(p = 0.9, shape = input$alpha, scale = input$beta)))
# })
#
# output$plotDensity <- renderPlot({
# ggplot(selectedData(), aes(x=x)) +
# geom_vline(xintercept = mode(), linetype = "dashed", alpha = 0.4, color = "blue") +
# stat_function(fun = dinvgamma, args = c(input$alpha,input$beta),
# color = "#84CA72", size = 1) +
# labs(caption = paste0("Inverse Gamma (alpha = ", input$alpha, ", beta = ", input$beta,") Distribution")) +
# xlab("x") +
# ylab(paste0("dinvgamma(x, alpha = ", input$alpha, ", beta = ", input$beta,")")) +
# coord_cartesian(xlim = c(0,3)) +
# coord_cartesian(ylim = c(0,5)) +
# theme(
# plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
# )
#
# })
#
# }
#
# shinyApp(ui = ui, server = server)
a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate
library(extraDistr)
mean <- 1/(b*(a-1))
var <- 1/(b^2*(a-1)^2*(a-2))
sd <- sqrt(1/(b^2*(a-1)^2*(a-2)))
q9999 <- extraDistr::qinvgamma(p = 0.9999, alpha = a, beta = 1/b) # shape and scale parameters
p <- data_frame(x = c(0, q9999)) %>%
ggplot(aes(x=x))
p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
stat_function(fun = extraDistr::dinvgamma, args = c(alpha = a, beta = 1/b),
color = "#84CA72", size = 1) +
ggtitle(paste0("Inverse-Gamma (a = ", a, ", b = ", b,") Distribution")) +
xlab("x") +
ylab(paste0("dinvgamma")) +
theme(plot.title = element_text(hjust = 0.5))
paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0.25; True Variance: 0.0625"
a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate
library(extraDistr)
# Built-in function
paste0("Built-in function:")
## [1] "Built-in function:"
set.seed(123)
x <- extraDistr::rinvgamma(n = 1e5, alpha = a, beta = 1/b) # shape and scale parameters
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"
# My R function
Draw_InvGamma_R <- function(n, shape, rate) {
x <- rgamma(n = n, shape = shape, rate = 1/rate)
y <- 1/x
return(y)
}
a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate
paste0("My R function:")
## [1] "My R function:"
set.seed(123)
x <- Draw_InvGamma_R(n = 1e5, shape = a, rate = b)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"
# My Rcpp function
Draw_InvGamma_Rcpp <- cppFunction(depends = "RcppArmadillo",
'
arma::Col<double> rrinvgamma(int n, double a, double b) {
return(1/rgamma(n, a, b)); // shape and scale parameters
}
'
)
a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate
paste0("My Rcpp function:")
## [1] "My Rcpp function:"
set.seed(123)
x <- Draw_InvGamma_Rcpp(1e5, a, b)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"
The Inverse Gaussian distribution (also called the Wald distribution)
with parameters location
\(=
\mu\) and shape
\(=
\lambda\) has density \[
f(x \mid \mu, \lambda) = \frac{\lambda}{\sqrt{2\pi
x^3}}exp\left(\frac{-\lambda(x-\mu)^2}{2\mu^2x}\right)
\] for \(x > 0, \mu > 0\)
and \(\lambda > 0\)
The mean and variance are \[ E(X) = \mu; \quad Var(X) = \frac{\mu^3}{\lambda} \]
Plotting the \(IG(\mu, \lambda)\) distribution
library(extraDistr)
# dwald(x, mu, lambda, log = FALSE)
mu <- 1
lambda <- 1
mean <- mu
var <- mu^3/lambda
sd <- sqrt(mu^3/lambda)
p <- data_frame(x = c(0, mean + 15*sd)) %>%
ggplot(aes(x = x))
p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
stat_function(fun = dwald, args = c(mu = mu, lambda = lambda),
color = "#84CA72", size = 1) +
ggtitle(paste0("Inverse Gaussian (mu = ", mu, ", lambda = ", lambda,") Distribution")) +
xlab("x") +
ylab(paste0("dInverseGaussian")) +
theme(plot.title = element_text(hjust = 0.5))
paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 1; True Variance: 1"
library(extraDistr)
# Built-in function
paste0("Built-in function:")
## [1] "Built-in function:"
set.seed(123)
x <- rwald(n = 1e5, mu = 1, lambda = 1)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 1.00506842306895; Variance: 1.02736098395571"
# My R function
Draw_IG <- function(n, mu, lambda) {
y <- rep(NA, n)
for (i in 1:n) {
# First generate a chi_square(1) variate
v0 <- rnorm(1,0,1)^2
# Now find the two roots in Eq. (5) of the paper
x1 <- mu + (0.5*(mu^2)*v0)/lambda - (0.5*mu/lambda)*sqrt(4*mu*lambda*v0 + (mu^2)*(v0^2));
x2 <- (mu^2)/x1
# Accept x1 with probability p1_v0, otherwise accept x2
p1_v0 <- mu/(mu+x1)
if (runif(1) <= p1_v0) {
y[i] <- x1
} else {
y[i] <- x2
}
}
return(y)
}
paste0("My R function:")
## [1] "My R function:"
set.seed(123)
x <- Draw_IG(n = 1e5, mu = 1, lambda = 1)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 1.00350059340242; Variance: 1.01015584280717"
library(Rcpp)
library(RcppArmadillo)
Dfuns <- cppFunction(depends = "RcppArmadillo",
'
arma::Col<double> rrinvgauss(int n, double mu, double lambda){
arma::Col<double> random_vector(n);
double z,y,x,u;
for(int i=0; i<n; ++i){
z=R::rnorm(0,1);
y=z*z;
x=mu+0.5*mu*mu*y/lambda - 0.5*(mu/lambda)*sqrt(4*mu*lambda*y+mu*mu*y*y);
u=R::runif(0,1);
if(u <= mu/(mu+x)){
random_vector(i)=x;
}else{
random_vector(i)=mu*mu/x;
};
}
return(random_vector);
}
'
)
paste0("My Rcpp function:")
## [1] "My Rcpp function:"
set.seed(123)
x <- Dfuns(n = 1e5, mu = 1, lambda = 1)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 1.00350059340242; Variance: 1.01015584280717"
The exponential distribution with parameter rate
= \(r\) has density
\[ f(x \mid r) = r e^{-r x} \]
for \(x \in [0, \infty), r > 0\).
The mean and variance are
\[ E(X) = \frac{1}{r}; \quad Var(X) = \frac{1}{r^2} \]
Plotting the Exponential distribution
# # My Shiny App
#
# library(shiny)
# library(ggplot2)
# library(tidyverse)
#
# ui <- fluidPage(
# column(4,
# wellPanel(
# sliderInput('lambda','lambda (rate)', min = 0.5, max = 2, value = 1, step = 0.5),
# )
# ),
# column(8,
# plotOutput('plotDensity')
# )
# )
#
# server <- function(input,output) {
#
# mean <- reactive({1/input$lambda})
# sd <- reactive({1/input$lambda})
#
# selectedData <- reactive({
# data_frame(x = c(0, qexp(p = 0.9999, rate = input$lambda)))
# })
#
# output$plotDensity <- renderPlot({
# ggplot(selectedData(), aes(x=x)) +
# geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
# stat_function(fun = dexp, args = c(input$lambda),
# color = "#84CA72", size = 1) +
# labs(caption = paste0("Exponential (lambda = ", input$lambda,") Distribution")) +
# xlab("x") +
# ylab(paste0("dexp(x, lambda = ", input$lambda,")")) +
# coord_cartesian(ylim = c(0,2)) +
# theme(
# plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
# )
#
# })
#
# }
#
# shinyApp(ui = ui, server = server)
# dexp(x, rate = 1, log = FALSE)
r <- 1.5 # rate
mean <- 1/r
var <- 1/r^2
sd <- 1/r
q9999 <- qexp(p = 0.9999, rate = r)
p <- data_frame(x = c(0, q9999)) %>%
ggplot(aes(x=x))
p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
stat_function(fun = dexp, args = c(rate = r),
color = "#84CA72", size = 1) +
ggtitle(paste0("Exponential (r = ", r,") Distribution")) +
xlab("x") +
ylab(paste0("dexp")) +
theme(plot.title = element_text(hjust = 0.5))
paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0.666666666666667; True Variance: 0.444444444444444"
r <- 1.5 # rate
# Built-in function
paste0("Built-in R function:")
## [1] "Built-in R function:"
# rexp(n, rate = 1)
set.seed(123)
x <- rexp(n = 1e5, rate = r)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) dexp(x, rate = r), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.664974647598273; Variance: 0.439985915259907"
The Laplace distribution with parameters location
=
\(\mu\) and scale
= \(s\) has density
\[ f(x \mid \mu, s) = \frac{1}{2s} exp\left(-\frac{\mid x - \mu \mid}{s}\right) \]
which can be written as the following Normal-Exponential mixture (Double Exponential)
\[ f(x \mid \mu, s) = \int_{0}^{\infty}\frac{1}{\sqrt{2 \pi t}} exp \left(-\frac{(x-\mu)^2}{2t}\right) \frac{1}{2s^2} exp\left(-\frac{t}{2s^2}\right) dt \]
The mean and variance are
\[ E(X) = \mu; \quad Var(X) = 2 s^2 \]
The Laplace distribution with parameters location
=
\(\mu\) and rate
= \(\lambda\) has density
\[ f(x \mid \mu, \lambda) = \frac{\lambda}{2} exp\left(-\lambda\mid x - \mu \mid\right) \]
which can be written as the following Normal-Exponential mixture (Double Exponential)
\[ f(x \mid \mu, \lambda) = \int_{0}^{\infty}\frac{1}{\sqrt{2 \pi t}} exp \left(-\frac{(x-\mu)^2}{2t}\right) \frac{\lambda^2}{2} exp\left(-\frac{\lambda^2t}{2}\right) dt \]
The mean and variance are
\[ E(X) = \mu; \quad Var(X) = \frac{2}{\lambda^2} \]
Note: Representation from Park and Casella (2008)
\[ \frac{a}{2} e^{-a \mid z \mid} = \int_{0}^{\infty} \frac{1}{\sqrt{2\pi s}} e^{-z^2/(2s^2)} \frac{a^2}{2}e^{(-a^2s/2)}ds \] for \(a>0\)
Plotting the \(Laplace(\mu, \lambda)\) distribution
library(rmutil) # for double-exponential
library(LaplacesDemon)
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# library(rmutil) # for double-exponential
# library(LaplacesDemon)
#
# ui <- fluidPage(
# column(4,
# wellPanel(
# sliderInput('mu','mu (location)', min = -3, max = 3, value = 0, step = 1),
# sliderInput('lambda','lambda (scale)', min = 0, max = 10, value = 1, step = 1)
# )
# ),
# column(8,
# plotOutput('plotDensity')
# )
# )
#
# server <- function(input,output) {
#
# mean <- reactive({input$mu})
# sd <- reactive({sqrt(2*input$lambda^2)})
# q9999 <- reactive({qlaplace(p = 0.9999, location = input$mu, scale = input$lambda)})
#
# selectedData <- reactive({
# data_frame(x = c(c(-q9999(),q9999())))
# })
#
# output$plotDensity <- renderPlot({
# ggplot(selectedData(), aes(x=x)) +
# geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
# stat_function(fun = dlaplace, args = c(location = input$mu, scale = input$lambda),
# color = "#84CA72", size = 1) +
# labs(caption = paste0("Laplace (mu = ", input$mu, ", lambda = ", input$lambda,") Distribution")) +
# xlab("x") +
# ylab(paste0("dgamma(x, Laplace (mu = ", input$mu, ", lambda = ", input$lambda,")")) +
# coord_cartesian(ylim = c(0,0.5)) +
# theme(
# plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
# )
#
# })
#
# }
#
# shinyApp(ui = ui, server = server)
library(LaplacesDemon)
# dlaplace(x, location=0, scale=1, log=FALSE)
mu <- 0 # location
lambda <- 2 # rate
scale <- 1/lambda # scale = 1/rate
mean <- mu
var <- 2/lambda^2
sd <- sqrt(2/lambda^2)
q9999 <- LaplacesDemon::qlaplace(p = 0.9999, location = mu, scale = 1/lambda)
p <- data_frame(x = c(-q9999,q9999)) %>%
ggplot(aes(x=x))
p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
stat_function(fun = LaplacesDemon::dlaplace, args = c(location = mu, scale = 1/lambda),
color = "#84CA72", size = 1) +
ggtitle(paste0("Laplace (mu = ", mu, ", lambda = ", lambda,") Distribution")) +
xlab("x") +
ylab(paste0("dlaplace")) +
theme(plot.title = element_text(hjust = 0.5))
paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0; True Variance: 0.5"
library(LaplacesDemon)
# Built-in function
paste0("Built-in function:")
## [1] "Built-in function:"
# rlaplace(n, location=0, scale=1)
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate
set.seed(123)
x <- LaplacesDemon::rlaplace(n = 1e5, location = mu, scale = 1/lambda)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) LaplacesDemon::dlaplace(x, location = mu, scale = 1/lambda), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: -0.000332018113338988; Variance: 0.495343727783127"
# My R function
Draw_Laplace_R <- function(n, mu, lambda) {
t <- rexp(n = n, rate = lambda^2/2)
x <- rep(NA,n)
for (i in 1:n) {
x[i] <- rnorm(1, mean = mu, sd = sqrt(t[i]))
}
return(x)
}
# Draw_Laplace2 <- function(n, mu, lambda) {
# t <- rexp(n = n, rate = lambda^2/2)
#
# library(purrr)
# x <- map_dbl(t, ~ rnorm(1, mu, sd = sqrt(.x)))
#
# return(x)
# }
paste0("My R function:")
## [1] "My R function:"
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate
set.seed(123)
x <- Draw_Laplace_R(n = 1e5, mu = mu, lambda = lambda)
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = 'green') +
geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
stat_function(fun = function(x) LaplacesDemon::dlaplace(x, location = mu, scale = 1/lambda), color = "red", alpha = 0.4)
paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: -0.00254867097691125; Variance: 0.50212479220306"
The Assymmetric Laplace distribution with parameters
location
= \(\mu\),
scale
= \(\sigma\) and
skewness
= \(\tau\) has
density
\[ f(x \mid \mu, \sigma, \tau) = \frac{\tau(1-\tau)}{\sigma} exp\left\{-\rho_\tau \left(\frac{x-\mu}{\sigma}\right)\right\} \] where \[ \rho_\tau(x) = x(\tau - I(x<0)) \] can be written as a normal-exponential mixture of the form
\[ f(x \mid \mu, \sigma, \tau) = \]
library(ald)
# dALD(y, mu = 0, sigma = 1, p = 0.5)
# pALD(q, mu = 0, sigma = 1, p = 0.5, lower.tail = TRUE)
# qALD(prob, mu = 0, sigma = 1, p = 0.5, lower.tail = TRUE)
# rALD(n, mu = 0, sigma = 1, p = 0.5)
# dALD(y, mu = 0, sigma = 1, p = 0.5)
mu <- 0 # location
sigma <- 1 # scale
tau <- 0.8
p <- data_frame(x = seq(-40,80,0.5)) %>%
ggplot(aes(x=x))
p + geom_vline(xintercept = mu, linetype = "dashed", alpha = 0.4) +
stat_function(fun = dALD, args = c(mu = mu, sigma = sigma, p = tau),
color = "#84CA72", size = 1) +
ggtitle(paste0("ALD (mu = ", mu, ", sigma = ", sigma,", tau = ", tau, " ) Distribution")) +
xlab("x") +
ylab(paste0("dALD")) +
theme(plot.title = element_text(hjust = 0.5))
The binomial distribution with size
= \(n\) and prob
= \(p\) has density \[
p(x) = \left(\begin{array}
n
\\
x
\end{array}\right)
\] for \(x = 0,\ldots,n\).
rbinom(4, size=1, prob=0.5)
## [1] 1 1 0 1
dbinom(4, size=12, prob=0.2)
## [1] 0.1328756
CTS-studt
The Normal-Inv-Gamma prior is the scale mixture of Normals representation of the fat-tailed Student-t distribution. This hierarchical prior, which is also called “sparse Bayesian Learning” prior in signal processing, takes the form
\[ \mathbf{\beta} \mid \{\tau_j^2\}_{j=1}^p, \sigma^2 \sim N_{p}(\mathbf{0}, \sigma^2 \mathbf{Q}), \] \[ \tau_j^2 \sim inv-Gamma(a,b), \quad \text{for} \quad j = 1,\ldots,p, \] \[ \sigma^2 \sim \frac{1}{\sigma^2} \] where \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\)
The conditional posteriors are the form
\[ \mathbf{\beta} \mid \bullet \sim N_p(\mathbf{V}\times \mathbf{X'y}, \sigma^2\mathbf{V}), \] \[ \tau_j^2 \mid \bullet \sim Inv-Gamma\left(a + \frac{1}{2}, b + \frac{\beta_j^2}{2\sigma^2}\right), \quad \text{for} \quad j = 1,\ldots,p, \]
\[ \sigma^2 \mid \bullet \sim Inv-Gamma\left(\frac{n+p}{2},\frac{\psi + \mathbf{\beta'}\mathbf{Q}^{-1}\mathbf{\beta}}{2}\right) \]
where \(\mathbf{V} = (\mathbf{X'X} + \mathbf{Q}^{-1})^{-1}\), \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\) and \(\psi = (\mathbf{y} - \mathbf{X\beta})'(\mathbf{y} - \mathbf{X\beta})\)
# Student-t distribution ====
# df <- 6
# ncp <- NA # non-centrality parameter
#
# mean <- 0
# sd <- sqrt(df/(df-2))
#
# p <- data_frame(x = c(mean - 4*sd, mean + 4*sd)) %>%
# ggplot(aes(x=x))
#
# p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
# stat_function(fun = dt, args = c(df),
# color = "#84CA72", size = 1) +
# labs(caption = paste0("t Distribution (df = ", df,")")) +
# xlab("x") +
# ylab(paste0("dt(x, df = ", df,")")) +
# coord_cartesian(ylim = c(0,0.5)) +
# theme(
# plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
# )
dt2 <- function(df) {
function (x) dt(x, df = df)
}
stat_studt <- function(df) {
stat_function(aes(colour = df), fun = dt2(df), size = 0.5)
}
ggplot(data_frame(x = c(-5,5)), aes(x)) +
geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
lapply(c(2,6,10), stat_studt) +
scale_colour_viridis_c(limits = c(2,10)) +
labs(caption = paste0("t Distribution (df)")) +
xlab("x") +
ylab(paste0("dt(x, df)")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
# CTS-studt prior ====
ndraws <- 1e5
a <- 1
b <- 0.001
tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
library(purrr)
x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = "red", fill = "lightgreen") +
xlim(-10,10) + coord_cartesian(c(-5,5))+
labs(caption = paste0("CTS-studt prior (a = ", a, ", b = ", b, ")")) +
xlab("x") +
ylab(paste0("density")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
# ggplot(data_frame(x), aes(x=x)) +
# stat_density(geom = "line", position = "identity") +
# xlim(-10,10) + coord_cartesian(c(-5,5))
# Choose hyper-parameters
library(purrr)
ndraws <- 1e5
CTS_studt_pa1 <- function(a){
set.seed(123)
ndraws <- 1e5
b <- 0.001
tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
return(x)
}
df1_pa1 <- lapply(c(1,2,3), CTS_studt_pa1)
df2_pa1 <- data_frame(x = unlist(df1_pa1), a = factor(rep(c(1,2,3), each = ndraws)))
plot_pa1 <- ggplot(df2_pa1, aes(x=x, color = a)) +
geom_density() +
xlim(-10,10) + coord_cartesian(c(-5,5))
tail_pa1 <- ggplot(df2_pa1, aes(x=x, color = a)) +
geom_line(aes(y = 1 - ..y.., linetype = a), stat='ecdf') +
scale_linetype_manual(values = c(1, 2, 3)) +
coord_cartesian(xlim = c(0, 1.5), ylim = c(0, 0.5), expand = FALSE) +
theme(axis.title = element_blank(), legend.position = "bottom") +
guides(lty=guide_legend(nrow=1,byrow=TRUE))
CTS_studt_pa2 <- function(b){
set.seed(123)
ndraws <- 1e5
a <- 1
tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
return(x)
}
df1_pa2 <- lapply(c(0.001,0.01,0.1), CTS_studt_pa2)
df2_pa2 <- data_frame(x = unlist(df1_pa2), b = factor(rep(c(0.001,0.01,0.1), each = ndraws)))
plot_pa2 <- ggplot(df2_pa2, aes(x=x, color = b)) +
geom_density() +
xlim(-10,10) + coord_cartesian(c(-5,5))
tail_pa2 <- ggplot(df2_pa2, aes(x=x, color = b)) +
geom_line(aes(y = 1 - ..y.., linetype = b), stat='ecdf') +
scale_linetype_manual(values = c(1, 2, 3)) +
coord_cartesian(xlim = c(0, 1.5), ylim = c(0, 0.5), expand = FALSE) +
theme(axis.title = element_blank(), legend.position = "bottom") +
guides(lty=guide_legend(nrow=1,byrow=TRUE))
# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian
# percentage of obs removed
sum((df2_pa2$x < 10) & (df2_pa2$x > -10))/length(df2_pa2$x)*100
## [1] 99.961
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(plot_pa1,plot_pa2, ncol = 2)
# tail behavior
grid.arrange(tail_pa1,tail_pa2, ncol = 2)
CTS-lasso
Reference: Park and Casella (2008) - The Bayesian Lasso
The Laplace prior takes the form \[ \mathbf{\beta} \mid \{\tau_j^2\}_{j=1}^p, \sigma^2 \sim N_p(\mathbf{0}, \sigma^2\mathbf{Q}), \] \[ \tau_j^2 \mid \lambda^2 \sim Exponential\left(\frac{\lambda^2}{2}\right), \quad \text{for} \quad j = 1,\ldots, p, \]
\[ \lambda^2 \sim Gamma(c,d) \]
\[ \sigma^2 \sim \frac{1}{\sigma^2} \]
where \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\)
The conditional posteriors are of the form \[ \mathbf{\beta} \mid \bullet \sim N_p\left(\mathbf{V}\times \mathbf{X}'\mathbf{y}, \sigma^2 \mathbf{V}\right) \] \[ \frac{1}{\tau_j^2} \mid \bullet \sim IG\left(\sqrt{\frac{\lambda^2\sigma^2}{\beta_j^2}}, \lambda^2\right), \quad \text{for} \quad j = 1,\ldots,p, \]
\[ \lambda^2 \mid \bullet \sim Gamma\left(c + p, d + \frac{\sum_{j=1}^p\tau_j^2}{2}\right) \]
\[ \sigma^2 \mid \bullet \sim Inv-Gamma\left(\frac{n+p}{2},\frac{\psi + \mathbf{\beta'}\mathbf{Q}^{-1}\mathbf{\beta}}{2}\right) \]
where \(\mathbf{V} = (\mathbf{X'X} + \mathbf{Q}^{-1})^{-1}\), \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\) and \(\psi = (\mathbf{y} - \mathbf{X\beta})'(\mathbf{y} - \mathbf{X\beta})\)
# Double exponential prior ====
ndraws <- 1e5
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate
tausq <- rexp(ndraws, rate = lambda^2/2)
library(purrr)
x <- map_dbl(tausq, ~ rnorm(1, mean = mu, sd = sqrt(.x)))
ggplot(data_frame(x), aes(x=x)) +
geom_density(fill = "lightgreen") +
stat_function(fun = LaplacesDemon::dlaplace, args = c(location = mu, scale = 1/lambda),
color = "red", size = 1) +
labs(caption = paste0("Laplace prior (rate = ", lambda, ")")) +
xlab("x") +
ylab(paste0("density")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
# Choice of rate ====
library(LaplacesDemon)
dlaplace2 <- function(lambda) {
function (x) LaplacesDemon::dlaplace(x, location = 0, scale = 1/lambda)
}
stat_laplace <- function(lambda) {
stat_function(aes(colour = lambda),
fun = dlaplace2(lambda),
size = 0.5)
}
ggplot(data_frame(x = c(-5,5)), aes(x=x)) +
geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
lapply(c(2,1,0.1), stat_laplace) +
scale_colour_viridis_c(limits = c(0.1,2)) +
labs(caption = paste0("Laplace Prior (lambda)")) +
xlab("x") +
ylab(paste0("dlapalce")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
# Hierarchical prior ====
ndraws <- 1e5
library(purrr)
c <- 1
d <- 1.05
lambdasq <- rgamma(ndraws, shape = c, rate = d)
tausq <- map_dbl(lambdasq, ~rexp(1, rate = .x/2))
x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
# c(mean(lambdasq), sd(lambdasq))
rate <- sqrt(mean(lambdasq)) # for corresponding Laplace distribution
ggplot(data_frame(x), aes(x=x)) +
geom_density(fill = "lightgreen") +
geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
stat_function(fun = LaplacesDemon::dlaplace, args = c(location = 0, scale = 1/rate),
color = "red", size = 0.5) +
xlim(-10,10) + coord_cartesian(c(-5,5)) +
labs(caption = paste0("Hierarchical prior (c = ", c, ", d = ", d, ")")) +
xlab("x") +
ylab(paste0("density")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
paste0("Corresponding Laplace distribution - Rate = ", rate)
## [1] "Corresponding Laplace distribution - Rate = 0.977472489051276"
# Choice of hyper-parameters
library(purrr)
ndraws <- 1e5
CTS_lasso_pa1 <- function(c) {
set.seed(123)
d = 1
lambdasq <- rgamma(ndraws, shape = c, rate = d)
tausq <- map_dbl(lambdasq, ~rexp(1, rate = .x/2))
x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
return(x)
}
df1_pa1 <- lapply(c(1,2,3), CTS_lasso_pa1)
df2_pa1 <- data_frame(x = unlist(df1_pa1), c = factor(rep(c(1,2,3), each = ndraws)))
plot_pa1 <- ggplot(df2_pa1, aes(x=x, color = c)) +
geom_density() +
xlim(-10,10) + coord_cartesian(c(-5,5))
tail_pa1 <- ggplot(df2_pa1, aes(x=x, color = c)) +
geom_line(aes(y = 1 - ..y.., linetype = c), stat='ecdf') +
scale_linetype_manual(values = c(1, 2, 3)) +
coord_cartesian(xlim = c(0, 2), ylim = c(0, 0.5), expand = FALSE) +
theme(axis.title = element_blank(), legend.position = "bottom") +
guides(lty=guide_legend(nrow=1,byrow=TRUE))
CTS_lasso_pa2 <- function(d) {
set.seed(123)
c = 1
lambdasq <- rgamma(ndraws, shape = c, rate = d)
tausq <- map_dbl(lambdasq, ~rexp(1, rate = .x/2))
x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
return(x)
}
df1_pa2 <- lapply(c(1,0.5,0.1), CTS_lasso_pa2)
df2_pa2 <- data_frame(x = unlist(df1_pa2), d = factor(rep(c(1,0.5,0.1), each = ndraws)))
plot_pa2 <- ggplot(df2_pa2, aes(x=x, color = d)) +
geom_density() +
xlim(-10,10) + coord_cartesian(c(-5,5))
tail_pa2 <- ggplot(df2_pa2, aes(x=x, color = d)) +
geom_line(aes(y = 1 - ..y.., linetype = d), stat='ecdf') +
scale_linetype_manual(values = c(1, 2, 3)) +
coord_cartesian(xlim = c(0, 2), ylim = c(0, 0.5), expand = FALSE) +
theme(axis.title = element_blank(), legend.position = "bottom") +
guides(lty=guide_legend(nrow=1,byrow=TRUE))
# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian
# percentage of obs removed
# sum((df2_pa1$x < 10) & (df2_pa1$x > -10))/length(df2_pa1$x)*100
# sum((df2_pa2$x < 10) & (df2_pa2$x > -10))/length(df2_pa2$x)*100
library(gridExtra)
grid.arrange(plot_pa1,plot_pa2, ncol = 2)
# tail behavior
grid.arrange(tail_pa1,tail_pa2, ncol = 2)
CTS-horse-mx
ndraws = 1e5
library(purrr)
nu <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
lamdasq <- map_dbl(nu, ~ 1/rgamma(1, shape = 1/2, scale = .x))
xi <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
tausq <- map_dbl(xi, ~ 1/rgamma(1, shape = 1/2, scale = .x))
temp <- lamdasq*tausq
x <- map_dbl(temp, ~rnorm(1, mean = 0, sd = .x))
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = "red", fill = "lightgreen") +
xlim(-10,10) + coord_cartesian(c(-5,5))+
labs(caption = paste0("CTS-horseshoe-mx prior")) +
xlab("x") +
ylab(paste0("density")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
## Warning: Removed 24095 rows containing non-finite values (stat_density).
CTS-horse-sl
ndraws = 1e5
library(purrr)
lambda <- rhalfcauchy(ndraws, scale= 1)
tau <- map_dbl(lambda, ~rhalfcauchy(1, scale = .x))
x <- map_dbl(tau, ~rnorm(1, mean = 0, sd = .x))
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = "red", fill = "lightgreen") +
xlim(-10,10) + coord_cartesian(c(-5,5))+
labs(caption = paste0("CTS-horseshoe-sl prior")) +
xlab("x") +
ylab(paste0("density")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
## Warning: Removed 10430 rows containing non-finite values (stat_density).
SSVS-normal
SSVS-studt
ndraws <- 1e5
# SSVS-studt1
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))
tausq0 <- 1e-3
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))
# SSVS-studt2
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))
tausq0 <- tausq1*1e-3
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))
#mean(tausq0) #0.012
# SSVS-studt3
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))
alpha0 <- 1
beta0 <- 0.0001
tausq0 <- 1/rgamma(ndraws, shape = alpha0, scale = 1/beta0)
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))
#mean(tausq0) #0.0035
pi <- rbern(1e5, prob = 0.2)
x <- pi*studt_slab + (1-pi)*studt_spike
# tmp <- runif(1e5)
# x <- (tmp<=0.2)*studt_slab + (tmp>0.2)*studt_spike
ggplot(data_frame(x), aes(x=x)) +
geom_density(color = "red", fill = "lightgreen") +
xlim(-10,10) + coord_cartesian(c(-5,5))+
labs(caption = paste0("SSVS-studt prior")) +
xlab("x") +
ylab(paste0("density")) +
theme(
plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
)
## Warning: Removed 195 rows containing non-finite values (stat_density).
SSVS-lasso
SSVS-horse
##### Sample from prior densities -----
set.seed(123)
ndraws <- 1e5
## ridge
normal <- rnorm(ndraws, mean=0, sd=0.5)
## Student's t
studt <- rt(ndraws, df=1)
CTS_studt_fun <- function(alpha = 1, beta = 0.001, ndraws = 1e5) {
library(purrr)
tausq <- 1/rgamma(ndraws, shape = alpha, scale = 1/beta)
x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
return(x)
}
CTS_studt <- CTS_studt_fun(alpha = 1, beta = 0.001, ndraws = 1e5)
## lasso
lasso <- rmutil::rlaplace(ndraws, m=0, s=0.5)
CTS_lasso_fun <- function (alpha = 1, sigma = 1, ndraws = 1e5) {
library(purrr)
lambdasq <- rgamma(ndraws, shape = alpha, scale = sigma)
tausq <- map_dbl(lambdasq, ~rexp(1, rate = .x/2))
x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
return(x)
}
CTS_lasso <- CTS_lasso_fun(alpha = 1, sigma = 1, ndraws = 1e5)
## horseshoe
CTS_horsemx_fun <- function(ndraws = 1e5) {
library(purrr)
nu <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
lamdasq <- map_dbl(nu, ~ 1/rgamma(1, shape = 1/2, scale = .x))
xi <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
tausq <- map_dbl(xi, ~ 1/rgamma(1, shape = 1/2, scale = .x))
temp <- lamdasq*tausq
x <- map_dbl(temp, ~rnorm(1, mean = 0, sd = .x))
return(x)
}
CTS_horsemx <- CTS_horsemx_fun(ndraws = 1e5)
CTS_horsesl_fun <- function(ndraws = 1e5) {
library(purrr)
lambda <- rhalfcauchy(ndraws, scale= 1)
tau <- map_dbl(lambda, ~rhalfcauchy(1, scale = .x))
x <- map_dbl(tau, ~rnorm(1, mean = 0, sd = .x))
return(x)
}
CTS_horsesl <- CTS_horsesl_fun(ndraws = 1e5)
##### Plot -----
## Create plot data
df_comb <- data_frame(CTS_studt, CTS_lasso, CTS_horsemx, CTS_horsesl)
df_long <- gather(df_comb, Prior, value) # long format
df_long$Prior <- factor(df_long$Prior)
levels(df_long$Prior) <- list("CTS_studt" = "CTS_studt", "CTS_lasso" = "CTS_lasso", "CTS_horsemx" = "CTS_horsemx", "CTS_horsesl" = "CTS_horsesl")
df_long$asymp <- rep(NA, nrow(df_long))
df_long[which(df_long$Prior=="CTS_horsemx"|df_long$Prior=="CTS_horsesl"), "asymp"] <- 0
df_long <- gather(df_comb, Prior, value) %>%
mutate(Prior = factor(Prior, levels = list("CTS_studt" = "CTS_studt", "CTS_lasso" = "CTS_lasso", "CTS_horsemx" = "CTS_horsemx", "CTS_horsesl" = "CTS_horsesl")),
asymp = case_when(Prior=="CTS_horsemx" ~ 0,
Prior=="CTS_horsesl" ~ 0,
TRUE ~ NA_real_))
plotComb <- ggplot(df_long, aes(x = value, colour = Prior, linetype = Prior)) +
stat_density(geom = "line", position = "identity") +
geom_vline(aes(xintercept = asymp), colour = "grey") +
scale_linetype_manual(values = c(1,2,3,4)) +
scale_color_manual(values = c(1,2,3,4)) +
xlim(-10,10) + coord_cartesian(xlim=c(-5,5), ylim =c(0,5)) +
theme(axis.title = element_blank(), legend.position = "bottom")
# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian
##### Tail behavior -----
# Plot the survival function, i.e., 1-CDF
# See https://stats.stackexchange.com/questions/86429/which-has-the-heavier-tail-lognormal-or-gamma
tailComb <- ggplot(df_long, aes(x = value, group = Prior)) +
geom_line(aes(y = 1 - ..y.., linetype = Prior, colour = Prior), stat='ecdf') +
scale_linetype_manual(values = c(1,2,3,4)) +
scale_color_manual(values = c(1,2,3,4)) +
coord_cartesian(xlim = c(0, 8), ylim = c(0, 0.5), expand = FALSE) +
theme(axis.title = element_blank(), legend.position = "bottom", legend.title = element_blank()) +
guides(lty=guide_legend(nrow=1,byrow=TRUE))
library(gridExtra)
get_legend <- function(myggplot){
require(gridExtra)
tmp <- ggplot_gtable(ggplot_build(myggplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)
}
legend <- get_legend(plotComb)
## Warning: Removed 36684 rows containing non-finite values (stat_density).
## Warning: Removed 200000 rows containing missing values (geom_vline).
plotComb <- plotComb + theme(legend.position="none")
tailComb <- tailComb + theme(legend.position="none")
grid.arrange(plotComb,tailComb, legend, ncol = 2, nrow =2,
layout_matrix = rbind(c(1,2),c(3,3)),
heights = c(4,1))
## Warning: Removed 36684 rows containing non-finite values (stat_density).
## Removed 200000 rows containing missing values (geom_vline).
# install.packages("pkgsearch")
library(pkgsearch)
invGammaPkg <- pkg_search(query = "inverse gamma", size = 200)
invGammaPkgShort <- invGammaPkg %>%
filter(maintainer_name != "ORPHANED", score > 1000) %>%
select(score, package, downloads_last_month) %>%
arrange(downloads_last_month)
head(invGammaPkgShort)
## # A data frame: 0 × 3
## # … with 3 variables: score <dbl>, package <chr>, downloads_last_month <int>