\(\newcommand{\mathbbm}[1]{\mathbb{#1}}\)

library(tidyverse)
library(ggplot2)

1 Statistical Distributions

1.1 Normal Distribution

The normal distribution has density:

\[ f(x) = \frac{1}{\sqrt{2\pi\sigma^{2}}} exp\left(-\frac{(x-\mu)^2}{2\sigma^2}\right) \]

help("Normal")

1.1.1 True PDF

Plotting the standard normal distribution

# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('Mu','Mu', min = - 3, max = 3, value = 0, step = 1),
#     sliderInput('Sigma','Sigma', min = 1, max = 5, value = 1, step = 1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
# 
#   selectedData <- reactive({
#     data_frame(x = c(input$Mu - 4*input$Sigma, input$Mu + 4*input$Sigma))
#   })
# 
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = input$Mu, linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dnorm, args = c(input$Mu,input$Sigma),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Normal Distribution N (", input$Mu," , ", input$Sigma^2, ")")) +
#       xlab("x") +
#       ylab("dnorm(x)") +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )
# 
#   })
# 
# }
# 
# shinyApp(ui = ui, server = server)
mu <- 0
sigma <- 1

mean <- mu
var <- sigma^2
sd <- sigma
q9999 <- qnorm(p = 0.9999, mean = mu, sd = sigma, lower.tail = TRUE)

p <- data_frame(x = c(-q9999, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mu, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dnorm, args = c(mu,sigma),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Normal Distribution N (", mu," , ", sigma^2, ")")) +
    xlab("x") +
    ylab("dnorm(x)") +
  theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0; True Variance: 1"
ggplot(data.frame(x = -5:5), aes(x)) + 
  stat_function(fun = dnorm, args = c(0,1), geom = 'area', xlim = c(1,5), fill = 4) + 
  stat_function(fun = dnorm, args = c(0,1)) +
  labs(x = "", y = "") +
  ggtitle("Normal distribution") +
  theme(plot.title = element_text(hjust = 0.5))

1.1.2 Sampling

# Built-in R function
paste0("Built-in R function:")
## [1] "Built-in R function:"
# rnrom(n, mean=0, sd=1)
mu <- 0
sigma <- 1

set.seed(123)
x <- rnorm(n = 1e5, mean = mu, sd = sigma)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dnorm(x, mean = mu, sd = sigma), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.000976748821684996; Variance: 0.999466876209305"
# Built-in Rcpp function
paste0("Built-in Rcpp function:")
## [1] "Built-in Rcpp function:"
library("Rcpp")
# evalCpp("Rcpp::rnorm(n,0,1)")
mu <- 0
sigma <- 1

set.seed(123)
x <- evalCpp("Rcpp::rnorm(1e5, 0, 1)")

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dnorm(x, mean = mu, sd = sigma), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.000976748821684996; Variance: 0.999466876209305"
library("Rcpp")

set.seed(123)
rnorm(1,0,1)

set.seed(123)
evalCpp("R::rnorm(0,1)")

set.seed(123)
rnorm(3,0,1)

set.seed(123)
evalCpp("Rcpp::rnorm(3,0,1)")

1.2 t distribution

The t distribution with df = \(\nu\) degrees of freedom has density

\[ f(x) = \]

help("TDist")

1.2.1 True PDF

Plotting the Central t distribution

# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('df','df', min = 1, max = 12, value = 6, step = 1),
#     sliderInput('ncp','ncp', min = 1, max = 5, value = 1, step = 1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
#   
#   mean <- 0
#   sd <- reactive({sqrt(input$df/(input$df-2))})
#   selectedData <- reactive({
#     data_frame(x = c(mean - 4*sd(), mean + 4*sd()))
#   })
#   
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dt, args = c(input$df),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("t Distribution (df = ", input$df,")")) +
#       xlab("x") +
#       ylab(paste0("dt(x, df = ", input$df,")")) +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       ) 
#       
#   })
#   
# }
# 
# shinyApp(ui = ui, server = server)
df <- 6
ncp <- NA # non-centrality parameter

mean <- 0
var <- df/(df-2)
sd <- sqrt(df/(df-2))
q9999 <- qt(p = 0.9999, df = df, lower.tail = TRUE)

p <- data_frame(x = c(-q9999, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dt, args = c(df),
                  color = "#84CA72", size = 1) +
    labs(caption = paste0("t Distribution (df = ", df,")")) +
    xlab("x") +
    ylab(paste0("dt(x, df = ", df,")")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      ) 

paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0; True Variance: 1.5"

1.2.2 Sampling

# Built-in R function
paste0("Built-in R function:")
## [1] "Built-in R function:"
# rt(n, df, ncp)
df <- 6
ncp <- NA # non-centrality parameter

set.seed(123)
x <- rt(n = 1e5, df = df)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dt(x, df = df), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: -0.00191542325893477; Variance: 1.51823043058928"

1.3 Gamma Distribution

The Gamma distribution with parameters shape = \(c\) and rate = \(d\) has density

\[ f(x \mid c,d) = \frac{d^c}{\Gamma(c)} x^{c - 1} e^{(-dx)} \] for \(x \geq 0, c > 0\) and \(d > 0\).

The mean and variance are

\[ E(X) = \frac{c}{d}; \quad Var(X) = \frac{c}{d^2} \]

The Gamma distribution with parameters shape = \(c\) and scale = \(\delta\) has density

\[ f(x \mid c, \delta) = \frac{1}{\delta^c \Gamma(c)} x^{c - 1} e^{(-x/\delta)} \] for \(x \geq 0, c > 0\) and \(\delta > 0\).

The mean and variance are

\[ E(X) = c \delta; \quad Var(X) = c \delta^2 \]

help("GammaDist")

1.3.1 True PDF

Plotting the Gamma distribution

# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('alpha','alpha (shape)', min = 1, max = 12, value = 1, step = 1),
#     sliderInput('sigma','sigma (scale)', min = 0, max = 3, value = 0.5, step = 0.1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
# 
#   mean <- reactive({input$alpha*input$sigma})
#   sd <- reactive({sqrt(input$alpha*input$sigma^2)})
# 
#   selectedData <- reactive({
#     data_frame(x = c(0, qgamma(p = 0.9999, shape = input$alpha, scale = input$sigma)))
#   })
# 
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dgamma, args = c(input$alpha,input$sigma),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Gamma (alpha = ", input$alpha, ", sigma = ", input$sigma,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dgamma(x, alpha = ", input$alpha, ", sigma = ", input$sigma,")")) +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )
# 
#   })
# 
# }
# 
# shinyApp(ui = ui, server = server)
# dgamma(x, shape, rate = 1, scale = 1/rate, log = FALSE)
c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

mean <- c/d
var <- c/d^2
sd <- sqrt(c/d^2)
q9999 <- qgamma(p = 0.9999, shape = c, rate = d)

p <- data_frame(x = c(0, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dgamma, args = c(shape = c, rate = d),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Gamma (c = ", c, ", d = ", d,") Distribution")) +
    xlab("x") +
    ylab(paste0("dgamma")) +
    theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 6; True Variance: 12"

1.3.2 Sampling

c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

# Built-in function
paste0("Built-in R function:")
## [1] "Built-in R function:"
set.seed(123)
# x <- rgamma(1e5, c, d)
x <- rgamma(n = 1e5, shape = c, rate = d) 

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 6.00987751542745; Variance: 11.9980901747901"
c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

# Built-in function
paste0("Built-in Rcpp function:")
## [1] "Built-in Rcpp function:"
set.seed(123)
x <- Rcpp::evalCpp("randg(1e5, arma::distr_param(3,2))", depends = "RcppArmadillo") # shape and scale parameters

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 5.99775522536012; Variance: 11.9708700496215"
library(Rcpp)
library(RcppArmadillo)

c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

Draw_Gamma <- cppFunction(depends = "RcppArmadillo",
'
  arma::Col<double> rrgamma(int n, double c, double d) {
    return(rgamma(n, c, 1/d)); // shape and scale parameters
  }
'
)

# My Rcpp function
paste0("My Rcpp function:")
## [1] "My Rcpp function:"
set.seed(123)
x <- Draw_Gamma(1e5, c, d)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 6.00987751542745; Variance: 11.9980901747901"

1.4 Inverse-Gamma Distribution

The Inverse-Gamma distribution with parameters shape = \(a\) and rate = \(b\) has density

\[ f(x \mid a, b) = \frac{1}{b^a\Gamma(a)} x^{-a-1} e^{\left(-\frac{1}{bx}\right)} \] for \(x \geq 0, a > 0\) and \(b > 0\).

The mean and variance are

\[ E(X) = \frac{1}{b(a-1)} \quad (a>1) ; \quad Var(X) = \frac{1}{b^2(a-1)^2(a-2)} \quad (a>2) \]

The Inverse-Gamma distribution with parameters shape = \(a\) and scale = \(\beta\) has density

\[ f(x \mid a, \beta) = \frac{\beta^a}{\Gamma(a)} x^{-a-1} e^{(-\beta/x)} \] for \(x \geq 0, a > 0\) and \(\beta > 0\).

The mean and variance are

\[ E(X) = \frac{\beta}{a-1} \quad (a>1) ; \quad Var(X) = \frac{\beta^2}{(a-1)^2(a-2)} \quad (a>2) \]

1.4.1 True PDF

Plotting the Inverse-Gamma distribution

# # My Shiny App
# 
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# library(invgamma)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('alpha','alpha (shape)', min = 1, max = 12, value = 1, step = 1),
#     sliderInput('beta','beta (scale)', min = 0, max = 3, value = 0.5, step = 0.5)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
#   
#   mode <- reactive({input$beta/(input$alpha+1)})
# 
#   selectedData <- reactive({
#     data_frame(x = c(0, qinvgamma(p = 0.9, shape = input$alpha, scale = input$beta)))
#   })
#   
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mode(), linetype = "dashed", alpha = 0.4, color = "blue") +
#       stat_function(fun = dinvgamma, args = c(input$alpha,input$beta),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Inverse Gamma (alpha = ", input$alpha, ", beta = ", input$beta,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dinvgamma(x, alpha = ", input$alpha, ", beta = ", input$beta,")")) +
#       coord_cartesian(xlim = c(0,3)) +
#       coord_cartesian(ylim = c(0,5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       ) 
#       
#   })
#   
# }
# 
# shinyApp(ui = ui, server = server)
a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

library(extraDistr)
mean <- 1/(b*(a-1))
var <- 1/(b^2*(a-1)^2*(a-2))
sd <- sqrt(1/(b^2*(a-1)^2*(a-2)))
q9999 <- extraDistr::qinvgamma(p = 0.9999, alpha = a, beta = 1/b) # shape and scale parameters

p <- data_frame(x = c(0, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = extraDistr::dinvgamma, args = c(alpha = a, beta = 1/b),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Inverse-Gamma (a = ", a, ", b = ", b,") Distribution")) +
    xlab("x") +
    ylab(paste0("dinvgamma")) +
    theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0.25; True Variance: 0.0625"

1.4.2 Sampling

a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

library(extraDistr)
# Built-in function
paste0("Built-in function:")
## [1] "Built-in function:"
set.seed(123)
x <- extraDistr::rinvgamma(n = 1e5, alpha = a, beta = 1/b) # shape and scale parameters

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"
# My R function
Draw_InvGamma_R <- function(n, shape, rate) {
  x <- rgamma(n = n, shape = shape, rate = 1/rate)
  y <- 1/x
  return(y)
}

a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

paste0("My R function:")
## [1] "My R function:"
set.seed(123)
x <- Draw_InvGamma_R(n = 1e5, shape = a, rate = b)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"
# My Rcpp function
Draw_InvGamma_Rcpp <- cppFunction(depends = "RcppArmadillo",
'
  arma::Col<double> rrinvgamma(int n, double a, double b) {
    return(1/rgamma(n, a, b)); // shape and scale parameters
  }
'
)

a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

paste0("My Rcpp function:")
## [1] "My Rcpp function:"
set.seed(123)
x <- Draw_InvGamma_Rcpp(1e5, a, b)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"

1.5 Inverse Gaussian Distribution

Reference: John R. Michael, William R. Schucany, and Roy W. Haas (1976) Generating Random Variates Using Transformations with Multiple Roots. The American Statistician 30: 88-90.

The Inverse Gaussian distribution (also called the Wald distribution) with parameters location \(= \mu\) and shape \(= \lambda\) has density \[ f(x \mid \mu, \lambda) = \frac{\lambda}{\sqrt{2\pi x^3}}exp\left(\frac{-\lambda(x-\mu)^2}{2\mu^2x}\right) \] for \(x > 0, \mu > 0\) and \(\lambda > 0\)

The mean and variance are \[ E(X) = \mu; \quad Var(X) = \frac{\mu^3}{\lambda} \]

1.5.1 True PDF

Plotting the \(IG(\mu, \lambda)\) distribution

library(extraDistr)
# dwald(x, mu, lambda, log = FALSE)

mu <- 1
lambda <- 1

mean <- mu
var <- mu^3/lambda
sd <- sqrt(mu^3/lambda)

p <- data_frame(x = c(0, mean + 15*sd)) %>% 
  ggplot(aes(x = x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
  stat_function(fun = dwald, args = c(mu = mu, lambda = lambda),
                color = "#84CA72", size = 1) +
  ggtitle(paste0("Inverse Gaussian (mu = ", mu, ", lambda = ", lambda,") Distribution")) +
  xlab("x") +
  ylab(paste0("dInverseGaussian")) +
  theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 1; True Variance: 1"

1.5.2 Sampling

library(extraDistr)
# Built-in function
paste0("Built-in function:")
## [1] "Built-in function:"
set.seed(123)
x <- rwald(n = 1e5, mu = 1, lambda = 1)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 1.00506842306895; Variance: 1.02736098395571"
# My R function
Draw_IG <- function(n, mu, lambda) {
  
  y <- rep(NA, n)
  
  for (i in 1:n) {
    # First generate a chi_square(1) variate
    v0 <-  rnorm(1,0,1)^2

    # Now find the two roots in Eq. (5) of the paper
    x1 <-  mu + (0.5*(mu^2)*v0)/lambda - (0.5*mu/lambda)*sqrt(4*mu*lambda*v0 + (mu^2)*(v0^2));
    x2 <-  (mu^2)/x1

    # Accept x1 with probability p1_v0, otherwise accept x2
    p1_v0 <-  mu/(mu+x1)
    
    if (runif(1) <= p1_v0) {
      y[i] <- x1
    } else {
      y[i] <- x2
    }
    
  }
  
  return(y)
}

paste0("My R function:")
## [1] "My R function:"
set.seed(123)
x <- Draw_IG(n = 1e5, mu = 1, lambda = 1)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 1.00350059340242; Variance: 1.01015584280717"
library(Rcpp)
library(RcppArmadillo)

Dfuns <- cppFunction(depends = "RcppArmadillo",
'
arma::Col<double> rrinvgauss(int n, double mu, double lambda){
    arma::Col<double> random_vector(n);
    double z,y,x,u;
    for(int i=0; i<n; ++i){
        z=R::rnorm(0,1);
        y=z*z;
        x=mu+0.5*mu*mu*y/lambda - 0.5*(mu/lambda)*sqrt(4*mu*lambda*y+mu*mu*y*y);
        u=R::runif(0,1);
        if(u <= mu/(mu+x)){
            random_vector(i)=x;
        }else{
            random_vector(i)=mu*mu/x;
        };
    }
    return(random_vector);
}
'           
)

paste0("My Rcpp function:")
## [1] "My Rcpp function:"
set.seed(123)
x <- Dfuns(n = 1e5, mu = 1, lambda = 1)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 1.00350059340242; Variance: 1.01015584280717"

1.6 Exponential Distribution

The exponential distribution with parameter rate = \(r\) has density

\[ f(x \mid r) = r e^{-r x} \]

for \(x \in [0, \infty), r > 0\).

The mean and variance are

\[ E(X) = \frac{1}{r}; \quad Var(X) = \frac{1}{r^2} \]

1.6.1 True PDF

Plotting the Exponential distribution

# # My Shiny App
# 
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('lambda','lambda (rate)', min = 0.5, max = 2, value = 1, step = 0.5),
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
#   
#   mean <- reactive({1/input$lambda})
#   sd <- reactive({1/input$lambda})
# 
#   selectedData <- reactive({
#     data_frame(x = c(0, qexp(p = 0.9999, rate = input$lambda)))
#   })
#   
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dexp, args = c(input$lambda),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Exponential (lambda = ", input$lambda,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dexp(x, lambda = ", input$lambda,")")) +
#       coord_cartesian(ylim = c(0,2)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       ) 
#       
#   })
#   
# }
# 
# shinyApp(ui = ui, server = server)
# dexp(x, rate = 1, log = FALSE)
r <- 1.5 # rate

mean <- 1/r
var <- 1/r^2
sd <- 1/r
q9999 <- qexp(p = 0.9999, rate = r)

p <- data_frame(x = c(0, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dexp, args = c(rate = r),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Exponential (r = ", r,") Distribution")) +
    xlab("x") +
    ylab(paste0("dexp")) +
    theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0.666666666666667; True Variance: 0.444444444444444"

1.6.2 Sampling

r <- 1.5 # rate

# Built-in function
paste0("Built-in R function:")
## [1] "Built-in R function:"
# rexp(n, rate = 1)

set.seed(123)
x <- rexp(n = 1e5, rate = r) 

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dexp(x, rate = r), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: 0.664974647598273; Variance: 0.439985915259907"

1.7 Laplace Distribution

The Laplace distribution with parameters location = \(\mu\) and scale = \(s\) has density

\[ f(x \mid \mu, s) = \frac{1}{2s} exp\left(-\frac{\mid x - \mu \mid}{s}\right) \]

which can be written as the following Normal-Exponential mixture (Double Exponential)

\[ f(x \mid \mu, s) = \int_{0}^{\infty}\frac{1}{\sqrt{2 \pi t}} exp \left(-\frac{(x-\mu)^2}{2t}\right) \frac{1}{2s^2} exp\left(-\frac{t}{2s^2}\right) dt \]

The mean and variance are

\[ E(X) = \mu; \quad Var(X) = 2 s^2 \]

The Laplace distribution with parameters location = \(\mu\) and rate = \(\lambda\) has density

\[ f(x \mid \mu, \lambda) = \frac{\lambda}{2} exp\left(-\lambda\mid x - \mu \mid\right) \]

which can be written as the following Normal-Exponential mixture (Double Exponential)

\[ f(x \mid \mu, \lambda) = \int_{0}^{\infty}\frac{1}{\sqrt{2 \pi t}} exp \left(-\frac{(x-\mu)^2}{2t}\right) \frac{\lambda^2}{2} exp\left(-\frac{\lambda^2t}{2}\right) dt \]

The mean and variance are

\[ E(X) = \mu; \quad Var(X) = \frac{2}{\lambda^2} \]

Note: Representation from Park and Casella (2008)

\[ \frac{a}{2} e^{-a \mid z \mid} = \int_{0}^{\infty} \frac{1}{\sqrt{2\pi s}} e^{-z^2/(2s^2)} \frac{a^2}{2}e^{(-a^2s/2)}ds \] for \(a>0\)

1.7.1 True PDF

Plotting the \(Laplace(\mu, \lambda)\) distribution

library(rmutil) # for double-exponential 
library(LaplacesDemon)
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# library(rmutil) # for double-exponential 
# library(LaplacesDemon)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('mu','mu (location)', min = -3, max = 3, value = 0, step = 1),
#     sliderInput('lambda','lambda (scale)', min = 0, max = 10, value = 1, step = 1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
# 
#   mean <- reactive({input$mu})
#   sd <- reactive({sqrt(2*input$lambda^2)})
#   q9999 <- reactive({qlaplace(p = 0.9999, location = input$mu, scale = input$lambda)})
# 
#   selectedData <- reactive({
#     data_frame(x = c(c(-q9999(),q9999())))
#   })
# 
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dlaplace, args = c(location = input$mu, scale = input$lambda),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Laplace (mu = ", input$mu, ", lambda = ", input$lambda,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dgamma(x, Laplace (mu = ", input$mu, ", lambda = ", input$lambda,")")) +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )
# 
#   })
# 
# }
# 
# shinyApp(ui = ui, server = server)
library(LaplacesDemon)

# dlaplace(x, location=0, scale=1, log=FALSE)
mu <- 0 # location
lambda <- 2 # rate
scale <- 1/lambda # scale = 1/rate

mean <- mu
var <- 2/lambda^2
sd <- sqrt(2/lambda^2)
q9999 <- LaplacesDemon::qlaplace(p = 0.9999, location = mu, scale = 1/lambda)

p <- data_frame(x = c(-q9999,q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = LaplacesDemon::dlaplace, args = c(location = mu, scale = 1/lambda),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Laplace (mu = ", mu, ", lambda = ", lambda,") Distribution")) +
    xlab("x") +
    ylab(paste0("dlaplace")) +
  theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)
## [1] "True Mean: 0; True Variance: 0.5"

1.7.2 Sampling

library(LaplacesDemon)

# Built-in function
paste0("Built-in function:")
## [1] "Built-in function:"
# rlaplace(n, location=0, scale=1)
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate

set.seed(123)
x <- LaplacesDemon::rlaplace(n = 1e5, location = mu, scale = 1/lambda)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) LaplacesDemon::dlaplace(x, location = mu, scale = 1/lambda), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: -0.000332018113338988; Variance: 0.495343727783127"
# My R function
Draw_Laplace_R <- function(n, mu, lambda) {
  t <- rexp(n = n, rate = lambda^2/2)
  
  x <- rep(NA,n)
  for (i in 1:n) {
    x[i] <- rnorm(1, mean = mu, sd = sqrt(t[i]))
  }
  
  return(x)
}

# Draw_Laplace2 <- function(n, mu, lambda) {
#   t <- rexp(n = n, rate = lambda^2/2)
#   
#   library(purrr)
#   x <- map_dbl(t, ~ rnorm(1, mu, sd = sqrt(.x)))
#   
#   return(x)
# }


paste0("My R function:")
## [1] "My R function:"
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate

set.seed(123)
x <- Draw_Laplace_R(n = 1e5, mu = mu, lambda = lambda)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) LaplacesDemon::dlaplace(x, location = mu, scale = 1/lambda), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))
## [1] "Mean: -0.00254867097691125; Variance: 0.50212479220306"

1.8 Assymmetric Laplace distribution

The Assymmetric Laplace distribution with parameters location = \(\mu\), scale = \(\sigma\) and skewness = \(\tau\) has density

\[ f(x \mid \mu, \sigma, \tau) = \frac{\tau(1-\tau)}{\sigma} exp\left\{-\rho_\tau \left(\frac{x-\mu}{\sigma}\right)\right\} \] where \[ \rho_\tau(x) = x(\tau - I(x<0)) \] can be written as a normal-exponential mixture of the form

\[ f(x \mid \mu, \sigma, \tau) = \]

library(ald)
# dALD(y, mu = 0, sigma = 1, p = 0.5)
# pALD(q, mu = 0, sigma = 1, p = 0.5, lower.tail = TRUE)
# qALD(prob, mu = 0, sigma = 1, p = 0.5, lower.tail = TRUE)
# rALD(n, mu = 0, sigma = 1, p = 0.5)


# dALD(y, mu = 0, sigma = 1, p = 0.5)

mu <- 0 # location
sigma <- 1 # scale
tau <- 0.8

p <- data_frame(x = seq(-40,80,0.5)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mu, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dALD, args = c(mu = mu, sigma = sigma, p = tau),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("ALD (mu = ", mu, ", sigma = ", sigma,", tau = ", tau, " ) Distribution")) +
    xlab("x") +
    ylab(paste0("dALD")) +
  theme(plot.title = element_text(hjust = 0.5))

1.9 Binomial Distribution

The binomial distribution with size = \(n\) and prob = \(p\) has density \[ p(x) = \left(\begin{array} n \\ x \end{array}\right) \] for \(x = 0,\ldots,n\).

1.9.1 True PDF

rbinom(4, size=1, prob=0.5) 
## [1] 1 1 0 1
dbinom(4, size=12, prob=0.2) 
## [1] 0.1328756

2 Bayesian Shrinkage Priors

2.1 CTS-studt

The Normal-Inv-Gamma prior is the scale mixture of Normals representation of the fat-tailed Student-t distribution. This hierarchical prior, which is also called “sparse Bayesian Learning” prior in signal processing, takes the form

\[ \mathbf{\beta} \mid \{\tau_j^2\}_{j=1}^p, \sigma^2 \sim N_{p}(\mathbf{0}, \sigma^2 \mathbf{Q}), \] \[ \tau_j^2 \sim inv-Gamma(a,b), \quad \text{for} \quad j = 1,\ldots,p, \] \[ \sigma^2 \sim \frac{1}{\sigma^2} \] where \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\)

The conditional posteriors are the form

\[ \mathbf{\beta} \mid \bullet \sim N_p(\mathbf{V}\times \mathbf{X'y}, \sigma^2\mathbf{V}), \] \[ \tau_j^2 \mid \bullet \sim Inv-Gamma\left(a + \frac{1}{2}, b + \frac{\beta_j^2}{2\sigma^2}\right), \quad \text{for} \quad j = 1,\ldots,p, \]

\[ \sigma^2 \mid \bullet \sim Inv-Gamma\left(\frac{n+p}{2},\frac{\psi + \mathbf{\beta'}\mathbf{Q}^{-1}\mathbf{\beta}}{2}\right) \]

where \(\mathbf{V} = (\mathbf{X'X} + \mathbf{Q}^{-1})^{-1}\), \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\) and \(\psi = (\mathbf{y} - \mathbf{X\beta})'(\mathbf{y} - \mathbf{X\beta})\)

# Student-t distribution ====

# df <- 6
# ncp <- NA # non-centrality parameter
# 
# mean <- 0
# sd <- sqrt(df/(df-2))
# 
# p <- data_frame(x = c(mean - 4*sd, mean + 4*sd)) %>%
#   ggplot(aes(x=x))
# 
# p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
#     stat_function(fun = dt, args = c(df),
#                   color = "#84CA72", size = 1) +
#     labs(caption = paste0("t Distribution (df = ", df,")")) +
#     xlab("x") +
#     ylab(paste0("dt(x, df = ", df,")")) +
#     coord_cartesian(ylim = c(0,0.5)) +
#     theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )

dt2 <- function(df) {
  function (x) dt(x, df = df)
}

stat_studt <- function(df) {
  stat_function(aes(colour = df), fun = dt2(df), size = 0.5)
}

ggplot(data_frame(x = c(-5,5)), aes(x)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
  lapply(c(2,6,10), stat_studt) +
  scale_colour_viridis_c(limits = c(2,10)) +
    labs(caption = paste0("t Distribution (df)")) +
    xlab("x") +
    ylab(paste0("dt(x, df)")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

# CTS-studt prior ====
ndraws <- 1e5

a <- 1
b <- 0.001
tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
library(purrr)
x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))


ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
  xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("CTS-studt prior (a = ", a, ", b = ", b, ")")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

# ggplot(data_frame(x), aes(x=x)) +
#   stat_density(geom = "line", position = "identity") +
#   xlim(-10,10) + coord_cartesian(c(-5,5))

# Choose hyper-parameters
library(purrr)
ndraws <- 1e5

CTS_studt_pa1 <- function(a){
  set.seed(123)
  ndraws <- 1e5
  b <- 0.001
  tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
  x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa1 <- lapply(c(1,2,3), CTS_studt_pa1)
df2_pa1 <- data_frame(x = unlist(df1_pa1), a = factor(rep(c(1,2,3), each = ndraws)))

plot_pa1 <- ggplot(df2_pa1, aes(x=x, color = a)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa1 <- ggplot(df2_pa1, aes(x=x, color = a)) +
  geom_line(aes(y = 1 - ..y.., linetype = a), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 1.5), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

CTS_studt_pa2 <- function(b){
  set.seed(123)
  ndraws <- 1e5
  a <- 1
  tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
  x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa2 <- lapply(c(0.001,0.01,0.1), CTS_studt_pa2)
df2_pa2 <- data_frame(x = unlist(df1_pa2), b = factor(rep(c(0.001,0.01,0.1), each = ndraws)))

plot_pa2 <- ggplot(df2_pa2, aes(x=x, color = b)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa2 <- ggplot(df2_pa2, aes(x=x, color = b)) +
  geom_line(aes(y = 1 - ..y.., linetype = b), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 1.5), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian  
# percentage of obs removed
sum((df2_pa2$x < 10) & (df2_pa2$x > -10))/length(df2_pa2$x)*100
## [1] 99.961
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(plot_pa1,plot_pa2, ncol = 2)

# tail behavior
grid.arrange(tail_pa1,tail_pa2, ncol = 2)

2.2 CTS-lasso

Reference: Park and Casella (2008) - The Bayesian Lasso

The Laplace prior takes the form \[ \mathbf{\beta} \mid \{\tau_j^2\}_{j=1}^p, \sigma^2 \sim N_p(\mathbf{0}, \sigma^2\mathbf{Q}), \] \[ \tau_j^2 \mid \lambda^2 \sim Exponential\left(\frac{\lambda^2}{2}\right), \quad \text{for} \quad j = 1,\ldots, p, \]

\[ \lambda^2 \sim Gamma(c,d) \]

\[ \sigma^2 \sim \frac{1}{\sigma^2} \]

where \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\)

The conditional posteriors are of the form \[ \mathbf{\beta} \mid \bullet \sim N_p\left(\mathbf{V}\times \mathbf{X}'\mathbf{y}, \sigma^2 \mathbf{V}\right) \] \[ \frac{1}{\tau_j^2} \mid \bullet \sim IG\left(\sqrt{\frac{\lambda^2\sigma^2}{\beta_j^2}}, \lambda^2\right), \quad \text{for} \quad j = 1,\ldots,p, \]

\[ \lambda^2 \mid \bullet \sim Gamma\left(c + p, d + \frac{\sum_{j=1}^p\tau_j^2}{2}\right) \]

\[ \sigma^2 \mid \bullet \sim Inv-Gamma\left(\frac{n+p}{2},\frac{\psi + \mathbf{\beta'}\mathbf{Q}^{-1}\mathbf{\beta}}{2}\right) \]

where \(\mathbf{V} = (\mathbf{X'X} + \mathbf{Q}^{-1})^{-1}\), \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\) and \(\psi = (\mathbf{y} - \mathbf{X\beta})'(\mathbf{y} - \mathbf{X\beta})\)

2.2.1 Version 1: Double-exponential (Laplace) prior

# Double exponential prior ====
ndraws <- 1e5
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate

tausq <-  rexp(ndraws, rate = lambda^2/2)
library(purrr)
x <- map_dbl(tausq, ~ rnorm(1, mean = mu, sd = sqrt(.x)))

ggplot(data_frame(x), aes(x=x)) +
  geom_density(fill = "lightgreen") +
  stat_function(fun = LaplacesDemon::dlaplace, args = c(location = mu, scale = 1/lambda),
                  color = "red", size = 1) +
    labs(caption = paste0("Laplace prior (rate = ", lambda, ")")) +
  xlab("x") +
  ylab(paste0("density")) +
  theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
  )

# Choice of rate ====
library(LaplacesDemon)

dlaplace2 <- function(lambda) {
  function (x) LaplacesDemon::dlaplace(x, location = 0, scale = 1/lambda)
}

stat_laplace <- function(lambda) {
  stat_function(aes(colour = lambda), 
                fun = dlaplace2(lambda), 
                size = 0.5) 
}

ggplot(data_frame(x = c(-5,5)), aes(x=x)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
  lapply(c(2,1,0.1), stat_laplace) +
  scale_colour_viridis_c(limits = c(0.1,2)) +
    labs(caption = paste0("Laplace Prior (lambda)")) +
    xlab("x") +
    ylab(paste0("dlapalce")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
    )

2.2.2 Version 2: Hierarchical prior

# Hierarchical prior ====
ndraws <- 1e5

library(purrr)
c <- 1
d <- 1.05

lambdasq <- rgamma(ndraws, shape = c, rate = d)
tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))

# c(mean(lambdasq), sd(lambdasq))
rate <- sqrt(mean(lambdasq)) # for corresponding Laplace distribution

ggplot(data_frame(x), aes(x=x)) +
  geom_density(fill = "lightgreen") +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
  stat_function(fun = LaplacesDemon::dlaplace, args = c(location = 0, scale = 1/rate),
                  color = "red", size = 0.5) +
  xlim(-10,10) + coord_cartesian(c(-5,5)) +
  labs(caption = paste0("Hierarchical prior (c = ", c, ", d = ", d, ")")) +
  xlab("x") +
  ylab(paste0("density")) +
  theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
  )

paste0("Corresponding Laplace distribution - Rate = ", rate)
## [1] "Corresponding Laplace distribution - Rate = 0.977472489051276"
# Choice of hyper-parameters
library(purrr)
ndraws <- 1e5

CTS_lasso_pa1 <- function(c) {
  set.seed(123)
  d = 1
  lambdasq <- rgamma(ndraws, shape = c, rate = d)
  tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
  x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa1 <- lapply(c(1,2,3), CTS_lasso_pa1)
df2_pa1 <- data_frame(x = unlist(df1_pa1), c = factor(rep(c(1,2,3), each = ndraws)))

plot_pa1 <- ggplot(df2_pa1, aes(x=x, color = c)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa1 <- ggplot(df2_pa1, aes(x=x, color = c)) +
  geom_line(aes(y = 1 - ..y.., linetype = c), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 2), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

CTS_lasso_pa2 <- function(d) {
  set.seed(123)
  c = 1
  lambdasq <- rgamma(ndraws, shape = c, rate = d)
  tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
  x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa2 <- lapply(c(1,0.5,0.1), CTS_lasso_pa2)
df2_pa2 <- data_frame(x = unlist(df1_pa2), d = factor(rep(c(1,0.5,0.1), each = ndraws)))

plot_pa2 <- ggplot(df2_pa2, aes(x=x, color = d)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa2 <- ggplot(df2_pa2, aes(x=x, color = d)) +
  geom_line(aes(y = 1 - ..y.., linetype = d), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 2), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian  
# percentage of obs removed
# sum((df2_pa1$x < 10) & (df2_pa1$x > -10))/length(df2_pa1$x)*100
# sum((df2_pa2$x < 10) & (df2_pa2$x > -10))/length(df2_pa2$x)*100

library(gridExtra)
grid.arrange(plot_pa1,plot_pa2, ncol = 2)

# tail behavior
grid.arrange(tail_pa1,tail_pa2, ncol = 2)

2.3 CTS-horse-mx

ndraws = 1e5
library(purrr)

nu <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
lamdasq <- map_dbl(nu, ~ 1/rgamma(1, shape = 1/2, scale = .x))
xi <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
tausq <- map_dbl(xi, ~ 1/rgamma(1, shape = 1/2, scale = .x))
temp <- lamdasq*tausq
x <- map_dbl(temp, ~rnorm(1, mean = 0, sd = .x))

ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
    xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("CTS-horseshoe-mx prior")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )
## Warning: Removed 24095 rows containing non-finite values (stat_density).

2.4 CTS-horse-sl

ndraws = 1e5
library(purrr)
lambda <- rhalfcauchy(ndraws, scale= 1)
tau <- map_dbl(lambda, ~rhalfcauchy(1, scale = .x))
x <- map_dbl(tau, ~rnorm(1, mean = 0, sd = .x))

ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
    xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("CTS-horseshoe-sl prior")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )
## Warning: Removed 10430 rows containing non-finite values (stat_density).

2.5 SSVS-normal

2.6 SSVS-studt

ndraws <- 1e5

# SSVS-studt1
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))

tausq0 <- 1e-3
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))


# SSVS-studt2
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))

tausq0 <- tausq1*1e-3
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))
#mean(tausq0) #0.012


# SSVS-studt3
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))

alpha0 <- 1
beta0 <- 0.0001
tausq0 <- 1/rgamma(ndraws, shape = alpha0, scale = 1/beta0)
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))
#mean(tausq0) #0.0035


pi <- rbern(1e5, prob = 0.2)
x <- pi*studt_slab + (1-pi)*studt_spike
# tmp <- runif(1e5)
# x <- (tmp<=0.2)*studt_slab + (tmp>0.2)*studt_spike
ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
  xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("SSVS-studt prior")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )
## Warning: Removed 195 rows containing non-finite values (stat_density).

2.7 SSVS-lasso

2.8 SSVS-horse

##### Sample from prior densities -----

set.seed(123)
ndraws <- 1e5

## ridge 
normal <- rnorm(ndraws, mean=0, sd=0.5)

## Student's t 
studt <- rt(ndraws, df=1) 

CTS_studt_fun <- function(alpha = 1, beta = 0.001, ndraws = 1e5) {
  library(purrr)
  tausq <- 1/rgamma(ndraws, shape = alpha, scale = 1/beta)
  x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}
CTS_studt <- CTS_studt_fun(alpha = 1, beta = 0.001, ndraws = 1e5)


## lasso 
lasso <- rmutil::rlaplace(ndraws, m=0, s=0.5)

CTS_lasso_fun <- function (alpha = 1, sigma = 1, ndraws = 1e5) {
  library(purrr)
  lambdasq <- rgamma(ndraws, shape = alpha, scale = sigma)
  tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
  x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}
CTS_lasso <- CTS_lasso_fun(alpha = 1, sigma = 1, ndraws = 1e5)

## horseshoe

CTS_horsemx_fun <- function(ndraws = 1e5) {
  library(purrr)
  nu <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
  lamdasq <- map_dbl(nu, ~ 1/rgamma(1, shape = 1/2, scale = .x))
  xi <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
  tausq <- map_dbl(xi, ~ 1/rgamma(1, shape = 1/2, scale = .x))
  temp <- lamdasq*tausq
  x <- map_dbl(temp, ~rnorm(1, mean = 0, sd = .x))
  return(x)
}
CTS_horsemx <- CTS_horsemx_fun(ndraws = 1e5)

CTS_horsesl_fun <- function(ndraws = 1e5) {
  library(purrr)
  lambda <- rhalfcauchy(ndraws, scale= 1)
  tau <- map_dbl(lambda, ~rhalfcauchy(1, scale = .x))
  x <- map_dbl(tau, ~rnorm(1, mean = 0, sd = .x))
  return(x)
}
CTS_horsesl <- CTS_horsesl_fun(ndraws = 1e5)
##### Plot -----
## Create plot data
df_comb <- data_frame(CTS_studt, CTS_lasso, CTS_horsemx, CTS_horsesl)
df_long <- gather(df_comb, Prior, value) # long format
df_long$Prior <- factor(df_long$Prior)
levels(df_long$Prior) <- list("CTS_studt" = "CTS_studt", "CTS_lasso" = "CTS_lasso", "CTS_horsemx" = "CTS_horsemx", "CTS_horsesl" = "CTS_horsesl")
df_long$asymp <- rep(NA, nrow(df_long)) 
df_long[which(df_long$Prior=="CTS_horsemx"|df_long$Prior=="CTS_horsesl"), "asymp"] <- 0

   
df_long <- gather(df_comb, Prior, value) %>% 
      mutate(Prior = factor(Prior, levels = list("CTS_studt" = "CTS_studt", "CTS_lasso" = "CTS_lasso", "CTS_horsemx" = "CTS_horsemx", "CTS_horsesl" = "CTS_horsesl")),
             asymp = case_when(Prior=="CTS_horsemx" ~ 0,
                               Prior=="CTS_horsesl" ~ 0,
                               TRUE ~ NA_real_))

plotComb <- ggplot(df_long, aes(x = value, colour = Prior, linetype = Prior)) +
  stat_density(geom = "line", position = "identity") +
  geom_vline(aes(xintercept = asymp), colour = "grey") +
  scale_linetype_manual(values = c(1,2,3,4)) +
  scale_color_manual(values = c(1,2,3,4)) +
  xlim(-10,10) + coord_cartesian(xlim=c(-5,5), ylim =c(0,5)) +
  theme(axis.title = element_blank(), legend.position = "bottom")
# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian  

##### Tail behavior -----
# Plot the survival function, i.e., 1-CDF
# See https://stats.stackexchange.com/questions/86429/which-has-the-heavier-tail-lognormal-or-gamma

tailComb <- ggplot(df_long, aes(x = value, group = Prior)) +
  geom_line(aes(y = 1 - ..y.., linetype = Prior, colour = Prior), stat='ecdf') +
  scale_linetype_manual(values = c(1,2,3,4)) +
  scale_color_manual(values = c(1,2,3,4)) +
  coord_cartesian(xlim = c(0, 8), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom", legend.title = element_blank()) + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

library(gridExtra)
get_legend <- function(myggplot){
  require(gridExtra)
  tmp <- ggplot_gtable(ggplot_build(myggplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}

legend <- get_legend(plotComb)
## Warning: Removed 36684 rows containing non-finite values (stat_density).
## Warning: Removed 200000 rows containing missing values (geom_vline).
plotComb <- plotComb + theme(legend.position="none")
tailComb <- tailComb + theme(legend.position="none")
grid.arrange(plotComb,tailComb, legend, ncol = 2, nrow =2,
             layout_matrix = rbind(c(1,2),c(3,3)),
             heights = c(4,1))
## Warning: Removed 36684 rows containing non-finite values (stat_density).
## Removed 200000 rows containing missing values (geom_vline).

Appendix

# install.packages("pkgsearch")
library(pkgsearch)
invGammaPkg <- pkg_search(query = "inverse gamma", size = 200)

invGammaPkgShort <- invGammaPkg %>% 
  filter(maintainer_name != "ORPHANED", score > 1000) %>% 
  select(score, package, downloads_last_month) %>% 
  arrange(downloads_last_month)

head(invGammaPkgShort)
## # A data frame: 0 × 3
## # … with 3 variables: score <dbl>, package <chr>, downloads_last_month <int>

Reference

Park, Trevor, and George Casella. 2008. “The Bayesian Lasso.” Journal of the American Statistical Association 103 (482): 681–86.