\(\newcommand{\mathbbm}[1]{\mathbb{#1}}\)

library(tidyverse)
library(ggplot2)

1 Statistical Distributions

1.1 Normal Distribution

The normal distribution has density:

\[ f(x) = \frac{1}{\sqrt{2\pi\sigma^{2}}} exp\left(-\frac{(x-\mu)^2}{2\sigma^2}\right) \]

help("Normal")

1.1.1 True PDF

Plotting the standard normal distribution

# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('Mu','Mu', min = - 3, max = 3, value = 0, step = 1),
#     sliderInput('Sigma','Sigma', min = 1, max = 5, value = 1, step = 1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
# 
#   selectedData <- reactive({
#     data_frame(x = c(input$Mu - 4*input$Sigma, input$Mu + 4*input$Sigma))
#   })
# 
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = input$Mu, linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dnorm, args = c(input$Mu,input$Sigma),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Normal Distribution N (", input$Mu," , ", input$Sigma^2, ")")) +
#       xlab("x") +
#       ylab("dnorm(x)") +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )
# 
#   })
# 
# }
# 
# shinyApp(ui = ui, server = server)

mu <- 0
sigma <- 1

mean <- mu
var <- sigma^2
sd <- sigma
q9999 <- qnorm(p = 0.9999, mean = mu, sd = sigma, lower.tail = TRUE)

p <- data_frame(x = c(-q9999, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mu, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dnorm, args = c(mu,sigma),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Normal Distribution N (", mu," , ", sigma^2, ")")) +
    xlab("x") +
    ylab("dnorm(x)") +
  theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)

## [1] "True Mean: 0; True Variance: 1"

ggplot(data.frame(x = -5:5), aes(x)) + 
  stat_function(fun = dnorm, args = c(0,1), geom = 'area', xlim = c(1,5), fill = 4) + 
  stat_function(fun = dnorm, args = c(0,1)) +
  labs(x = "", y = "") +
  ggtitle("Normal distribution") +
  theme(plot.title = element_text(hjust = 0.5))

1.1.2 Sampling

# Built-in R function
paste0("Built-in R function:")

## [1] "Built-in R function:"

# rnrom(n, mean=0, sd=1)
mu <- 0
sigma <- 1

set.seed(123)
x <- rnorm(n = 1e5, mean = mu, sd = sigma)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dnorm(x, mean = mu, sd = sigma), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 0.000976748821684996; Variance: 0.999466876209305"

# Built-in Rcpp function
paste0("Built-in Rcpp function:")

## [1] "Built-in Rcpp function:"

library("Rcpp")
# evalCpp("Rcpp::rnorm(n,0,1)")
mu <- 0
sigma <- 1

set.seed(123)
x <- evalCpp("Rcpp::rnorm(1e5, 0, 1)")

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dnorm(x, mean = mu, sd = sigma), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 0.000976748821684996; Variance: 0.999466876209305"

library("Rcpp")

set.seed(123)
rnorm(1,0,1)

set.seed(123)
evalCpp("R::rnorm(0,1)")

set.seed(123)
rnorm(3,0,1)

set.seed(123)
evalCpp("Rcpp::rnorm(3,0,1)")

1.2 t distribution

The t distribution with df = \(\nu\) degrees of freedom has density

\[ f(x) = \]

help("TDist")

1.2.1 True PDF

Plotting the Central t distribution

# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('df','df', min = 1, max = 12, value = 6, step = 1),
#     sliderInput('ncp','ncp', min = 1, max = 5, value = 1, step = 1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
#   
#   mean <- 0
#   sd <- reactive({sqrt(input$df/(input$df-2))})
#   selectedData <- reactive({
#     data_frame(x = c(mean - 4*sd(), mean + 4*sd()))
#   })
#   
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dt, args = c(input$df),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("t Distribution (df = ", input$df,")")) +
#       xlab("x") +
#       ylab(paste0("dt(x, df = ", input$df,")")) +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       ) 
#       
#   })
#   
# }
# 
# shinyApp(ui = ui, server = server)

df <- 6
ncp <- NA # non-centrality parameter

mean <- 0
var <- df/(df-2)
sd <- sqrt(df/(df-2))
q9999 <- qt(p = 0.9999, df = df, lower.tail = TRUE)

p <- data_frame(x = c(-q9999, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dt, args = c(df),
                  color = "#84CA72", size = 1) +
    labs(caption = paste0("t Distribution (df = ", df,")")) +
    xlab("x") +
    ylab(paste0("dt(x, df = ", df,")")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

paste0("True Mean: ", mean, "; True Variance: ", var)

## [1] "True Mean: 0; True Variance: 1.5"

1.2.2 Sampling

# Built-in R function
paste0("Built-in R function:")

## [1] "Built-in R function:"

# rt(n, df, ncp)
df <- 6
ncp <- NA # non-centrality parameter

set.seed(123)
x <- rt(n = 1e5, df = df)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dt(x, df = df), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: -0.00191542325893477; Variance: 1.51823043058928"

1.3 Gamma Distribution

The Gamma distribution with parameters shape = \(c\) and rate = \(d\) has density

\[ f(x \mid c,d) = \frac{d^c}{\Gamma(c)} x^{c - 1} e^{(-dx)} \] for \(x \geq 0, c > 0\) and \(d > 0\).

The mean and variance are

\[ E(X) = \frac{c}{d}; \quad Var(X) = \frac{c}{d^2} \]

The Gamma distribution with parameters shape = \(c\) and scale = \(\delta\) has density

\[ f(x \mid c, \delta) = \frac{1}{\delta^c \Gamma(c)} x^{c - 1} e^{(-x/\delta)} \] for \(x \geq 0, c > 0\) and \(\delta > 0\).

The mean and variance are

\[ E(X) = c \delta; \quad Var(X) = c \delta^2 \]

help("GammaDist")

1.3.1 True PDF

Plotting the Gamma distribution

# # My Shiny App
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('alpha','alpha (shape)', min = 1, max = 12, value = 1, step = 1),
#     sliderInput('sigma','sigma (scale)', min = 0, max = 3, value = 0.5, step = 0.1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
# 
#   mean <- reactive({input$alpha*input$sigma})
#   sd <- reactive({sqrt(input$alpha*input$sigma^2)})
# 
#   selectedData <- reactive({
#     data_frame(x = c(0, qgamma(p = 0.9999, shape = input$alpha, scale = input$sigma)))
#   })
# 
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dgamma, args = c(input$alpha,input$sigma),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Gamma (alpha = ", input$alpha, ", sigma = ", input$sigma,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dgamma(x, alpha = ", input$alpha, ", sigma = ", input$sigma,")")) +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )
# 
#   })
# 
# }
# 
# shinyApp(ui = ui, server = server)

# dgamma(x, shape, rate = 1, scale = 1/rate, log = FALSE)
c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

mean <- c/d
var <- c/d^2
sd <- sqrt(c/d^2)
q9999 <- qgamma(p = 0.9999, shape = c, rate = d)

p <- data_frame(x = c(0, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dgamma, args = c(shape = c, rate = d),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Gamma (c = ", c, ", d = ", d,") Distribution")) +
    xlab("x") +
    ylab(paste0("dgamma")) +
    theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)

## [1] "True Mean: 6; True Variance: 12"

1.3.2 Sampling

c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

# Built-in function
paste0("Built-in R function:")

## [1] "Built-in R function:"

set.seed(123)
# x <- rgamma(1e5, c, d)
x <- rgamma(n = 1e5, shape = c, rate = d) 

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 6.00987751542745; Variance: 11.9980901747901"

c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

# Built-in function
paste0("Built-in Rcpp function:")

## [1] "Built-in Rcpp function:"

set.seed(123)
x <- Rcpp::evalCpp("randg(1e5, arma::distr_param(3,2))", depends = "RcppArmadillo") # shape and scale parameters

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 5.99775522536012; Variance: 11.9708700496215"

library(Rcpp)
library(RcppArmadillo)

c <- 3 # shape
d <- 0.5 # rate
delta <- 1/d # scale = 1/rate

Draw_Gamma <- cppFunction(depends = "RcppArmadillo",
'
  arma::Col<double> rrgamma(int n, double c, double d) {
    return(rgamma(n, c, 1/d)); // shape and scale parameters
  }
'
)

# My Rcpp function
paste0("My Rcpp function:")

## [1] "My Rcpp function:"

set.seed(123)
x <- Draw_Gamma(1e5, c, d)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dgamma(x, shape = c, rate = d), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 6.00987751542745; Variance: 11.9980901747901"

1.4 Inverse-Gamma Distribution

The Inverse-Gamma distribution with parameters shape = \(a\) and rate = \(b\) has density

\[ f(x \mid a, b) = \frac{1}{b^a\Gamma(a)} x^{-a-1} e^{\left(-\frac{1}{bx}\right)} \] for \(x \geq 0, a > 0\) and \(b > 0\).

The mean and variance are

\[ E(X) = \frac{1}{b(a-1)} \quad (a>1) ; \quad Var(X) = \frac{1}{b^2(a-1)^2(a-2)} \quad (a>2) \]

The Inverse-Gamma distribution with parameters shape = \(a\) and scale = \(\beta\) has density

\[ f(x \mid a, \beta) = \frac{\beta^a}{\Gamma(a)} x^{-a-1} e^{(-\beta/x)} \] for \(x \geq 0, a > 0\) and \(\beta > 0\).

The mean and variance are

\[ E(X) = \frac{\beta}{a-1} \quad (a>1) ; \quad Var(X) = \frac{\beta^2}{(a-1)^2(a-2)} \quad (a>2) \]

1.4.1 True PDF

Plotting the Inverse-Gamma distribution

# # My Shiny App
# 
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# library(invgamma)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('alpha','alpha (shape)', min = 1, max = 12, value = 1, step = 1),
#     sliderInput('beta','beta (scale)', min = 0, max = 3, value = 0.5, step = 0.5)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
#   
#   mode <- reactive({input$beta/(input$alpha+1)})
# 
#   selectedData <- reactive({
#     data_frame(x = c(0, qinvgamma(p = 0.9, shape = input$alpha, scale = input$beta)))
#   })
#   
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mode(), linetype = "dashed", alpha = 0.4, color = "blue") +
#       stat_function(fun = dinvgamma, args = c(input$alpha,input$beta),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Inverse Gamma (alpha = ", input$alpha, ", beta = ", input$beta,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dinvgamma(x, alpha = ", input$alpha, ", beta = ", input$beta,")")) +
#       coord_cartesian(xlim = c(0,3)) +
#       coord_cartesian(ylim = c(0,5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       ) 
#       
#   })
#   
# }
# 
# shinyApp(ui = ui, server = server)

a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

library(extraDistr)
mean <- 1/(b*(a-1))
var <- 1/(b^2*(a-1)^2*(a-2))
sd <- sqrt(1/(b^2*(a-1)^2*(a-2)))
q9999 <- extraDistr::qinvgamma(p = 0.9999, alpha = a, beta = 1/b) # shape and scale parameters

p <- data_frame(x = c(0, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = extraDistr::dinvgamma, args = c(alpha = a, beta = 1/b),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Inverse-Gamma (a = ", a, ", b = ", b,") Distribution")) +
    xlab("x") +
    ylab(paste0("dinvgamma")) +
    theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)

## [1] "True Mean: 0.25; True Variance: 0.0625"

1.4.2 Sampling

a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

library(extraDistr)
# Built-in function
paste0("Built-in function:")

## [1] "Built-in function:"

set.seed(123)
x <- extraDistr::rinvgamma(n = 1e5, alpha = a, beta = 1/b) # shape and scale parameters

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"

# My R function
Draw_InvGamma_R <- function(n, shape, rate) {
  x <- rgamma(n = n, shape = shape, rate = 1/rate)
  y <- 1/x
  return(y)
}

a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

paste0("My R function:")

## [1] "My R function:"

set.seed(123)
x <- Draw_InvGamma_R(n = 1e5, shape = a, rate = b)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"

# My Rcpp function
Draw_InvGamma_Rcpp <- cppFunction(depends = "RcppArmadillo",
'
  arma::Col<double> rrinvgamma(int n, double a, double b) {
    return(1/rgamma(n, a, b)); // shape and scale parameters
  }
'
)

a <- 3 # shape
b <- 2 # rate
beta <- 1/b # scale = 1/rate

paste0("My Rcpp function:")

## [1] "My Rcpp function:"

set.seed(123)
x <- Draw_InvGamma_Rcpp(1e5, a, b)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) extraDistr::dinvgamma(x, alpha = a, beta = 1/b), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 0.249672265609684; Variance: 0.0573891856334876"

1.5 Inverse Gaussian Distribution

Reference: John R. Michael, William R. Schucany, and Roy W. Haas (1976) Generating Random Variates Using Transformations with Multiple Roots. The American Statistician 30: 88-90.

The Inverse Gaussian distribution (also called the Wald distribution) with parameters location \(= \mu\) and shape \(= \lambda\) has density \[ f(x \mid \mu, \lambda) = \frac{\lambda}{\sqrt{2\pi x^3}}exp\left(\frac{-\lambda(x-\mu)^2}{2\mu^2x}\right) \] for \(x > 0, \mu > 0\) and \(\lambda > 0\)

The mean and variance are \[ E(X) = \mu; \quad Var(X) = \frac{\mu^3}{\lambda} \]

1.5.1 True PDF

Plotting the \(IG(\mu, \lambda)\) distribution

library(extraDistr)
# dwald(x, mu, lambda, log = FALSE)

mu <- 1
lambda <- 1

mean <- mu
var <- mu^3/lambda
sd <- sqrt(mu^3/lambda)

p <- data_frame(x = c(0, mean + 15*sd)) %>% 
  ggplot(aes(x = x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
  stat_function(fun = dwald, args = c(mu = mu, lambda = lambda),
                color = "#84CA72", size = 1) +
  ggtitle(paste0("Inverse Gaussian (mu = ", mu, ", lambda = ", lambda,") Distribution")) +
  xlab("x") +
  ylab(paste0("dInverseGaussian")) +
  theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)

## [1] "True Mean: 1; True Variance: 1"

1.5.2 Sampling

library(extraDistr)
# Built-in function
paste0("Built-in function:")

## [1] "Built-in function:"

set.seed(123)
x <- rwald(n = 1e5, mu = 1, lambda = 1)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 1.00506842306895; Variance: 1.02736098395571"

# My R function
Draw_IG <- function(n, mu, lambda) {
  
  y <- rep(NA, n)
  
  for (i in 1:n) {
    # First generate a chi_square(1) variate
    v0 <-  rnorm(1,0,1)^2

    # Now find the two roots in Eq. (5) of the paper
    x1 <-  mu + (0.5*(mu^2)*v0)/lambda - (0.5*mu/lambda)*sqrt(4*mu*lambda*v0 + (mu^2)*(v0^2));
    x2 <-  (mu^2)/x1

    # Accept x1 with probability p1_v0, otherwise accept x2
    p1_v0 <-  mu/(mu+x1)
    
    if (runif(1) <= p1_v0) {
      y[i] <- x1
    } else {
      y[i] <- x2
    }
    
  }
  
  return(y)
}

paste0("My R function:")

## [1] "My R function:"

set.seed(123)
x <- Draw_IG(n = 1e5, mu = 1, lambda = 1)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 1.00350059340242; Variance: 1.01015584280717"

library(Rcpp)
library(RcppArmadillo)

Dfuns <- cppFunction(depends = "RcppArmadillo",
'
arma::Col<double> rrinvgauss(int n, double mu, double lambda){
    arma::Col<double> random_vector(n);
    double z,y,x,u;
    for(int i=0; i<n; ++i){
        z=R::rnorm(0,1);
        y=z*z;
        x=mu+0.5*mu*mu*y/lambda - 0.5*(mu/lambda)*sqrt(4*mu*lambda*y+mu*mu*y*y);
        u=R::runif(0,1);
        if(u <= mu/(mu+x)){
            random_vector(i)=x;
        }else{
            random_vector(i)=mu*mu/x;
        };
    }
    return(random_vector);
}
'           
)

paste0("My Rcpp function:")

## [1] "My Rcpp function:"

set.seed(123)
x <- Dfuns(n = 1e5, mu = 1, lambda = 1)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green')  +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 1.00350059340242; Variance: 1.01015584280717"

1.6 Exponential Distribution

The exponential distribution with parameter rate = \(r\) has density

\[ f(x \mid r) = r e^{-r x} \]

for \(x \in [0, \infty), r > 0\).

The mean and variance are

\[ E(X) = \frac{1}{r}; \quad Var(X) = \frac{1}{r^2} \]

1.6.1 True PDF

Plotting the Exponential distribution

# # My Shiny App
# 
# library(shiny)
# library(ggplot2)
# library(tidyverse)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('lambda','lambda (rate)', min = 0.5, max = 2, value = 1, step = 0.5),
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
#   
#   mean <- reactive({1/input$lambda})
#   sd <- reactive({1/input$lambda})
# 
#   selectedData <- reactive({
#     data_frame(x = c(0, qexp(p = 0.9999, rate = input$lambda)))
#   })
#   
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dexp, args = c(input$lambda),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Exponential (lambda = ", input$lambda,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dexp(x, lambda = ", input$lambda,")")) +
#       coord_cartesian(ylim = c(0,2)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       ) 
#       
#   })
#   
# }
# 
# shinyApp(ui = ui, server = server)

# dexp(x, rate = 1, log = FALSE)
r <- 1.5 # rate

mean <- 1/r
var <- 1/r^2
sd <- 1/r
q9999 <- qexp(p = 0.9999, rate = r)

p <- data_frame(x = c(0, q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dexp, args = c(rate = r),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Exponential (r = ", r,") Distribution")) +
    xlab("x") +
    ylab(paste0("dexp")) +
    theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)

## [1] "True Mean: 0.666666666666667; True Variance: 0.444444444444444"

1.6.2 Sampling

r <- 1.5 # rate

# Built-in function
paste0("Built-in R function:")

## [1] "Built-in R function:"

# rexp(n, rate = 1)

set.seed(123)
x <- rexp(n = 1e5, rate = r) 

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) dexp(x, rate = r), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: 0.664974647598273; Variance: 0.439985915259907"

1.7 Laplace Distribution

The Laplace distribution with parameters location = \(\mu\) and scale = \(s\) has density

\[ f(x \mid \mu, s) = \frac{1}{2s} exp\left(-\frac{\mid x - \mu \mid}{s}\right) \]

which can be written as the following Normal-Exponential mixture (Double Exponential)

\[ f(x \mid \mu, s) = \int_{0}^{\infty}\frac{1}{\sqrt{2 \pi t}} exp \left(-\frac{(x-\mu)^2}{2t}\right) \frac{1}{2s^2} exp\left(-\frac{t}{2s^2}\right) dt \]

The mean and variance are

\[ E(X) = \mu; \quad Var(X) = 2 s^2 \]

The Laplace distribution with parameters location = \(\mu\) and rate = \(\lambda\) has density

\[ f(x \mid \mu, \lambda) = \frac{\lambda}{2} exp\left(-\lambda\mid x - \mu \mid\right) \]

which can be written as the following Normal-Exponential mixture (Double Exponential)

\[ f(x \mid \mu, \lambda) = \int_{0}^{\infty}\frac{1}{\sqrt{2 \pi t}} exp \left(-\frac{(x-\mu)^2}{2t}\right) \frac{\lambda^2}{2} exp\left(-\frac{\lambda^2t}{2}\right) dt \]

The mean and variance are

\[ E(X) = \mu; \quad Var(X) = \frac{2}{\lambda^2} \]

Note: Representation from Park and Casella (2008)

\[ \frac{a}{2} e^{-a \mid z \mid} = \int_{0}^{\infty} \frac{1}{\sqrt{2\pi s}} e^{-z^2/(2s^2)} \frac{a^2}{2}e^{(-a^2s/2)}ds \] for \(a>0\)

1.7.1 True PDF

Plotting the \(Laplace(\mu, \lambda)\) distribution

library(rmutil) # for double-exponential 
library(LaplacesDemon)

# library(shiny)
# library(ggplot2)
# library(tidyverse)
# library(rmutil) # for double-exponential 
# library(LaplacesDemon)
# 
# ui <- fluidPage(
#   column(4,
#     wellPanel(
#     sliderInput('mu','mu (location)', min = -3, max = 3, value = 0, step = 1),
#     sliderInput('lambda','lambda (scale)', min = 0, max = 10, value = 1, step = 1)
#     )
#   ),
#   column(8,
#     plotOutput('plotDensity')
#   )
# )
# 
# server <- function(input,output) {
# 
#   mean <- reactive({input$mu})
#   sd <- reactive({sqrt(2*input$lambda^2)})
#   q9999 <- reactive({qlaplace(p = 0.9999, location = input$mu, scale = input$lambda)})
# 
#   selectedData <- reactive({
#     data_frame(x = c(c(-q9999(),q9999())))
#   })
# 
#   output$plotDensity <- renderPlot({
#     ggplot(selectedData(), aes(x=x)) +
#       geom_vline(xintercept = mean(), linetype = "dashed", alpha = 0.4) +
#       stat_function(fun = dlaplace, args = c(location = input$mu, scale = input$lambda),
#                     color = "#84CA72", size = 1) +
#       labs(caption = paste0("Laplace (mu = ", input$mu, ", lambda = ", input$lambda,") Distribution")) +
#       xlab("x") +
#       ylab(paste0("dgamma(x, Laplace (mu = ", input$mu, ", lambda = ", input$lambda,")")) +
#       coord_cartesian(ylim = c(0,0.5)) +
#       theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )
# 
#   })
# 
# }
# 
# shinyApp(ui = ui, server = server)

library(LaplacesDemon)

# dlaplace(x, location=0, scale=1, log=FALSE)
mu <- 0 # location
lambda <- 2 # rate
scale <- 1/lambda # scale = 1/rate

mean <- mu
var <- 2/lambda^2
sd <- sqrt(2/lambda^2)
q9999 <- LaplacesDemon::qlaplace(p = 0.9999, location = mu, scale = 1/lambda)

p <- data_frame(x = c(-q9999,q9999)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = LaplacesDemon::dlaplace, args = c(location = mu, scale = 1/lambda),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("Laplace (mu = ", mu, ", lambda = ", lambda,") Distribution")) +
    xlab("x") +
    ylab(paste0("dlaplace")) +
  theme(plot.title = element_text(hjust = 0.5))

paste0("True Mean: ", mean, "; True Variance: ", var)

## [1] "True Mean: 0; True Variance: 0.5"

1.7.2 Sampling

library(LaplacesDemon)

# Built-in function
paste0("Built-in function:")

## [1] "Built-in function:"

# rlaplace(n, location=0, scale=1)
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate

set.seed(123)
x <- LaplacesDemon::rlaplace(n = 1e5, location = mu, scale = 1/lambda)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) LaplacesDemon::dlaplace(x, location = mu, scale = 1/lambda), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: -0.000332018113338988; Variance: 0.495343727783127"

# My R function
Draw_Laplace_R <- function(n, mu, lambda) {
  t <- rexp(n = n, rate = lambda^2/2)
  
  x <- rep(NA,n)
  for (i in 1:n) {
    x[i] <- rnorm(1, mean = mu, sd = sqrt(t[i]))
  }
  
  return(x)
}

# Draw_Laplace2 <- function(n, mu, lambda) {
#   t <- rexp(n = n, rate = lambda^2/2)
#   
#   library(purrr)
#   x <- map_dbl(t, ~ rnorm(1, mu, sd = sqrt(.x)))
#   
#   return(x)
# }


paste0("My R function:")

## [1] "My R function:"

mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate

set.seed(123)
x <- Draw_Laplace_R(n = 1e5, mu = mu, lambda = lambda)

ggplot(data_frame(x), aes(x=x)) + 
  geom_density(color = 'green') +
  geom_vline(xintercept = mean(x), linetype = "dashed", alpha = 0.4) +
  stat_function(fun = function(x) LaplacesDemon::dlaplace(x, location = mu, scale = 1/lambda), color = "red", alpha = 0.4)

paste0("Mean: ", mean(x), "; Variance: ", var(x))

## [1] "Mean: -0.00254867097691125; Variance: 0.50212479220306"

1.8 Assymmetric Laplace distribution

The Assymmetric Laplace distribution with parameters location = \(\mu\), scale = \(\sigma\) and skewness = \(\tau\) has density

\[ f(x \mid \mu, \sigma, \tau) = \frac{\tau(1-\tau)}{\sigma} exp\left\{-\rho_\tau \left(\frac{x-\mu}{\sigma}\right)\right\} \] where \[ \rho_\tau(x) = x(\tau - I(x<0)) \] can be written as a normal-exponential mixture of the form

\[ f(x \mid \mu, \sigma, \tau) = \]

library(ald)
# dALD(y, mu = 0, sigma = 1, p = 0.5)
# pALD(q, mu = 0, sigma = 1, p = 0.5, lower.tail = TRUE)
# qALD(prob, mu = 0, sigma = 1, p = 0.5, lower.tail = TRUE)
# rALD(n, mu = 0, sigma = 1, p = 0.5)


# dALD(y, mu = 0, sigma = 1, p = 0.5)

mu <- 0 # location
sigma <- 1 # scale
tau <- 0.8

p <- data_frame(x = seq(-40,80,0.5)) %>% 
  ggplot(aes(x=x))

p + geom_vline(xintercept = mu, linetype = "dashed", alpha = 0.4) +
    stat_function(fun = dALD, args = c(mu = mu, sigma = sigma, p = tau),
                  color = "#84CA72", size = 1) +
    ggtitle(paste0("ALD (mu = ", mu, ", sigma = ", sigma,", tau = ", tau, " ) Distribution")) +
    xlab("x") +
    ylab(paste0("dALD")) +
  theme(plot.title = element_text(hjust = 0.5))

1.9 Binomial Distribution

The binomial distribution with size = \(n\) and prob = \(p\) has density \[ p(x) = \left(\begin{array} n \\ x \end{array}\right) \] for \(x = 0,\ldots,n\).

1.9.1 True PDF

rbinom(4, size=1, prob=0.5)

## [1] 1 1 0 1

dbinom(4, size=12, prob=0.2)

## [1] 0.1328756

2 Bayesian Shrinkage Priors

2.1 `CTS-studt`

The Normal-Inv-Gamma prior is the scale mixture of Normals representation of the fat-tailed Student-t distribution. This hierarchical prior, which is also called “sparse Bayesian Learning” prior in signal processing, takes the form

\[ \mathbf{\beta} \mid \{\tau_j^2\}_{j=1}^p, \sigma^2 \sim N_{p}(\mathbf{0}, \sigma^2 \mathbf{Q}), \] \[ \tau_j^2 \sim inv-Gamma(a,b), \quad \text{for} \quad j = 1,\ldots,p, \] \[ \sigma^2 \sim \frac{1}{\sigma^2} \] where \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\)

The conditional posteriors are the form

\[ \mathbf{\beta} \mid \bullet \sim N_p(\mathbf{V}\times \mathbf{X'y}, \sigma^2\mathbf{V}), \] \[ \tau_j^2 \mid \bullet \sim Inv-Gamma\left(a + \frac{1}{2}, b + \frac{\beta_j^2}{2\sigma^2}\right), \quad \text{for} \quad j = 1,\ldots,p, \]

\[ \sigma^2 \mid \bullet \sim Inv-Gamma\left(\frac{n+p}{2},\frac{\psi + \mathbf{\beta'}\mathbf{Q}^{-1}\mathbf{\beta}}{2}\right) \]

where \(\mathbf{V} = (\mathbf{X'X} + \mathbf{Q}^{-1})^{-1}\), \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\) and \(\psi = (\mathbf{y} - \mathbf{X\beta})'(\mathbf{y} - \mathbf{X\beta})\)

# Student-t distribution ====

# df <- 6
# ncp <- NA # non-centrality parameter
# 
# mean <- 0
# sd <- sqrt(df/(df-2))
# 
# p <- data_frame(x = c(mean - 4*sd, mean + 4*sd)) %>%
#   ggplot(aes(x=x))
# 
# p + geom_vline(xintercept = mean, linetype = "dashed", alpha = 0.4) +
#     stat_function(fun = dt, args = c(df),
#                   color = "#84CA72", size = 1) +
#     labs(caption = paste0("t Distribution (df = ", df,")")) +
#     xlab("x") +
#     ylab(paste0("dt(x, df = ", df,")")) +
#     coord_cartesian(ylim = c(0,0.5)) +
#     theme(
#         plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
#       )

dt2 <- function(df) {
  function (x) dt(x, df = df)
}

stat_studt <- function(df) {
  stat_function(aes(colour = df), fun = dt2(df), size = 0.5)
}

ggplot(data_frame(x = c(-5,5)), aes(x)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
  lapply(c(2,6,10), stat_studt) +
  scale_colour_viridis_c(limits = c(2,10)) +
    labs(caption = paste0("t Distribution (df)")) +
    xlab("x") +
    ylab(paste0("dt(x, df)")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

# CTS-studt prior ====
ndraws <- 1e5

a <- 1
b <- 0.001
tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
library(purrr)
x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))


ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
  xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("CTS-studt prior (a = ", a, ", b = ", b, ")")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

# ggplot(data_frame(x), aes(x=x)) +
#   stat_density(geom = "line", position = "identity") +
#   xlim(-10,10) + coord_cartesian(c(-5,5))

# Choose hyper-parameters
library(purrr)
ndraws <- 1e5

CTS_studt_pa1 <- function(a){
  set.seed(123)
  ndraws <- 1e5
  b <- 0.001
  tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
  x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa1 <- lapply(c(1,2,3), CTS_studt_pa1)
df2_pa1 <- data_frame(x = unlist(df1_pa1), a = factor(rep(c(1,2,3), each = ndraws)))

plot_pa1 <- ggplot(df2_pa1, aes(x=x, color = a)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa1 <- ggplot(df2_pa1, aes(x=x, color = a)) +
  geom_line(aes(y = 1 - ..y.., linetype = a), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 1.5), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

CTS_studt_pa2 <- function(b){
  set.seed(123)
  ndraws <- 1e5
  a <- 1
  tausq <- 1/rgamma(ndraws, shape = a, scale = 1/b)
  x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa2 <- lapply(c(0.001,0.01,0.1), CTS_studt_pa2)
df2_pa2 <- data_frame(x = unlist(df1_pa2), b = factor(rep(c(0.001,0.01,0.1), each = ndraws)))

plot_pa2 <- ggplot(df2_pa2, aes(x=x, color = b)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa2 <- ggplot(df2_pa2, aes(x=x, color = b)) +
  geom_line(aes(y = 1 - ..y.., linetype = b), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 1.5), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian  
# percentage of obs removed
sum((df2_pa2$x < 10) & (df2_pa2$x > -10))/length(df2_pa2$x)*100

## [1] 99.961

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

grid.arrange(plot_pa1,plot_pa2, ncol = 2)

# tail behavior
grid.arrange(tail_pa1,tail_pa2, ncol = 2)

2.2 `CTS-lasso`

Reference: Park and Casella (2008) - The Bayesian Lasso

The Laplace prior takes the form \[ \mathbf{\beta} \mid \{\tau_j^2\}_{j=1}^p, \sigma^2 \sim N_p(\mathbf{0}, \sigma^2\mathbf{Q}), \] \[ \tau_j^2 \mid \lambda^2 \sim Exponential\left(\frac{\lambda^2}{2}\right), \quad \text{for} \quad j = 1,\ldots, p, \]

\[ \lambda^2 \sim Gamma(c,d) \]

\[ \sigma^2 \sim \frac{1}{\sigma^2} \]

where \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\)

The conditional posteriors are of the form \[ \mathbf{\beta} \mid \bullet \sim N_p\left(\mathbf{V}\times \mathbf{X}'\mathbf{y}, \sigma^2 \mathbf{V}\right) \] \[ \frac{1}{\tau_j^2} \mid \bullet \sim IG\left(\sqrt{\frac{\lambda^2\sigma^2}{\beta_j^2}}, \lambda^2\right), \quad \text{for} \quad j = 1,\ldots,p, \]

\[ \lambda^2 \mid \bullet \sim Gamma\left(c + p, d + \frac{\sum_{j=1}^p\tau_j^2}{2}\right) \]

\[ \sigma^2 \mid \bullet \sim Inv-Gamma\left(\frac{n+p}{2},\frac{\psi + \mathbf{\beta'}\mathbf{Q}^{-1}\mathbf{\beta}}{2}\right) \]

where \(\mathbf{V} = (\mathbf{X'X} + \mathbf{Q}^{-1})^{-1}\), \(\mathbf{Q} = diag(\tau_1^2, \ldots, \tau_p^2)\) and \(\psi = (\mathbf{y} - \mathbf{X\beta})'(\mathbf{y} - \mathbf{X\beta})\)

2.2.1 Version 1: Double-exponential (Laplace) prior

# Double exponential prior ====
ndraws <- 1e5
mu <- 0 # location
lambda <- 2 # rate
s <- 1/lambda # scale = 1/rate

tausq <-  rexp(ndraws, rate = lambda^2/2)
library(purrr)
x <- map_dbl(tausq, ~ rnorm(1, mean = mu, sd = sqrt(.x)))

ggplot(data_frame(x), aes(x=x)) +
  geom_density(fill = "lightgreen") +
  stat_function(fun = LaplacesDemon::dlaplace, args = c(location = mu, scale = 1/lambda),
                  color = "red", size = 1) +
    labs(caption = paste0("Laplace prior (rate = ", lambda, ")")) +
  xlab("x") +
  ylab(paste0("density")) +
  theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
  )

# Choice of rate ====
library(LaplacesDemon)

dlaplace2 <- function(lambda) {
  function (x) LaplacesDemon::dlaplace(x, location = 0, scale = 1/lambda)
}

stat_laplace <- function(lambda) {
  stat_function(aes(colour = lambda), 
                fun = dlaplace2(lambda), 
                size = 0.5) 
}

ggplot(data_frame(x = c(-5,5)), aes(x=x)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
  lapply(c(2,1,0.1), stat_laplace) +
  scale_colour_viridis_c(limits = c(0.1,2)) +
    labs(caption = paste0("Laplace Prior (lambda)")) +
    xlab("x") +
    ylab(paste0("dlapalce")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
    )

2.2.2 Version 2: Hierarchical prior

# Hierarchical prior ====
ndraws <- 1e5

library(purrr)
c <- 1
d <- 1.05

lambdasq <- rgamma(ndraws, shape = c, rate = d)
tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))

# c(mean(lambdasq), sd(lambdasq))
rate <- sqrt(mean(lambdasq)) # for corresponding Laplace distribution

ggplot(data_frame(x), aes(x=x)) +
  geom_density(fill = "lightgreen") +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.4) +
  stat_function(fun = LaplacesDemon::dlaplace, args = c(location = 0, scale = 1/rate),
                  color = "red", size = 0.5) +
  xlim(-10,10) + coord_cartesian(c(-5,5)) +
  labs(caption = paste0("Hierarchical prior (c = ", c, ", d = ", d, ")")) +
  xlab("x") +
  ylab(paste0("density")) +
  theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
  )

paste0("Corresponding Laplace distribution - Rate = ", rate)

## [1] "Corresponding Laplace distribution - Rate = 0.977472489051276"

# Choice of hyper-parameters
library(purrr)
ndraws <- 1e5

CTS_lasso_pa1 <- function(c) {
  set.seed(123)
  d = 1
  lambdasq <- rgamma(ndraws, shape = c, rate = d)
  tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
  x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa1 <- lapply(c(1,2,3), CTS_lasso_pa1)
df2_pa1 <- data_frame(x = unlist(df1_pa1), c = factor(rep(c(1,2,3), each = ndraws)))

plot_pa1 <- ggplot(df2_pa1, aes(x=x, color = c)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa1 <- ggplot(df2_pa1, aes(x=x, color = c)) +
  geom_line(aes(y = 1 - ..y.., linetype = c), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 2), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

CTS_lasso_pa2 <- function(d) {
  set.seed(123)
  c = 1
  lambdasq <- rgamma(ndraws, shape = c, rate = d)
  tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
  x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}

df1_pa2 <- lapply(c(1,0.5,0.1), CTS_lasso_pa2)
df2_pa2 <- data_frame(x = unlist(df1_pa2), d = factor(rep(c(1,0.5,0.1), each = ndraws)))

plot_pa2 <- ggplot(df2_pa2, aes(x=x, color = d)) +
  geom_density() +
  xlim(-10,10) + coord_cartesian(c(-5,5))

tail_pa2 <- ggplot(df2_pa2, aes(x=x, color = d)) +
  geom_line(aes(y = 1 - ..y.., linetype = d), stat='ecdf') +
  scale_linetype_manual(values = c(1, 2, 3)) +
  coord_cartesian(xlim = c(0, 2), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom") + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian  
# percentage of obs removed
# sum((df2_pa1$x < 10) & (df2_pa1$x > -10))/length(df2_pa1$x)*100
# sum((df2_pa2$x < 10) & (df2_pa2$x > -10))/length(df2_pa2$x)*100

library(gridExtra)
grid.arrange(plot_pa1,plot_pa2, ncol = 2)

# tail behavior
grid.arrange(tail_pa1,tail_pa2, ncol = 2)

2.3 `CTS-horse-mx`

ndraws = 1e5
library(purrr)

nu <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
lamdasq <- map_dbl(nu, ~ 1/rgamma(1, shape = 1/2, scale = .x))
xi <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
tausq <- map_dbl(xi, ~ 1/rgamma(1, shape = 1/2, scale = .x))
temp <- lamdasq*tausq
x <- map_dbl(temp, ~rnorm(1, mean = 0, sd = .x))

ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
    xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("CTS-horseshoe-mx prior")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

## Warning: Removed 24095 rows containing non-finite values (stat_density).

2.4 `CTS-horse-sl`

ndraws = 1e5
library(purrr)
lambda <- rhalfcauchy(ndraws, scale= 1)
tau <- map_dbl(lambda, ~rhalfcauchy(1, scale = .x))
x <- map_dbl(tau, ~rnorm(1, mean = 0, sd = .x))

ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
    xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("CTS-horseshoe-sl prior")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

## Warning: Removed 10430 rows containing non-finite values (stat_density).

2.5 `SSVS-normal`

2.6 `SSVS-studt`

ndraws <- 1e5

# SSVS-studt1
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))

tausq0 <- 1e-3
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))


# SSVS-studt2
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))

tausq0 <- tausq1*1e-3
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))
#mean(tausq0) #0.012


# SSVS-studt3
alpha1 <- 1
beta1 <- 1
tausq1 <- 1/rgamma(ndraws, shape = alpha1, scale = 1/beta1)
library(purrr)
studt_slab <- map_dbl(tausq1, ~rnorm(1, mean = 0, sd = sqrt(.x)))

alpha0 <- 1
beta0 <- 0.0001
tausq0 <- 1/rgamma(ndraws, shape = alpha0, scale = 1/beta0)
library(purrr)
studt_spike <- map_dbl(tausq0, ~rnorm(1, mean = 0, sd = sqrt(.x)))
#mean(tausq0) #0.0035


pi <- rbern(1e5, prob = 0.2)
x <- pi*studt_slab + (1-pi)*studt_spike
# tmp <- runif(1e5)
# x <- (tmp<=0.2)*studt_slab + (tmp>0.2)*studt_spike
ggplot(data_frame(x), aes(x=x)) +
  geom_density(color = "red", fill = "lightgreen") +
  xlim(-10,10) + coord_cartesian(c(-5,5))+
    labs(caption = paste0("SSVS-studt prior")) +
    xlab("x") +
    ylab(paste0("density")) +
    theme(
        plot.caption = element_text(hjust = 0.5, size = 14, face ="italic")
      )

## Warning: Removed 195 rows containing non-finite values (stat_density).

2.7 `SSVS-lasso`

2.8 `SSVS-horse`

##### Sample from prior densities -----

set.seed(123)
ndraws <- 1e5

## ridge 
normal <- rnorm(ndraws, mean=0, sd=0.5)

## Student's t 
studt <- rt(ndraws, df=1) 

CTS_studt_fun <- function(alpha = 1, beta = 0.001, ndraws = 1e5) {
  library(purrr)
  tausq <- 1/rgamma(ndraws, shape = alpha, scale = 1/beta)
  x <- map_dbl(tausq, ~rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}
CTS_studt <- CTS_studt_fun(alpha = 1, beta = 0.001, ndraws = 1e5)


## lasso 
lasso <- rmutil::rlaplace(ndraws, m=0, s=0.5)

CTS_lasso_fun <- function (alpha = 1, sigma = 1, ndraws = 1e5) {
  library(purrr)
  lambdasq <- rgamma(ndraws, shape = alpha, scale = sigma)
  tausq <-  map_dbl(lambdasq, ~rexp(1, rate = .x/2))
  x <- map_dbl(tausq, ~ rnorm(1, mean = 0, sd = sqrt(.x)))
  return(x)
}
CTS_lasso <- CTS_lasso_fun(alpha = 1, sigma = 1, ndraws = 1e5)

## horseshoe

CTS_horsemx_fun <- function(ndraws = 1e5) {
  library(purrr)
  nu <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
  lamdasq <- map_dbl(nu, ~ 1/rgamma(1, shape = 1/2, scale = .x))
  xi <- 1/rgamma(ndraws, shape = 1/2, scale = 1)
  tausq <- map_dbl(xi, ~ 1/rgamma(1, shape = 1/2, scale = .x))
  temp <- lamdasq*tausq
  x <- map_dbl(temp, ~rnorm(1, mean = 0, sd = .x))
  return(x)
}
CTS_horsemx <- CTS_horsemx_fun(ndraws = 1e5)

CTS_horsesl_fun <- function(ndraws = 1e5) {
  library(purrr)
  lambda <- rhalfcauchy(ndraws, scale= 1)
  tau <- map_dbl(lambda, ~rhalfcauchy(1, scale = .x))
  x <- map_dbl(tau, ~rnorm(1, mean = 0, sd = .x))
  return(x)
}
CTS_horsesl <- CTS_horsesl_fun(ndraws = 1e5)

##### Plot -----
## Create plot data
df_comb <- data_frame(CTS_studt, CTS_lasso, CTS_horsemx, CTS_horsesl)
df_long <- gather(df_comb, Prior, value) # long format
df_long$Prior <- factor(df_long$Prior)
levels(df_long$Prior) <- list("CTS_studt" = "CTS_studt", "CTS_lasso" = "CTS_lasso", "CTS_horsemx" = "CTS_horsemx", "CTS_horsesl" = "CTS_horsesl")
df_long$asymp <- rep(NA, nrow(df_long)) 
df_long[which(df_long$Prior=="CTS_horsemx"|df_long$Prior=="CTS_horsesl"), "asymp"] <- 0

   
df_long <- gather(df_comb, Prior, value) %>% 
      mutate(Prior = factor(Prior, levels = list("CTS_studt" = "CTS_studt", "CTS_lasso" = "CTS_lasso", "CTS_horsemx" = "CTS_horsemx", "CTS_horsesl" = "CTS_horsesl")),
             asymp = case_when(Prior=="CTS_horsemx" ~ 0,
                               Prior=="CTS_horsesl" ~ 0,
                               TRUE ~ NA_real_))

plotComb <- ggplot(df_long, aes(x = value, colour = Prior, linetype = Prior)) +
  stat_density(geom = "line", position = "identity") +
  geom_vline(aes(xintercept = asymp), colour = "grey") +
  scale_linetype_manual(values = c(1,2,3,4)) +
  scale_color_manual(values = c(1,2,3,4)) +
  xlim(-10,10) + coord_cartesian(xlim=c(-5,5), ylim =c(0,5)) +
  theme(axis.title = element_blank(), legend.position = "bottom")
# xlim removes values outside the range, coord_cartesian does not, but behaves strange sometimes
# solution: restrict range with xlim first & then use coord-cartesian  

##### Tail behavior -----
# Plot the survival function, i.e., 1-CDF
# See https://stats.stackexchange.com/questions/86429/which-has-the-heavier-tail-lognormal-or-gamma

tailComb <- ggplot(df_long, aes(x = value, group = Prior)) +
  geom_line(aes(y = 1 - ..y.., linetype = Prior, colour = Prior), stat='ecdf') +
  scale_linetype_manual(values = c(1,2,3,4)) +
  scale_color_manual(values = c(1,2,3,4)) +
  coord_cartesian(xlim = c(0, 8), ylim = c(0, 0.5), expand = FALSE) +
  theme(axis.title = element_blank(), legend.position = "bottom", legend.title = element_blank()) + 
  guides(lty=guide_legend(nrow=1,byrow=TRUE))

library(gridExtra)
get_legend <- function(myggplot){
  require(gridExtra)
  tmp <- ggplot_gtable(ggplot_build(myggplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}

legend <- get_legend(plotComb)

## Warning: Removed 36684 rows containing non-finite values (stat_density).

## Warning: Removed 200000 rows containing missing values (geom_vline).

plotComb <- plotComb + theme(legend.position="none")
tailComb <- tailComb + theme(legend.position="none")
grid.arrange(plotComb,tailComb, legend, ncol = 2, nrow =2,
             layout_matrix = rbind(c(1,2),c(3,3)),
             heights = c(4,1))

## Warning: Removed 36684 rows containing non-finite values (stat_density).
## Removed 200000 rows containing missing values (geom_vline).

Appendix

# install.packages("pkgsearch")
library(pkgsearch)
invGammaPkg <- pkg_search(query = "inverse gamma", size = 200)

invGammaPkgShort <- invGammaPkg %>% 
  filter(maintainer_name != "ORPHANED", score > 1000) %>% 
  select(score, package, downloads_last_month) %>% 
  arrange(downloads_last_month)

head(invGammaPkgShort)

## # A data frame: 0 × 3
## # … with 3 variables: score <dbl>, package <chr>, downloads_last_month <int>

Reference

Park, Trevor, and George Casella. 2008. “The Bayesian Lasso.” Journal of the American Statistical Association 103 (482): 681–86.

Statistical Distributions and Shrinkage Priors

Duong Trinh

2023-03-12

1 Statistical Distributions

1.1 Normal Distribution

1.1.1 True PDF

1.1.2 Sampling

1.2 t distribution

1.2.1 True PDF

1.2.2 Sampling

1.3 Gamma Distribution

1.3.1 True PDF

1.3.2 Sampling

1.4 Inverse-Gamma Distribution

1.4.1 True PDF

1.4.2 Sampling

1.5 Inverse Gaussian Distribution

1.5.1 True PDF

1.5.2 Sampling

1.6 Exponential Distribution

1.6.1 True PDF

1.6.2 Sampling

1.7 Laplace Distribution

1.7.1 True PDF

1.7.2 Sampling

1.8 Assymmetric Laplace distribution

1.9 Binomial Distribution

1.9.1 True PDF

2 Bayesian Shrinkage Priors

2.1 CTS-studt

2.2 CTS-lasso

2.2.1 Version 1: Double-exponential (Laplace) prior

2.2.2 Version 2: Hierarchical prior

2.3 CTS-horse-mx

2.4 CTS-horse-sl

2.5 SSVS-normal

2.6 SSVS-studt

2.7 SSVS-lasso

2.8 SSVS-horse

Appendix

Reference

2.1 `CTS-studt`

2.2 `CTS-lasso`

2.3 `CTS-horse-mx`

2.4 `CTS-horse-sl`

2.5 `SSVS-normal`

2.6 `SSVS-studt`

2.7 `SSVS-lasso`

2.8 `SSVS-horse`