predmarg mpred 0.2.1

Generic function to preduce predictive margins

Description

Generic function to preduce predictive margins

Usage

predmarg(
  obj,
  settings,
  data,
  subset,
  groups = NULL,
  setup = NULL,
  cifunc = cinorm,
  level = 0.95,
  parallel = TRUE,
  mc.cores = if (.Platform$OS.type == "windows") 1L else max.cores,
  ...
)

Arguments

obj

a model object, e.g. returned by lm, glm, etc.

settings

an optional data frame of settings for independent variables.

data

an optional data frame for which the predictive margins are computed. If ommited, an attempt is made to obtain the data from the model object.

subset

an optional logical vector that defines a subset for which a predictive margin is computed

groups

a variable that defines groups for which predictive margines are computed. This variable has to have the same number of observations as the data to which the model was fitted.

setup

an optional expression that is evaluated for each setting, i.e. individually for each row of the settings data frame. Can be used to modify independent variables.

cifunc

a function to compute prediction intervals.

level

level of confidence intervals of predictions.

parallel

logical value that determines whether predictions for individual settings are computed in parallel. (Does not yet work on windows.)

mc.cores

number of CPU cores used for parallel processing.

...

optional vectors of values of independent variabls. These further arguments, if present, are used to create a data frame of settings, using expand.grid.

Value

a data frame with the following variables:

pred

the mean prediction for the setting of the independent variables

var.pred

the (estimated) variance of the mean prediction

se.pred

the standard error of prediction, i.e. the square root of the variance of the mean prediction

lower

lower prediction interval computed with qfunc

upper

upper prediction interval computed with qfunc

the independent variables for which values are set to create the predictions are also included in the resulting data frame.

Details

The generic function predmarg computes predictive margins for various settings of the independent variables. It is also possible to provide settings for independent variables that are included in the model, but that are used in the setup expression to transform independent variables. See the examples below.

Examples

library(magrittr)
# Simple linear regression

fm <- lm(weight ~ poly(height, 2), data = women)
pm <-predmarg(fm,
             height=seq(from=58,to=72,
                        length=10))
Using 12 cores ...
str(pm)
'data.frame':        10 obs. of  6 variables:
 $ pred    : num  115 119 123 128 133 ...
 $ var.pred: num  0.00983 0.00983 0.00983 0.00983 0.00983 ...
 $ se.pred : num  0.0992 0.0992 0.0992 0.0992 0.0992 ...
 $ lower   : num  115 119 123 127 132 ...
 $ upper   : num  115 119 123 128 133 ...
 $ height  : num  58 59.6 61.1 62.7 64.2 ...
plot(pred~height,data=pm,
    type="l")
/software/mpred/manual/predmarg/predmarg_0001.svg
with(women, points(height,weight))
with(pm, lines(height,lower,lty=2))
with(pm, lines(height,upper,lty=2))

# Logistic regression

library(carData)
Chile %<>% within({
   vote2 <- factor(vote,levels=c("N","Y"))
   vote2 <- as.integer(vote2=="Y")
})
glm.Chile.1 <- glm(vote2~sex+age+income+education,
                  data=Chile,
                  family=binomial)
summary(glm.Chile.1)
Call:
glm(formula = vote2 ~ sex + age + income + education, family = binomial,
    data = Chile)

Deviance Residuals:
    Min       1Q   Median       3Q      Max
-1.6676  -1.1139  -0.7329   1.1194   1.7335

Coefficients:
              Estimate Std. Error z value Pr(>|z|)
(Intercept)  1.053e-01  1.786e-01   0.589 0.555615
sexM        -5.415e-01  1.009e-01  -5.365 8.10e-08 ***
age          1.203e-02  3.594e-03   3.347 0.000817 ***
income       4.481e-06  1.342e-06   3.339 0.000840 ***
educationPS -1.125e+00  1.656e-01  -6.791 1.12e-11 ***
educationS  -6.369e-01  1.207e-01  -5.276 1.32e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2361.7  on 1703  degrees of freedom
Residual deviance: 2239.0  on 1698  degrees of freedom
  (996 observations deleted due to missingness)
AIC: 2251

Number of Fisher Scoring iterations: 4
pm.Chile.1.income <- predmarg(glm.Chile.1,
                             income=seq(from=2500,to=200000,length=20))
Using 12 cores ...
plot(pred~income,data=pm.Chile.1.income,
    type="l")
/software/mpred/manual/predmarg/predmarg_0002.svg
# Baseline category logit

library(mclogit)
Loading required package: Matrix
library(MASS)
mb.Chile <- mblogit(vote~statusquo,
                   data=Chile)
Iteration 1 - Deviance = 4528.43
Iteration 2 - Deviance = 4394.717
Iteration 3 - Deviance = 4383.817
Iteration 4 - Deviance = 4383.674
Iteration 5 - Deviance = 4383.674
converged
pm.mb.Chile <- predmarg(mb.Chile,
                       statusquo=seq(from=-2,to=2,length=20))
Using 12 cores ...
str(pm.mb.Chile)
'data.frame':        80 obs. of  8 variables:
 $ pred     : num  0.018123 0.950795 0.030231 0.000851 0.025456 ...
 $ var.pred : num  3.78e-06 1.77e-05 7.00e-06 7.21e-09 7.21e-06 ...
 $ se.pred  : num  1.94e-03 4.20e-03 2.65e-03 8.49e-05 2.68e-03 ...
 $ lower    : num  0.014315 0.942554 0.025044 0.000685 0.020194 ...
 $ upper    : num  0.02193 0.95904 0.03542 0.00102 0.03072 ...
 $ eqnum    : int  1 2 3 4 1 2 3 4 1 2 ...
 $ statusquo: num  -2 -2 -2 -2 -1.79 ...
 $ response : Factor w/ 4 levels "A","N","U","Y": 1 2 3 4 1 2 3 4 1 2 ...
library(ggplot2)
Attaching package: 'ggplot2'

The following object is masked from 'package:memisc':

    syms

The following object is masked from 'package:crayon':

    %+%
(ggplot(pm.mb.Chile,
      aes(x=statusquo,
          y=pred,
          fill=response
          )
      ) + geom_area())
/software/mpred/manual/predmarg/predmarg_0003.svg
(ggplot(pm.mb.Chile,
      aes(x=statusquo,
          y=pred,
          ymin=lower,
          ymax=upper
          )
      ) + geom_line() +geom_ribbon(alpha=.25) + facet_grid(~response))
/software/mpred/manual/predmarg/predmarg_0004.svg
mb.hs <- mblogit(Sat~Infl+Type+Cont,weights=Freq,
                data=housing)
Iteration 1 - Deviance = 3493.764
Iteration 2 - Deviance = 3470.111
Iteration 3 - Deviance = 3470.084
Iteration 4 - Deviance = 3470.084
converged
pm.mb.hs <- predmarg(mb.hs,
                    Infl=levels(Infl),
                    Type=levels(Type))
Using 12 cores ...
dodge <- position_dodge(width=.8)
(ggplot(pm.mb.hs)
   +facet_wrap(~Type)
   +geom_bar(
        aes(fill=response,
            x=Infl,
            y=pred),
        stat='identity',position=dodge,width=.8)
   +geom_errorbar(
        aes(x=Infl,
            ymin=lower,
            ymax=upper,group=response),
        position=dodge,width=.4))
/software/mpred/manual/predmarg/predmarg_0005.svg
# Baseline category logit with random effects

# Some artificial data
exadata <- local({
   B <- cbind(c(-.5,.3),
              c(.5,-.5))
   set.seed(42)
   x <- rnorm(n=60)
   X <- cbind(1,x)
   Eta <- X %*% B
   j <- rep(1:10,6)
   jf <- as.factor(j)
   u1 <- rnorm(n=10,sd=.8)
   u2 <- rnorm(n=10,sd=.8)
   Eta <- Eta + cbind(u1[j],x*u2[j])
   expEta <- cbind(1,exp(Eta))
   sum.expEta <- rowSums(expEta)
   pi <- expEta/sum.expEta
   Y <- t(apply(pi,1,rmultinom,n=1,size=300))
   res <-data.frame(Y,x,j,jf)
   names(res)[1:3] <- paste0("y",1:3)
   res
})

# Baseline logit model with random intercepts and random slopes
mbrsl <- mblogit(cbind(y1,y2,y3)~x,data=exadata,
                random = ~1+x|j)
Iteration 1 - deviance = 288.6454 - criterion = 0.2327619
Iteration 2 - deviance = 280.0621 - criterion = 0.001978202
Iteration 3 - deviance = 280.4586 - criterion = 2.825173e-05
Iteration 4 - deviance = 280.673 - criterion = 1.567796e-06
Iteration 5 - deviance = 280.7528 - criterion = 1.658424e-07
Iteration 6 - deviance = 280.7821 - criterion = 2.001306e-08
Iteration 7 - deviance = 280.793 - criterion = 2.668305e-09
converged
summary(mbrsl)
Call:
mblogit(formula = cbind(y1, y2, y3) ~ x, data = exadata, random = ~1 +
    x | j)

Equation for y2 vs y1:
            Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.10133    0.17623  -0.575    0.565
x            0.33688    0.06356   5.300 1.16e-07 ***

Equation for y3 vs y1:
            Estimate Std. Error z value Pr(>|z|)
(Intercept)   0.4734     0.0369  12.831  < 2e-16 ***
x            -0.6745     0.2047  -3.295 0.000986 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Co-)Variances:
Grouping level: 1
Estimate Std.Err.
y2~1 0.304766 1.886e-02
y3~1 0.001329 0.009325 5.606e-04 2.324e-05
y2~x -0.020697 0.007922 0.034540 3.405e-03 1.240e-04 7.388e-04
y3~x 0.113854 0.011080 -0.050176 0.413338 2.435e-02 9.316e-04 5.361e-03
  3.936e-02

Null Deviance:     5127
Residual Deviance: 280.8
Number of Fisher Scoring iterations:  7
Number of observations:  60
# Predictive margins for values of x
pm.mbrsl <- predmarg(mbrsl,x=seq(from=min(x),to=max(x),length=24))
Using 12 cores ...
(ggplot(pm.mbrsl,
      aes(x=x,
          y=pred,
          fill=response
          )
      ) + geom_area())
/software/mpred/manual/predmarg/predmarg_0006.svg
# Predictive margins for the random effects
pm.mbrsl.j <- predmarg(mbrsl,j=1:10)
Using 12 cores ...
(ggplot(pm.mbrsl.j,
      aes(x=j,
          y=pred,
          fill=response
          )
      ) + geom_bar(position="fill",stat="identity"))
/software/mpred/manual/predmarg/predmarg_0007.svg
pm.mbrsl.jx <- predmarg(mbrsl,
                   j=1:10,
                   x=seq(from=min(x),to=max(x),
                         length=24))
Using 12 cores ...
(ggplot(pm.mbrsl.jx,
      aes(x=x,
          y=pred,
          fill=response
          )
      ) + geom_area()
   + facet_wrap(~j))
/software/mpred/manual/predmarg/predmarg_0008.svg