Ich hatte die gleiche Frage. Im Folgenden habe ich eine Funktion aus dem mfx-Paket modifiziert, um marginale Effekte mit Daten zu berechnen, die als Vermessungsobjekt organisiert sind. Ich habe nicht viel getan, hauptsächlich "mean()" und ähnliche Befehle, die auf Nicht-Umfragedaten mit den Äquivalenten des Umfragepakets ausgeführt werden. Nach dem modifizierten mfx-Code gibt es Code, der ein Beispiel ausführt.
Hintergrund
Details zum mfx-Paket von Alan Fernihough: https://cran.r-project.org/web/packages/mfx/mfx.pdf
-Code für das mfx-Paket auf Github (die Dateien, die ich sind probitmfxest.r und probitmfx.r modifiziert): https://github.com/cran/mfx/tree/master/R
Im mfx-Rechner habe ich viel von der Flexibilität auskommen lassen, die in die ursprünglichen Funktionen eingebaut wurde, die verschiedene Annahmen über Cluster und robuste SEs behandelten. Ich könnte mich irren, aber ich denke, dass diese Probleme bereits mit dem Regressionsschätzungsbefehl aus dem Umfragepaket svyglm() berücksichtigt werden.
Marginale Effekte Rechner
library(survey)
probitMfxEstSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in the original mfx package
){
if(is.null(formula)){
stop("model formula is missing")
}
for(i in 1:length(class(design))){
if(!((class(design)[i] %in% "survey.design2") | (class(design)[i] %in% "survey.design"))){
stop("design arguement must contain survey object")
}
}
# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# # cluster sort part
# if(is.null(clustervar1) & !is.null(clustervar2)){
# stop("use clustervar1 arguement before clustervar2 arguement")
# }
# if(!is.null(clustervar1)){
# if(is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),data[,clustervar1])
# names(data)[dim(data)[2]] = clustervar1
# data=na.omit(data)
# }
# if(!is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# if(!(clustervar2 %in% names(data))){
# stop("clustervar2 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),
# data[,c(clustervar1,clustervar2)])
# names(data)[c(dim(data)[2]-1):dim(data)[2]] = c(clustervar1,clustervar2)
# data=na.omit(data)
# }
# }
# fit the probit regression
fit = svyglm(formula,
design=design,
family = quasibinomial(link = "probit"),
x=T
)
# TS: summary(fit)
# terms needed
x1 = model.matrix(fit)
if (any(alias <- is.na(coef(fit)))) { # this conditional removes any vars with a NA coefficient
x1 <- x1[, !alias, drop = FALSE]
}
xm = as.matrix(svymean(x1,design)) # calculate means of x variables
be = as.matrix(na.omit(coef(fit))) # collect coefficients: be as in beta
k1 = length(na.omit(coef(fit))) # collect number of coefficients or x variables
xb = t(xm) %*% be # get the matrix product of xMean and beta, which is the model prediction at the mean
fxb = ifelse(atmean==TRUE, dnorm(xb), mean(dnorm(x1 %*% be))) # collect either the overall predicted mean, or the average of every observation's predictions
# get variances
vcv = vcov(fit)
# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# if(robust){
# if(is.null(clustervar1)){
# # white correction
# vcv = vcovHC(fit, type = "HC0")
# } else {
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }
# }
#
# if(robust==FALSE & is.null(clustervar1)==FALSE){
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }
# set mfx equal to predicted mean (or other value) multiplied by beta
mfx = data.frame(mfx=fxb*be, se=NA)
# get standard errors
if(atmean){# fxb * id matrix - avg model prediction * (beta X xmean)
gr = as.numeric(fxb)*(diag(k1) - as.numeric(xb) *(be %*% t(xm)))
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
} else {
gr = apply(x1, 1, function(x){
as.numeric(as.numeric(dnorm(x %*% be))*(diag(k1) - as.numeric(x %*% be)*(be %*% t(x))))
})
gr = matrix(apply(gr,1,mean),nrow=k1)
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
}
# pick out constant and remove from mfx table
temp1 = apply(x1,2,function(x)length(table(x))==1)
const = names(temp1[temp1==TRUE])
mfx = mfx[row.names(mfx)!=const,]
# pick out discrete change variables
temp1 = apply(x1,2,function(x)length(table(x))==2)
disch = names(temp1[temp1==TRUE])
# calculate the disctrete change marginal effects and standard errors
if(length(disch)!=0){
for(i in 1:length(disch)){
if(atmean){
disx0 = disx1 = xm
disx1[disch[i],] = max(x1[,disch[i]])
disx0[disch[i],] = min(x1[,disch[i]])
# mfx equal to prediction @ x=1 minus prediction @ x=0
mfx[disch[i],1] = pnorm(t(be) %*% disx1) - pnorm(t(be) %*% disx0)
# standard errors
gr = dnorm(t(be) %*% disx1) %*% t(disx1) - dnorm(t(be) %*% disx0) %*% t(disx0)
mfx[disch[i],2] = sqrt(gr %*% vcv %*% t(gr))
} else {
disx0 = disx1 = x1
disx1[,disch[i]] = max(x1[,disch[i]])
disx0[,disch[i]] = min(x1[,disch[i]])
mfx[disch[i],1] = mean(pnorm(disx1 %*% be) - pnorm(disx0 %*% be))
# standard errors
gr = as.numeric(dnorm(disx1 %*% be)) * disx1 - as.numeric(dnorm(disx0 %*% be)) * disx0
avegr = as.matrix(colMeans(gr))
mfx[disch[i],2] = sqrt(t(avegr) %*% vcv %*% avegr)
}
}
}
mfx$discretechgvar = ifelse(rownames(mfx) %in% disch, 1, 0)
output = list(fit=fit, mfx=mfx)
return(output)
}
probitMfxSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in original mfx package
)
{
# res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start, control)
res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start)
est = NULL
est$mfxest = cbind(dFdx = res$mfx$mfx,
StdErr = res$mfx$se,
z.value = res$mfx$mfx/res$mfx$se,
p.value = 2*pt(-abs(res$mfx$mfx/res$mfx$se), df = Inf))
colnames(est$mfxest) = c("dF/dx","Std. Err.","z","P>|z|")
rownames(est$mfxest) = rownames(res$mfx)
est$fit = res$fit
est$dcvar = rownames(res$mfx[res$mfx$discretechgvar==1,])
est$call = match.call()
class(est) = "probitmfx"
est
}
Beispiel
# initialize sample data
nObs = 100
x1 = rbinom(nObs,1,.5)
x2 = rbinom(nObs,1,.3)
#x3 = rbinom(100,1,.9)
x3 = runif(nObs,0,.9)
id = 1:nObs
w1 = sample(c(10,50,100),nObs,replace=TRUE)
# dependnt variables
ystar = x1 + x2 - x3 + rnorm(nObs)
y = ifelse(ystar>0,1,0)
# set up data frame
data = data.frame(id, w1, x1, x2, x3, ystar, y)
# initialize survey
survey.design <- svydesign(ids = data$id,
weights = data$w1,
data = data)
mean(data$x2)
sd(data$x2)/(length(data$x2))^0.5
svymean(x=x2,design=survey.design)
probit = svyglm(y~x1 + x2 + x3, design=survey.design, family=quasibinomial(link='probit'))
summary(probit)
probitMfxSurv(formula = y~x1 + x2 + x3, design = survey.design)
http://r-survey.r-forge.r-project.org/survey/html/marginpred.html –