Notice: On April 23, 2014, Statalist moved from an email list to a forum, based at statalist.org.
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
st: Mata efficiency: st_views and st_selects
From
László Sándor <[email protected]>
To
[email protected]
Subject
st: Mata efficiency: st_views and st_selects
Date
Fri, 27 Apr 2012 17:09:10 -0400
Hi,
For correct standard errors for matching on propensity scores, I
intend to use the following Mata function after a _logit run
estimating the propensity score. It completes in less than 3 seconds
for 5000 observations (on Stata 12.1 MP for mac), which is better than
my expectations (-nnmatch- is notoriously slow, at it was written
before Mata).
However, to ease my final calculations, I do not deal with completely
separate copies of the data into a treated and an untreated subsample,
but I use views (onto tempvars), so the values I set for the
subsamples end up in the "full sample". Now I realize that probably I
could have achieved the same with copying things into matrices only,
and at worst joining the few vectors in the end, as needed. My
question is: Do you expect it's worth the rewrite? In sample of
millions, would the views be much slower than otherwise?
Also, of course I am grateful for any other comment you have on the function.
Thanks!
Laszlo
void function mpsc(string scalar yvar,
string scalar tvar,
string scalar xvars,
real scalar M,
real scalar L,
string scalar touse)
{
string scalar pscdensvar
real matrix y, t, X, theta, psc, pscd, weightvar, te, sigmabar,
kperm, covhat, mu0hat, mu1hat, xmte
real matrix y0, y1, X0, X1, xyk, psc0, psc1, te0, te1, kperm0,
kperm1, sb0, sb1, ch0, ch1, mu0hat0, mu0hat1, mu1hat0, mu1hat1, xmte0,
xmte1
real matrix yki, ykw, logitv, information_matrix, Iinv, tauqmv, tauqmvt
real rowvector chat, ct2hat, ct1hat, dtaudtheta
string matrix xv
string rowvector xvarmat
real colvector dist
real scalar i, j, rows0, rows1, n, n1, n0, k
real scalar sigmatau, sigmat, taun, taunt, sigmahatadj, sigmahatadjt
st_view(y,.,yvar,touse)
n = rows(y)
st_view(t,.,tvar,touse)
logitv = st_matrix("e(V)")
theta = st_matrix("e(b)")
k = length(theta)
xv = st_matrixcolstripe_split("e(V)",.,0)
for (i=1; i<rows(xv); i++) {
xvarmat = xvarmat , xv[i,1]
for (j=2; j<=cols(xv); j++) {
xvarmat[i] = xvarmat[i] + xv[i,j]
}
}
X = st_data(.,xvarmat,touse) , J(n,1,1)
st_view(psc,.,st_addvar("double","pscvar"),touse) // NB to change
these to tempvars, there is no need to leave them hanging
psc[.] = invlogit(X*theta')
st_view(pscd,.,st_addvar("double",st_tempname(),1),touse) // NB to
change these to tempvars, there is no need to leave them hanging
pscd[.] = psc:*(-1:*psc:+1)
information_matrix = quadcross(X,pscd,X) :/n
st_view(te,.,st_addvar("double",st_tempname(),1),touse)
st_view(sigmabar,.,st_addvar("double",st_tempname(),1),touse)
st_view(kperm,.,st_addvar("double",st_tempname(),1),touse)
st_view(covhat,.,st_addvar("double",st_tempname(k),1),touse)
st_view(mu0hat,.,st_addvar("double",st_tempname(),1),touse)
st_view(mu1hat,.,st_addvar("double",st_tempname(),1),touse)
st_view(xmte,.,st_addvar("double",st_tempname(),1),touse) // For
"TEs" on covariate matching
kperm[.,.] = J(n,1,0)
// Selecting observations for matching among untreated:
st_select(y0,y,-1*t:+1)
st_select(X0,X,-1*t:+1)
st_select(psc0,psc,-1*t:+1)
st_select(te0,te,-1*t:+1)
st_select(sb0,sigmabar,-1*t:+1)
st_select(kperm0,kperm,-1*t:+1)
st_select(ch0,covhat,-1*t:+1)
st_select(mu0hat0,mu0hat,-1*t:+1)
st_select(mu1hat0,mu1hat,-1*t:+1)
st_select(xmte0,xmte,-1*t:+1)
// Selecting observations for matching among treated:
st_select(y1,y,t)
st_select(X1,X,t)
st_select(psc1,psc,t)
st_select(te1,te,t)
st_select(sb1,sigmabar,t)
st_select(kperm1,kperm,t)
st_select(ch1,covhat,t)
st_select(mu0hat1,mu0hat,t)
st_select(mu1hat1,mu1hat,t)
st_select(xmte1,xmte,t)
n1 = rows(y1)
n0 = n-n1
rows0 = n-n1
rows1 = n1
for (i=1; i<=rows1; i++) {
dist = abs(psc0:-psc1[i])
minindex(dist,M,yki,ykw)
te1[i] = y1[i] - mean(y0[yki])
kperm0[yki] = kperm0[yki] :+ (1 / rows(yki))
minindex(dist,M,yki,ykw)
mu0hat1[i] = mean(y0[yki])
minindex(abs(psc1:-psc1[i]),L+1,yki,ykw) // NB that it includes the
original too here!
xyk = quadmeanvariance( (y1[yki],X1[yki,.]) )
mu1hat1[i] = xyk[1,1]
sb1[i] = xyk[2,1]
ch1[i,.] = xyk[|2,2 \ 2,.|]
// Now match on covariates
minindex(quadrowsum((X0:-X1[i,.]):^2),M,yki,ykw)
xmte1[i] = y1[i] - mean(y0[yki])
}
for (i=1; i<=rows0; i++) {
dist = abs(psc1:-psc0[i])
minindex(dist,M,yki,ykw)
te0[i] = mean(y1[yki]) - y0[i]
kperm1[yki] = kperm1[yki] :+ (1 / rows(yki))
minindex(dist,L + 1,yki,ykw)
mu1hat0[i] = mean(y1[yki])
minindex(abs(psc0:-psc0[i]),L+1,yki,ykw) // NB that it includes the
original too here!
xyk = quadmeanvariance( (y0[yki],X0[yki,.]) )
mu0hat0[i] = xyk[1,1]
sb0[i] = xyk[2,1]
ch0[i,.] = xyk[|2,2 \ 2,.|]
// Now match on covariates
minindex(quadrowsum((X1:-X0[i,.]):^2),M,yki,ykw)
xmte0[i] = mean(y1[yki])-y0[i]
}
tauqmv = quadmeanvariance(te)
taun = tauqmv[1,1]
sigmatau = tauqmv[2,1] + mean((kperm:^2 + ((2*M-1)/M) * kperm):*sigmabar)
tauqmvt = quadmeanvariance(te,t)
taunt = tauqmvt[1,1]
sigmat = (n/n1) * tauqmvt[2,1] + (n*n0/n1^2) * mean((kperm0:^2 -
(kperm0 :/ M)):*sb0)
chat = mean(covhat:*pscd:*(t:/psc:^2 + (-1:*t:+1):/(-1:+psc):^2))
ct2hat =(n/n1) :* mean(covhat:*pscd:*psc:*(t:/(psc:^2) +
(-1:*t:+1):/((-1:+psc):^2)))
ct1hat =(n/n1) :* mean(X:*pscd:*((mu1hat-mu0hat):-taunt))
dtaudtheta = (n/n1) :* mean(X:*pscd:*(xmte:-taunt))
Iinv = invsym(information_matrix)
sigmahatadj = sigmatau - chat*Iinv*chat'
sigmahatadjt = sigmat - (ct1hat+ct2hat)*Iinv*(ct1hat+ct2hat)' +
dtaudtheta*Iinv*dtaudtheta'
st_numscalar("e(ate)", taun)
st_numscalar("e(ate_se)", sqrt(sigmatau/n))
st_numscalar("e(ate_se_adj)", sqrt(sigmahatadj/n))
st_numscalar("e(atet)", taunt)
st_numscalar("e(atet_se)", sqrt(sigmat/n))
st_numscalar("e(atet_se_adj)", sqrt(sigmahatadjt/n))
st_numscalar("e(N)", n)
st_numscalar("e(N1)",n1)
st_numscalar("e(N0)",n0)
}
mata mosave mpsc(), replace
end
*
* For searches and help try:
* http://www.stata.com/help.cgi?search
* http://www.stata.com/support/statalist/faq
* http://www.ats.ucla.edu/stat/stata/