Stata: Data Analysis and Statistical Software

Notice: On April 23, 2014, Statalist moved from an email list to a forum, based at statalist.org.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

st: Mata efficiency: st_views and st_selects

From	László Sándor <[email protected]>
To	[email protected]
Subject	st: Mata efficiency: st_views and st_selects
Date	Fri, 27 Apr 2012 17:09:10 -0400

Hi,

For correct standard errors for matching on propensity scores, I
intend to use the following Mata function after a _logit run
estimating the propensity score. It completes in less than 3 seconds
for 5000 observations (on Stata 12.1 MP for mac), which is better than
my expectations (-nnmatch- is notoriously slow, at it was written
before Mata).

However, to ease my final calculations, I do not deal with completely
separate copies of the data into a treated and an untreated subsample,
but I use views (onto tempvars), so the values I set for the
subsamples end up in the "full sample". Now I realize that probably I
could have achieved the same with copying things into matrices only,
and at worst joining the few vectors in the end, as needed. My
question is: Do you expect it's worth the rewrite? In sample of
millions, would the views be much slower than otherwise?

Also, of course I am grateful for any other comment you have on the function.

Thanks!

Laszlo

void function mpsc(string scalar yvar,
					string scalar tvar,
					string scalar xvars,
					real scalar M,
					real scalar L,
					string scalar touse)
{
	string scalar pscdensvar
	real matrix y, t, X, theta, psc, pscd, weightvar, te, sigmabar,
kperm, covhat, mu0hat, mu1hat, xmte
	real matrix y0, y1, X0, X1, xyk, psc0, psc1, te0, te1, kperm0,
kperm1, sb0, sb1, ch0, ch1, mu0hat0, mu0hat1, mu1hat0, mu1hat1, xmte0,
xmte1
	real matrix yki, ykw, logitv, information_matrix, Iinv, tauqmv, tauqmvt
	real rowvector chat, ct2hat, ct1hat, dtaudtheta
	string matrix xv
	string rowvector xvarmat
	real colvector dist
	real scalar i, j, rows0, rows1, n, n1, n0, k
	real scalar sigmatau, sigmat, taun, taunt, sigmahatadj, sigmahatadjt
	st_view(y,.,yvar,touse)
	n = rows(y)
	st_view(t,.,tvar,touse)
	logitv = st_matrix("e(V)")
	theta = st_matrix("e(b)")
	k = length(theta)
	xv = st_matrixcolstripe_split("e(V)",.,0)
	for (i=1; i<rows(xv); i++) {
		xvarmat = xvarmat , xv[i,1]
		for (j=2; j<=cols(xv); j++) {
			xvarmat[i] = xvarmat[i] + xv[i,j]
		}
	}
	X = st_data(.,xvarmat,touse) , J(n,1,1)
	st_view(psc,.,st_addvar("double","pscvar"),touse) // NB to change
these to tempvars, there is no need to leave them hanging
	psc[.] = invlogit(X*theta')
	st_view(pscd,.,st_addvar("double",st_tempname(),1),touse) // NB to
change these to tempvars, there is no need to leave them hanging
	pscd[.] = psc:*(-1:*psc:+1)
	information_matrix = quadcross(X,pscd,X) :/n
	st_view(te,.,st_addvar("double",st_tempname(),1),touse)
	st_view(sigmabar,.,st_addvar("double",st_tempname(),1),touse)
	st_view(kperm,.,st_addvar("double",st_tempname(),1),touse)
	st_view(covhat,.,st_addvar("double",st_tempname(k),1),touse)
	st_view(mu0hat,.,st_addvar("double",st_tempname(),1),touse)
	st_view(mu1hat,.,st_addvar("double",st_tempname(),1),touse)
	st_view(xmte,.,st_addvar("double",st_tempname(),1),touse) // For
"TEs" on covariate matching
	kperm[.,.] = J(n,1,0)
	// Selecting observations for matching among untreated:
	st_select(y0,y,-1*t:+1)
	st_select(X0,X,-1*t:+1)
	st_select(psc0,psc,-1*t:+1)
	st_select(te0,te,-1*t:+1)
	st_select(sb0,sigmabar,-1*t:+1)
	st_select(kperm0,kperm,-1*t:+1)
	st_select(ch0,covhat,-1*t:+1)
	st_select(mu0hat0,mu0hat,-1*t:+1)
	st_select(mu1hat0,mu1hat,-1*t:+1)
	st_select(xmte0,xmte,-1*t:+1)
	// Selecting observations for matching among treated:
	st_select(y1,y,t)
	st_select(X1,X,t)
	st_select(psc1,psc,t)
	st_select(te1,te,t)
	st_select(sb1,sigmabar,t)
	st_select(kperm1,kperm,t)
	st_select(ch1,covhat,t)
	st_select(mu0hat1,mu0hat,t)
	st_select(mu1hat1,mu1hat,t)
	st_select(xmte1,xmte,t)
	n1 = rows(y1)
	n0 = n-n1	
	rows0 = n-n1
	rows1 = n1	
	for (i=1; i<=rows1; i++) {
		dist = abs(psc0:-psc1[i])
		minindex(dist,M,yki,ykw)
		te1[i] = y1[i] - mean(y0[yki])
		kperm0[yki] = kperm0[yki] :+ (1 / rows(yki))
		minindex(dist,M,yki,ykw)
		mu0hat1[i] = mean(y0[yki])
		minindex(abs(psc1:-psc1[i]),L+1,yki,ykw) // NB that it includes the
original too here!
		xyk = quadmeanvariance( (y1[yki],X1[yki,.]) )
		mu1hat1[i] = xyk[1,1]
		sb1[i] = xyk[2,1]
		ch1[i,.] = xyk[|2,2 \ 2,.|]
// Now match on covariates
		minindex(quadrowsum((X0:-X1[i,.]):^2),M,yki,ykw)
		xmte1[i] = y1[i] - mean(y0[yki])		
	}
	for (i=1; i<=rows0; i++) {
		dist = abs(psc1:-psc0[i])
		minindex(dist,M,yki,ykw)
		te0[i] = mean(y1[yki]) - y0[i]
		kperm1[yki] = kperm1[yki] :+ (1 / rows(yki))
		minindex(dist,L + 1,yki,ykw)
		mu1hat0[i] = mean(y1[yki])		
		minindex(abs(psc0:-psc0[i]),L+1,yki,ykw)  // NB that it includes the
original too here!
		xyk = quadmeanvariance( (y0[yki],X0[yki,.]) )
		mu0hat0[i] = xyk[1,1]
		sb0[i] = xyk[2,1]
		ch0[i,.] = xyk[|2,2 \ 2,.|]
// Now match on covariates
		minindex(quadrowsum((X1:-X0[i,.]):^2),M,yki,ykw)
		xmte0[i] = mean(y1[yki])-y0[i]		
	}
	
	tauqmv = quadmeanvariance(te)
	taun = tauqmv[1,1]
	sigmatau = tauqmv[2,1] + mean((kperm:^2 + ((2*M-1)/M) * kperm):*sigmabar)
	tauqmvt = quadmeanvariance(te,t)
	taunt = tauqmvt[1,1]
	sigmat = (n/n1) * tauqmvt[2,1] + (n*n0/n1^2) * mean((kperm0:^2 -
(kperm0 :/ M)):*sb0)
	
	chat = mean(covhat:*pscd:*(t:/psc:^2 + (-1:*t:+1):/(-1:+psc):^2))

	ct2hat =(n/n1) :* mean(covhat:*pscd:*psc:*(t:/(psc:^2) +
(-1:*t:+1):/((-1:+psc):^2)))
	ct1hat =(n/n1) :* mean(X:*pscd:*((mu1hat-mu0hat):-taunt))
	dtaudtheta = (n/n1) :* mean(X:*pscd:*(xmte:-taunt))
	Iinv = invsym(information_matrix)

	sigmahatadj = sigmatau - chat*Iinv*chat'
	sigmahatadjt = sigmat - (ct1hat+ct2hat)*Iinv*(ct1hat+ct2hat)' +
dtaudtheta*Iinv*dtaudtheta'
	
	st_numscalar("e(ate)", taun)
	st_numscalar("e(ate_se)", sqrt(sigmatau/n))
	st_numscalar("e(ate_se_adj)", sqrt(sigmahatadj/n))
	st_numscalar("e(atet)", taunt)
	st_numscalar("e(atet_se)", sqrt(sigmat/n))
	st_numscalar("e(atet_se_adj)", sqrt(sigmahatadjt/n))
	st_numscalar("e(N)", n)
	st_numscalar("e(N1)",n1)
	st_numscalar("e(N0)",n0)
}
mata mosave mpsc(), replace
end
*
*   For searches and help try:
*   http://www.stata.com/help.cgi?search
*   http://www.stata.com/support/statalist/faq
*   http://www.ats.ucla.edu/stat/stata/

Prev by Date: RE: st: RE: creating 2 datasets in memory without saving and merge
Next by Date: st: troubleshooting positional arguments
Previous by thread: st: mata conditional operator element-by-element?
Next by thread: st: troubleshooting positional arguments
Index(es):
- Date
- Thread