David and Kelly--
Just wanted to emphasize that my example ignores the possibility that
two or more hospitals are exactly the same distance from a patient.
Generically this case does not occur (i.e. it occurs with probability
zero) if addresses are continuously distributed, but they are
not--geocoding software may assign a relatively small set of latitudes
and longitudes. It might be useful to at least put a warning message
in the code testing the relevant inequalities. What to do in such a
case is a matter I leave to the researcher.
Also, I think the example code previously posted does not save the
hospital ID when the first hospital is the closest (the initialization
of the variable is faulty).
Note also that the basic structure allows for each patient (or each i,
whether they are patients or workers or what have you) to be matched
with a different set of hospitals (or j's, whether they are hospitals
or firms or what have you) if you move the -merge- inside the loop and
have a j file specific to the individual i. This could be useful if,
for example, the closest hospital could only of a certain type for a
given type of patient, thinking not just of the veteran/non-veteran
distinction, but of health insurance, state of residence, etc.
I think the following is a marginal improvement. Someone else could
certainly improve it further...
clear
range pid 1 10 10
set seed 123
g p_lat=32+uniform()*10
g p_lon=-120+uniform()*40
save /patients, replace
clear
input id cntrl VA h_lat h_lon
0785 45 1 42.103 -80.065
0787 45 1 42.103 -80.065001
0150 45 1 39.739 -75.605
0960 45 1 37.499 -77.470
0510 45 1 39.464 -77.962
0030 45 1 27.844 -82.796
0625 45 1 39.138 -84.509
0570 45 1 38.271 -85.694
0020 45 1 36.143 -86.805
1260 33 0 35.832 -86.073
9061 33 0 36.313 -87.676
0305 45 1 33.505 -86.801
0365 33 0 32.288 -90.258
0470 45 1 41.628 -93.659
0006 45 1 44.412 -103.469
0004 45 1 40.807 -96.625
0493 33 0 35.608 -91.265
0070 45 1 31.292 -92.461
0126 45 1 31.080 -97.348
1310 33 0 31.783 -106.474
0180 45 1 46.612 -112.048
0060 45 1 43.618 -116.194
0085 33 0 41.242 -110.991
0540 45 1 34.568 -112.456
0075 33 0 40.698 -111.990
0190 33 0 40.044 -111.716
1180 45 1 46.053 -118.356
0009 33 0 33.860 -118.149
1538 33 0 34.093 -118.344
end
isid id
save /hospitals, replace
desc using /hospitals, sh
local nh=r(N)
use /patients
local np=_N
g d2va=.
g id4va=.
g d2nva=.
g id4nva=.
merge using /hospitals
forv i=1/`np' {
local x1=p_lat[`i']
local y1=p_lon[`i']
local x2 h_lat
local y2 h_lon
tempvar L
local R=6367.44
qui {
g double L=(`y2'-`y1')*_pi/180
replace L=(`y2'-`y1'-360)*_pi/180 if L<. & L>_pi
replace L=(`y2'-`y1'+360)*_pi/180 if L<-_pi
local t1 acos(sin(`x2'*_pi/180)*sin(`x1'*_pi/180)
g d=`t1'+cos(`x2'*_pi/180)*cos(`x1'*_pi/180)*cos(L))*`R'
g va_d=d if VA==1
g nva_d=d if VA==0
scalar dv=va_d[1]
if dv<. scalar iv=id[1]
else scalar iv=.
scalar dnv=nva_d[1]
if dnv<. scalar inv=id[1]
else scalar inv=.
forv j=2/`nh' {
if abs(float(va_d[`j'])-float(dv))<1e-4 {
noi di as err "VA hospital (`=id[`j']') `=va_d[`j']' km away"
noi di as txt "Another VA hospital (`=iv') `=dv' km away"
}
if abs(float(nva_d[`j'])-float(dnv))<1e-4 {
noi di as err "Non-VA hospital (`=id[`j']') `=nva_d[`j']' km away"
noi di as txt "Another VA hospital (`=inv') `=dnv' km away"
}
if va_d[`j']<dv {
scalar dv=va_d[`j']
scalar iv=id[`j']
}
if nva_d[`j']<dnv {
scalar dnv=nva_d[`j']
scalar inv=id[`j']
}
}
replace d2va=dv in `i'
if dv<. replace id4va=iv in `i'
replace d2nva=dnv in `i'
if dnv<. replace id4nva=inv in `i'
}
drop L d va_d nva_d
}
drop if _m==2
drop id cntrl VA h_lat h_lon _m
l
la var d2va "Distance to nearest VA hosp"
la var d2nva "Distance to nearest non-VA hosp"
la var id4va "Hosp id for nearest VA hosp"
la var id4nva "Hosp id for nearest non-VA hosp"
*
* For searches and help try:
* http://www.stata.com/support/faqs/res/findit.html
* http://www.stata.com/support/statalist/faq
* http://www.ats.ucla.edu/stat/stata/