> I am using the xi: command to construct dummy variables
> from categorical variables. My problem is that I would
> like the xi-command to not remove one of the dummies...
> I just want to add that I've run into this problem too,
> and I too consider it a (rare) limitation. More than
> once I've wanted to run a regression with explicit
> dummies for each category in combination with the
> -nocons- option, and without an ugly hack not worth
> repeating here (Uli's suggestion is *much* nicer) it
> isn't possible with -xi-'s dummies.
I agree that the "tab, gen()" solution is much nicer, but
here is the "ugly" hack. The following command "xa" works
exactly like "xi" except it does not omit any category. I
believe that it does not interfere with the normal operation
of xi. Comments welcome.
-Michael Ash
------xa.ado-----CUT BELOW THIS LINE-------
*! version 1.1.0 10/19/94
program define xa
/*
   xa [:] <command line>
   Replaces every token of the command line that begins with I.
   by the varlist pattern of freshly generated indicator variables
   (built by xa_ei / xa_eii, which keep every category).
   When the line starts with a colon, the rewritten command line is
   executed afterwards; otherwise only the variables are created.
*/
version 4.0
/* split the command line on blanks, commas, colons and parentheses */
parse "`*'", parse(" ,:()")
if "`1'"==":" {
/* prefix form: discard the colon, start scanning at token 2 so that */
/* token 1 is never rewritten, and remember to run the command */
mac shift
local i 2
local xeq "yes"
}
else local i 1
/* remove every existing variable whose name begins with I; this also */
/* removes unrelated user variables that happen to start with I */
capture drop I* /* (crude but necessary) */
/* reset the caches shared with the helpers: X__in and X__out are read */
/* and extended by xa_ei, X__cont by xa_eicu */
global X__in
global X__out
global X__cont
while "``i''"!="" {
/* the I. prefix is matched without regard to case */
if upper(substr("``i''",1,2))=="I." {
/* a token containing an asterisk or a bar is an interaction term; */
/* anything else is a plain I.<name> term */
if index("``i''","*") { xa_eii ``i'' "*" }
else if index("``i''","|") { xa_eii ``i'' "|" }
else xa_ei ``i''
/* the helpers hand back the replacement text in S_1; a lone period */
/* means no variables were generated, so the token is blanked out */
local `i' "$S_1"
if "``i''"=="." { local `i' " " }
}
local i=`i'+1
}
/* clear the caches again once the whole line has been processed */
global X__in
global X__out
global X__cont
if "`xeq'"=="yes" {
/* run the command line with the substituted tokens */
`*'
}
end
program define xa_ei /* I.<name> */
/*
   Creates one byte indicator variable per category of <name>.
   Argument 1 is the term in the form I.<name>.
   Result: S_1 holds the stub followed by an asterisk (a pattern that
   matches all generated indicators), or a lone period when no variable
   matches that pattern.
   Side effects: new variables are added to the data, and the globals
   X__in / X__out are extended so that a repeated request for the same
   variable reuses the earlier result.
*/
version 4.0
local orig "`1'"
tempvar g on
/* strip the leading I. ; _crcunab is not defined in this file and is */
/* assumed to return the unabbreviated variable name in S_1 */
local vn = substr("`orig'",3,.)
_crcunab `vn'
local vn "$S_1"
if "$X__in" != "" {
/* already expanded earlier in this command line: return the pattern */
/* stored at the same word position of X__out and stop */
parse "$X__in", parse(" ")
local i 1
while "``i''"!="" {
if "``i''"=="`vn'" {
global S_1 : word `i' of $X__out
exit
}
local i=`i'+1
}
}
/* g numbers the categories consecutively; _result(6) after summ is */
/* taken as the maximum, i.e. the number of categories */
qui egen `g' = group(`vn')
qui summ `g'
local ng = _result(6)
/* default naming: suffixes are the group numbers 1..ng */
local lowcode 1
local topcode `ng'
local useuser 0
cap confirm string var `vn'
if _rc {
/* numeric variable: when every nonmissing value is an integer in */
/* the range 0..99, the values themselves are used as suffixes */
local isnumb "yes"
cap assert `vn'==int(`vn') & `vn'<100 & `vn'>=0 if `vn'!=.
if _rc==0 {
qui summ `vn'
local lowcode = _result(5)
local topcode = _result(6)
local useuser 1
}
}
/* obtain an unused name stub sized for the largest suffix */
xa_mkun `vn' `topcode'
local svn "$S_1"
/* optional variable characteristic vn[omit] naming a category value */
local omis : char `vn'[omit]
if "`omis'" != "" {
tempvar umark
if "`isnumb'"=="yes" {
/* for a numeric variable the characteristic must be a number */
capture confirm number `omis'
if _rc {
di in red /*
*/" characteristic `vn'[omit] (`omis') invalid;" /*
*/ _n "variable `vn' is numeric"
exit 198
}
/* compare at float precision on both sides */
gen byte `umark'= float(`vn')==float(`omis')
}
else gen byte `umark'= `vn'=="`omis'"
/* if no observation carries that value, report it and forget umark */
capture assert `umark'==0
if _rc==0 {
di in gr "(characteristic `vn'[omit]: `omis'" _n /*
*/ "yet variable `vn' never equals `omis'; characteristic ignored)"
local umark
}
}
/* no usable vn[omit]: unless the dataset characteristic _dta[omit] */
/* is set, mark the first category (group 1) */
local xamode : char _dta[omit]
if "`umark'"=="" & "`xamode'"=="" {
tempvar umark
qui gen byte `umark'=(`g'==1)
}
local max 0
local jmax 0
local j 1
qui gen long `on'=.
while `j'<=`ng' {
/* on carries forward the observation number of the most recent */
/* member of group j, so on[_N] locates the last such observation */
/* and value is the category's value of vn */
qui replace `on'=cond(`g'==`j',_n,`on'[_n-1])
local value = `vn'[`on'[_N]]
/* suffix: the user's own value when naturally coded, else j */
if `useuser' { local k `value' }
else local k `j'
/* indicator is missing wherever the group code is missing */
qui gen byte `svn'`k' = `g'==`j' if `g'!=.
label var `svn'`k' "`vn'==`value'"
if "`umark'"=="" {
/* no marker: track the category with the most observations */
qui count if `g'==`j'
if _result(1)>`max' {
local max = _result(1)
local jmax `k'
local dval "`value'"
}
}
else {
/* marker present: track the category whose observations are */
/* all marked */
capture assert `umark' if `g'==`j'
if _rc==0 {
local jmax `k'
local dval "`value'"
}
}
local j=`j'+1
}
/* report the range of generated names */
if `useuser' {
di in gr "`orig'" /*
*/ _col(23) "`svn'`lowcode'-`topcode'" /*
*/ _col(36) "(naturally coded)"
}
else di in gr "`orig'" /*
*/ _col(23) "`svn'`lowcode'-`topcode'" /*
*/ _col(36) ""
/* the drop of the tracked category is disabled on the next line, so */
/* every category keeps its indicator; jmax and dval are therefore */
/* computed above but not used */
*drop `svn'`jmax'
/* return a lone period when nothing matches the stub pattern */
capture list `svn'* in 1
if _rc {
global S_1 "."
}
else global S_1 "`svn'*"
/* remember the result for later requests on the same variable */
global X__in "$X__in `vn'"
global X__out "$X__out $S_1"
end
program define xa_eic
/*
   Interaction of a categorical term with a variable that has no I.
   prefix.
   Argument 1: the term, of form I.<varname><sep><varname>.
   Argument 2: the separator found in the term (an asterisk or a bar).
   Creates one product variable per indicator of the first variable.
   Result in S_1: for the asterisk form, the indicator pattern, the
   second variable (only if not already returned earlier, see xa_eicu)
   and the product pattern; for the bar form the indicator pattern is
   left out.
*/
version 4.0
local orig "`1'" /* of form i.<varname>*<varname> */
local ichar "`2'"
/* split the term at the separator */
local lstar = index("`orig'","`ichar'")
local part1 = substr("`orig'",1,`lstar'-1)
local part2 = substr("`orig'",`lstar'+1,.)
/* _crcunab is not defined in this file; assumed to return the */
/* unabbreviated name in S_1 */
_crcunab `part2'
local part2 "$S_1"
/* the products are stored with the second variable's storage type */
local type : type `part2'
xa_ei `part1'
local res1 "$S_1"
if "`res1'"=="." {
/* no indicators exist: only register the second variable */
di in gr "`orig'" _col(36) "(requires no interaction terms)"
xa_eicu `part2'
exit
}
/* expand the indicator pattern into the list of variable names */
/* (assumed behaviour of _crcunab) and find the longest name */
_crcunab `res1'
local uab1 "$S_1"
parse "`uab1'", parse(" ")
local len1 0
while "`1'"!="" {
if length("`1'")>`len1' {
local len1 = length("`1'")
}
mac shift
}
/* res1 ends in an asterisk, so this leaves the number of characters */
/* in the longest suffix */
local len1 = `len1'-length("`res1'") + 1
/* stub: I, second character of the indicator pattern, X, then the */
/* first 4-len1 characters of the second variable, then underscore */
local c1 = substr("`res1'",2,1)
local c2 = substr("`part2'",1,4-`len1')
local stub "I`c1'X`c2'_"
xa_mkun2 `stub'
local stub "$S_1"
/* the numbered macros were consumed above, so parse the list again */
parse "`uab1'", parse(" ")
local i 1
while "``i''"!="" {
/* the suffix starts where the asterisk sits in res1 */
local num1 = substr("``i''",length("`res1'"),.)
local lbl1 : variable label ``i''
qui gen `type' `stub'`num1' = ``i''*`part2'
label var `stub'`num1' "(`lbl1')*`part2'"
local i=`i'+1
}
/* xa_eicu leaves S_1 empty when the second variable was already */
/* returned for an earlier term */
xa_eicu `part2'
if "`ichar'"=="*" {
global S_1 "`res1' ${S_1}`stub'*"
}
else global S_1 "${S_1}`stub'*"
di in gr "`orig'" _col(23) "`stub'#" /*
*/ _col(36) "(coded as above)"
end
program define xa_eicu /* <contvar_name> */
	/*
	   Registers the variable named in argument 1 in the global list
	   X__cont.  S_1 returns the name followed by one blank the first
	   time the variable is seen, and is empty on any later call.
	*/
	version 4.0
	local cvar "`1'"
	/* default answer: already registered, nothing to return */
	global S_1
	/* walk the registered names one word at a time */
	local k 1
	local seen : word 1 of $X__cont
	while "`seen'"!="" {
		if "`seen'"=="`cvar'" { exit }
		local k = `k'+1
		local seen : word `k' of $X__cont
	}
	/* first sighting: return the name and append it to the list */
	global S_1 "`cvar' " /* sic */
	global X__cont "$X__cont `cvar'"
end
program define xa_eii
/*
   Handles a term that contains a separator.
   Argument 1: the term; argument 2: the separator found in it (an
   asterisk or a bar).
   If the part after the separator has no I. prefix the work is handed
   to xa_eic.  Two I. parts joined by a bar are rejected.  Otherwise
   indicators are built for both variables and one byte product is
   created for every pair of indicators.
   Result in S_1: both indicator patterns followed by the product
   pattern.  When either variable yields no indicators, nothing further
   is generated and S_1 is left as set by the last xa_ei call.
*/
version 4.0
local orig "`1'"
local ichar "`2'"
/* split the term at the separator */
local lstar = index("`orig'","`ichar'")
local part1 = substr("`orig'",1,`lstar'-1)
local part2 = substr("`orig'",`lstar'+1,.)
if upper(substr("`part2'",1,2))!="I." {
xa_eic `orig' "`ichar'"
exit
}
else if "`ichar'"!="*" {
di in red "I.xxx|I.yyy not allowed"
exit 198
}
xa_ei `part1'
local res1 "$S_1"
xa_ei `part2'
local res2 "$S_1"
if "`res1'"=="." | "`res2'"=="." {
di in gr "`orig'" _col(36) "(requires no interaction terms)"
exit
}
/* expand each indicator pattern into its list of names (assumed */
/* behaviour of _crcunab, which is not defined in this file) and */
/* find the longest name in each list */
_crcunab `res1'
local uab1 "$S_1"
parse "`uab1'", parse(" ")
local len1 0
while "`1'"!="" {
if length("`1'")>`len1' {
local len1 = length("`1'")
}
mac shift
}
_crcunab `res2'
local uab2 "$S_1"
parse "`uab2'", parse(" ")
local len2 0
while "`1'"!="" {
if length("`1'")>`len2' {
local len2 = length("`1'")
}
mac shift
}
/* each pattern ends in an asterisk, so these leave the number of */
/* characters in the longest suffix of each set */
local len1 = `len1'-length("`res1'") + 1
local len2 = `len2'-length("`res2'") + 1
local len = `len1'+`len2'
/* c1 and c2 are the second characters of the two patterns */
local c1 = substr("`res1'",2,1)
local c2 = substr("`res2'",2,1)
/* the more suffix characters are needed, the shorter the stub */
if `len'==2 {
local stub "I`c1'X`c2'_"
}
else if `len'==3 {
local stub "I`c1'`c2'_"
}
else if `len'==4 {
local stub "I`c1'`c2'"
}
else {
di in red "syserr: length |`len'| not right"
exit 198
}
xa_mkun2 `stub'
local stub "$S_1"
/* the numbered macros hold the second list for the inner loop; the */
/* outer loop reads the first list word by word */
parse "`uab2'", parse(" ")
local i 1
local a : word `i' of `uab1'
while "`a'"!="" {
/* the suffix starts where the asterisk sits in the pattern */
local num1 = substr("`a'",length("`res1'"),.)
local lbl1 : variable label `a'
local j 1
while "``j''"!="" {
local num2 = substr("``j''",length("`res2'"),.)
qui gen byte `stub'`num1'_`num2' = `a'*``j''
local lbl2 : variable label ``j''
label var `stub'`num1'_`num2' "`lbl1' & `lbl2'"
local j=`j'+1
}
local i=`i'+1
local a: word `i' of `uab1'
}
global S_1 "`res1' `res2' `stub'*"
di in gr "`orig'" _col(23) "`stub'#-#" /*
*/ _col(36) "(coded as above)"
end
program define xa_mkun /* make_unique_name <suggested_name> <topcat> */
	/*
	   Builds a name stub from the letter I plus the suggested name,
	   cut short enough that the largest category suffix still fits,
	   then lets xa_mkun2 make it unique.  The result is left in S_1
	   by xa_mkun2.
	     argument 1  suggested (variable) name
	     argument 2  largest suffix number that must fit
	   Exits with code 499 when argument 2 is not below 1000.
	*/
	version 4.0
	local base "`1'"
	local ng `2'
	if `ng'<100 {
		/* one- or two-digit suffix: keep 6 or 5 characters plus _ */
		local keep = cond(`ng'<10,6,5)
		local tail "_"
	}
	else if `ng'<1000 {
		/* three-digit suffix: keep 5 characters, no separator */
		local keep 5
		local tail ""
	}
	else {
		di in red "too many groups for `base'"
		exit 499
	}
	local name = substr("I`base'",1,`keep') + "`tail'"
	xa_mkun2 `name' `ng'
end
program define xa_mkun2 /* meaning make_unique_name <suggested_name> */
	/*
	   Returns in S_1 a name stub that no existing variable name starts
	   with.  Argument 1 is the suggested stub; any further arguments
	   are ignored.  The suggestion is used as is when it is free;
	   otherwise its last character is replaced in turn by each letter
	   a-z, A-Z until a free stub is found.
	   Exits with code 499 when all 52 replacements are taken.  Without
	   this bound the replacement character would become empty, the
	   shortened stub would keep matching existing variables, and the
	   loop would never end.
	*/
	version 4.0
	local name "`1'"
	local totry "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	local ntry = length("`totry'")
	local l 0
	local len = length("`name'")
	/* a stub is in use when listing its pattern succeeds */
	capture list `name'* in 1 /* try name out */
	while _rc==0 {
		local l=`l'+1
		if `l'>`ntry' {
			di in red "no unused variable-name stub available for `1'"
			exit 499
		}
		/* name always has len characters here, so this swaps only */
		/* its last character */
		local name = substr("`name'",1,`len'-1)+substr("`totry'",`l',1)
		capture list `name'* in 1
	}
	global S_1 "`name'"
end
exit
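Note: the "drop" rules in these notes are not applied by xa, whose drop
command is commented out; every category keeps its dummy.
I.myvar means dummies for myvar, drop the most frequent
I.myvar*this means continuous interaction (still drop most frequent)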
I.myvar*I.that means dummy interaction.
I.myvar[what] means dummies for myvar, drop dummy for myvar==what
I.myvar*thatvar means interaction of myvar and thatvar
I.myvar[val]*thatvar[val] means drop corresponding.
12345678
I123_#x#
I12##x##
For I.name*I.name
We try:
12345
IrXr_ e.g., IrXr_#_# for two 1-digit numbers
Irr_ e.g., Irr_#_## or Irr_##_# for 1 and 2 digit numbers
Irr e.g., Irr##_## for two 2-digit numbers
For I.name*name
Irr_#
I.abc*I.def
IrXr_ we try, then shorten to
Irr_
I12345_#
I1234_##
I1234###
------xa.ado-----CUT ABOVE THIS LINE-------
*
* For searches and help try:
* http://www.stata.com/support/faqs/res/findit.html
* http://www.stata.com/support/statalist/faq
* http://www.ats.ucla.edu/stat/stata/