As I have promised, I hereby send the code. It has been improved based
on our conversations so far in the following ways:
#1 -meanonly- is now implemented
#2 All obs in the complete dataset belonging to ids that have too few
observations are dropped using Nick's -egen- method.
#3 tempnames have been removed.
#4 I try to reserve the use of -local- for those situations when it is
absolutely necessary.
#5 Unnecessary usage of -if- statements has been eliminated.
Any suggestions for further improvement are greatly appreciated.
I am considering running separate regressions on the complete
database without any loops, using -statsby- or perhaps -by-, but that
requires that I add variables to the complete dataset, and I have
previously run into memory restrictions when doing so. After
implementing Nick's -egen- idea I managed to drop about 2.5
million obs from the dataset, so I may now be able to proceed
with dataset-wide regressions.
I will do a separate post with a short transcript from the two
databases that I join.
*********** Beginning Code **************
capture program drop GetExpectedAnnualReturn

* GetExpectedAnnualReturn mrkData estimationPeriod
*
* Purpose
*   Estimates, id by id, a three-factor regression of excess total return on
*   the market risk premium, SMBtr and HMLtr, and leaves in memory one
*   observation per id and year holding the factor loadings (b, s, h) and an
*   annualized expected return built from the ex post average premiums.
*
* Arguments
*   mrkData           dataset with the market factors. It must be -tsset- and
*                     contain date, marketReturn, riskFreereturn, SMBtr, HMLtr.
*   estimationPeriod  minimum history required per id; it is multiplied by the
*                     number of periods per year, floor(365 / tdelta)
*                     (presumably a number of years -- confirm with callers).
*
* Side effects
*   Replaces the data in memory. Reads us_data_ret from the current directory.
*   Exits with error 2000 (restoring the joined data) when no id qualifies.
*
* Assumptions to confirm
*   riskFreereturn in mrkData is treated as an annual rate: it is divided by
*   the number of periods per year wherever it is combined with per-period
*   returns.
program define GetExpectedAnnualReturn
    version 10
    args mrkData estimationPeriod
    tempvar dependentVar marketRiskpremium expectedReturn

    // ---- 1. Ex post average premiums from the market data -----------------
    use "`mrkData'", clear
    quietly {
        tsset
        local tDelta = r(tdelta)
        local periodsPerYear = floor(365 / `tDelta')
        // This rescaling only affects the copy in memory; the file on disk
        // (joined further down) still carries the unscaled rate.
        replace riskFreereturn = riskFreereturn / `periodsPerYear'
        generate `marketRiskpremium' = marketReturn - riskFreereturn
        summarize `marketRiskpremium', meanonly
        local ERP = r(mean)
        summarize SMBtr, meanonly
        local SMB = r(mean)
        summarize HMLtr, meanonly
        local HML = r(mean)
    }
    local requiredEstimationPeriod = `estimationPeriod' * `periodsPerYear'

    display " "
    display as text "The Ex Post Weekly Average Premiums "
    display in smcl in green "{hline 35}"
    display as text "Equity Risk Premium " as result %9.7f `ERP'
    display as text "Small-Minus_Big " as result %9.7f `SMB'
    display as text "High-Minus-Low " as result %9.7f `HML'
    display in smcl in green "{hline 35}"
    display " "

    // ---- 2. Load the return panel and drop ids with too little history ----
    use us_data_ret, clear
    // Snippet below thanks to Nick Cox: count the nonmissing returns per id
    // once instead of testing every id inside a loop.
    egen nvalid = count(totalReturn), by(id)
    drop if nvalid < `requiredEstimationPeriod'
    drop nvalid
    // Create sample based on id
    // NOTE(review): looks like a leftover test restriction -- confirm.
    drop if id > 101
    // drop if id < 8125
    // Restrict sample based on time
    // drop if year > 1997
    // drop if year < 1995

    display in text "Time to join data"
    joinby date using "`mrkData'"
    quietly {
        format date %tg
        levelsof id, local(panelVar)
        local counter : word count `panelVar'
    }

    // ---- 3. One regression per id, each result kept in its own tempfile ---
    preserve
    display in text "Time for loop over `counter' panelids"
    local counter2 = `counter' - 100
    local savedIds
    quietly foreach i of local panelVar {
        // Progress report every 100 ids.
        if `counter' <= `counter2' {
            local counter2 = `counter' - 100
            noisily display as text "`counter' panelids remains"
        }
        restore, preserve
        drop if id != `i'
        summarize totalReturn, meanonly
        // joinby drops unmatched dates, so the history is re-checked here.
        if r(N) >= `requiredEstimationPeriod' {
            tempfile panel`i'
            // The joined riskFreereturn is the unscaled rate from disk, so it
            // is converted to a per-period rate exactly as in steps 1 and 5.
            generate `dependentVar' = totalReturn - riskFreereturn / `periodsPerYear'
            generate `marketRiskpremium' = marketReturn - riskFreereturn / `periodsPerYear'
            generate float b = .
            generate float s = .
            generate float h = .
            generate double `expectedReturn' = .
            generate float expectedannualReturn = .
            label variable b "Factor loading for risk premium in 3-F CAPM over all available data"
            label variable s "Factor loading for SMB premium in 3-F CAPM over all available data"
            label variable h "Factor loading for HML premium in 3-F CAPM over all available data"
            label variable expectedannualReturn "Expected Annualized Return (time-varying ) measured using 3-F CAPM over all available data"
            // Full variable names: _b[] below must match the regressors
            // without relying on variable-name abbreviation.
            regress `dependentVar' `marketRiskpremium' SMBtr HMLtr
            replace b = _b[`marketRiskpremium']
            replace s = _b[SMBtr]
            replace h = _b[HMLtr]
            // Keep only the observation(s) on the last date of every year.
            summarize year, meanonly
            forvalues j = `r(min)' / `r(max)' {
                summarize date if year == `j', meanonly
                drop if year == `j' & date < r(max)
            }
            save "`panel`i''"
            // Remember which ids actually have a file to append later.
            local savedIds `savedIds' `i'
        }
        local counter = `counter' - 1
    }

    if "`savedIds'" == "" {
        display as error "no id has at least `requiredEstimationPeriod' observations after the join"
        exit 2000
    }

    // ---- 4. Stack the per-id results --------------------------------------
    display in yellow "Merge the data"
    quietly {
        // Empty the data left over from the last iteration, then append.
        drop if id != .
        foreach j of local savedIds {
            append using "`panel`j''"
        }
    }

    // ---- 5. Expected returns and final layout -----------------------------
    quietly {
        replace `expectedReturn' = riskFreereturn / `periodsPerYear' + ///
            b * `ERP' + ///
            s * `SMB' + ///
            h * `HML'
        replace expectedannualReturn = (1 + `expectedReturn') ^ ///
            ( 365 / `tDelta' ) - 1 if totalReturn != .
        // Drop the last year that has returns, then move every remaining
        // row's year forward by one.
        summarize year if totalReturn != ., meanonly
        drop if year == r(max)
        replace year = year + 1
        replace b = . if totalReturn == .
        replace s = . if totalReturn == .
        replace h = . if totalReturn == .
        keep id dscd year b s h expectedannualReturn
        // Cancel the pending preserve so the result stays in memory.
        restore, not
    }
    label data "Expected Annualized Return (time-varying ), measured using 3-F CAPM over all available data"
end
******** End of Code ***************
*
* For searches and help try:
* http://www.stata.com/help.cgi?search
* http://www.stata.com/support/statalist/faq
* http://www.ats.ucla.edu/stat/stata/