...
 
Commits (16)
......@@ -49,7 +49,7 @@ modeldata_trim_levels <- list()
# to send to desktop review
models <- subset(
read.xlsx("ccao_dictionary_draft.xlsx", sheetName = "models", stringsAsFactors=FALSE)
, estimation_method %in% c("OLS", "Quantile")
, estimation_method %in% c("OLS", "Quantile", "GBM")
# & modeling_group=="NCHARS"
)
# Modeling loop ----
......@@ -109,12 +109,13 @@ for(m in 1:nrow(models)){ for(i in 1:filter_iters){
, importance=TRUE
, data=subset(modeldata, filter_1>=filter_setting_1 & modeling_group==models$modeling_group[m])))
}
if(models$estimation_method[m]=="GBM"){
if(models$estimation_method[m]=="GBM"
& (models$type[m]=="lin-lin" | models$type[m]=="log-log")){
model <- try(gbm(as.formula(models$form[m])
,distribution="gaussian"
,interaction.depth=2
,n.trees=ntree
,cv.folds = 1
,data=subset(modeldata, filter_1>=filter_setting_1 & modeling_group==models$modeling_group[m])))
}
......@@ -196,6 +197,53 @@ for(m in 1:nrow(models)){ for(i in 1:filter_iters){
}
}
if (models$estimation_method[m]=='GBM' & models$type[m] == 'lin-lin'){
predictions <- NA
TF <- modeldata$modeling_group==models$modeling_group[m] & modeldata$filter_1 >= 1
error <- try(predict(model, newdata=modeldata[TF,], n.trees=ntree))
if (class(error) != "try-error"){
predictions[TF] <- try(predict(model, newdata=modeldata[TF,], n.trees=ntree))
modeldata$fitted_value <- predictions
print("Values predicted")
}
else{
if(filter_setting_1==1){
print(error)
check<- paste0("Able to estimate all models with no trimming?")
msg<- paste0("Bad: failed to predict values for ",models$model_id[m], " at trim setting 1")
integrity_checks <- rbind(integrity_checks, data.frame(check=check, outcome=msg))
print(check); print(msg); rm( msg)
next
}else{
print(paste0("Error predicting values, skipping loop."))
next
}
}
}
if (models$estimation_method[m]=='GBM' & models$type[m] == "log-log"){
predictions <- NA
TF <- modeldata$modeling_group==models$modeling_group[m] & modeldata$filter_1 >= 1
error <- try(exp(predict(model, newdata=modeldata[TF,], n.trees=ntree)))
if (class(error) != "try-error"){
predictions[TF] <- try(exp(predict(model, newdata=modeldata[TF,], n.trees=ntree)))
modeldata$fitted_value <- predictions
print("Values predicted")
}
else{
if(filter_setting_1==1){
print(error)
check<- paste0("Able to estimate all models with no trimming?")
msg<- paste0("Bad: failed to predict values for ",models$model_id[m], " at trim setting 1")
integrity_checks <- rbind(integrity_checks, data.frame(check=check, outcome=msg))
print(check); print(msg); rm( msg)
next
}else{
print(paste0("Error predicting values, skipping loop."))
next
}
}
}
rm(predictions)
# Can't have negative predictions
......@@ -315,18 +363,23 @@ for(m in 1:nrow(models)){ for(i in 1:filter_iters){
rm(df, cod, cod_mod, cod_10,cod_90, prd, prd_mod, prb, prb_mod)
if(filter_setting_1==1){
print(paste0(models$model_id[m],"-->", models$form[m]))
if(class(model)=="rq"){
print(summary(model, se = "iid"))
}else{
print(summary(model))
}
}
# Model R-Squared ----
r <- try(summary(model)$adj.r.squared)
if(is.null(r)|class(r)=="try-error"){r<-NA}
if(models$estimation_method[m]=="OLS"){
print(paste0(models$model_id[m]," --> "
, "Check n= ", nobs(model), " in ", models$modeling_group[m]
, " Adjusted r = ", round(summary(model)$adj.r.squared,3)
n <- nobs(model)
}else if(models$estimation_method[m]=="Quantile"){
n <-length(model[["residuals"]])
}else if(models$estimation_method[m]=="GBM"){
n <- length(model[["fit"]])
}else{
n<-NA
}
print(paste0(models$model_id[m]," --> "
, "Check n= ", n, " in ", models$modeling_group[m]
, " Adjusted r = ", r
, " COD = ", COD$COD
, " COD NOT BOOTED = ",round(100*sum(abs(subset(modeldata, !is.na(ratio) & filter_1>=1 )$ratio-
median(subset(modeldata, !is.na(ratio) & filter_1>=1)$ratio))) /
......@@ -334,17 +387,8 @@ for(m in 1:nrow(models)){ for(i in 1:filter_iters){
median(subset(modeldata, !is.na(ratio) & filter_1>=1)$ratio)),2)
," PRD = ",COD$PRD
," PRB = ",COD$PRB))
}else{
print(paste0(models$model_id[m]," --> "
, "Check n= ", length(model[["residuals"]]), " in ", models$modeling_group[m]
, " COD = ", COD$COD
, " COD NOT BOOTED = ",round(100*sum(abs(subset(modeldata, !is.na(ratio) & filter_1>=1 )$ratio-
median(subset(modeldata, !is.na(ratio) & filter_1>=1)$ratio))) /
(nrow(subset(modeldata, !is.na(ratio) & filter_1>=1))*
median(subset(modeldata, !is.na(ratio) & filter_1>=1)$ratio)),2)
," PRD = ",COD$PRD
," PRB = ",COD$PRB))
}
rm(n)
if (holdout_switch == "y"){
print("Holdout")
# Holdout ----
......@@ -384,7 +428,8 @@ for(m in 1:nrow(models)){ for(i in 1:filter_iters){
,distribution="gaussian"
,interaction.depth=2
,n.trees=ntree
,data=subset(modeldata, filter_1>=filter_setting_1 & modeling_group==models$modeling_group[m] & DOC_NO!=deed)))
,cv.folds = 1
,data=subset(modeldata, filter_1>=filter_setting_1 & modeling_group==models$modeling_group[m])))
}
if (class(model)=="try-error"){
......@@ -458,6 +503,31 @@ for(m in 1:nrow(models)){ for(i in 1:filter_iters){
}
}
}
if (models$estimation_method[m]=='GBM' & models$type[m] == 'lin-lin'){
predictions <- NA
TF <- modeldata$modeling_group==models$modeling_group[m] & modeldata$filter_1 >= 1
error <- try(predict(model, newdata=modeldata[TF,], n.trees=ntree))
if (class(error) != "try-error"){
predictions[TF] <- try(predict(model, newdata=modeldata[TF,], n.trees=ntree))
modeldata$fitted_value <- predictions
print("Values predicted")
}
else{
if(filter_setting_1==1){
print(error)
check<- paste0("Able to estimate all models with no trimming?")
msg<- paste0("Bad: failed to predict values for ",models$model_id[m], " at trim setting 1")
integrity_checks <- rbind(integrity_checks, data.frame(check=check, outcome=msg))
print(check); print(msg); rm( msg)
next
}else{
print(paste0("Error predicting values, skipping loop."))
next
}
}
}
if(is.numeric(predictions)){
try(ho_predictions$out_of_sample_fv[ho_predictions$DOC_NO==deed] <- predictions[!is.na(predictions)])
}else{
......@@ -488,21 +558,16 @@ for(m in 1:nrow(models)){ for(i in 1:filter_iters){
median(subset(ho_predictions, !is.na(ratio))$ratio)),2)
median_hold_out_ratio <- median(ho_predictions$ratio)
rm(ho_predictions)
}
}
else if(holdout_switch == "n"){
r <- try(summary(model)$adj.r.squared)
if(is.null(r)|class(r)=="try-error"){r<-NA}
}else if(holdout_switch == "n"){
hold_out_COD <- "NA"
median_hold_out_ratio <- "NA"
print(paste0(user, " chose to skip holdout"))
}
# Model R-Squared ----
# Want direct comparison for models without explicit R-squareds
# Manually calculate R-squared as squared deviations from preductions?
r <- try(summary(model)$adj.r.squared)
if(is.null(r)|class(r)=="try-error"){r<-NA}
# Save Modeling Results ----
# IQR
p25 <- quantile(subset(modeldata, filter_1>=filter_setting_1)$ratio, c(.25), names = FALSE, na.rm=TRUE)
......