Commit f41019c7 authored by Jefferson F. Paril's avatar Jefferson F. Paril 😁

making the R analysis scripts more generic

parent c04ccb2e
......@@ -31,7 +31,8 @@ ADDITIONAL_METRICS <- function(DATA, START_METRIC, END_METRIC) {
for (i in 1:(nDAYS-1)) {
for (j in (i+1):nDAYS) {
DELTA = METRICS[, (((j-1)*nMETRICSPerDay)+1):(j*nMETRICSPerDay)] - METRICS[, (((i-1)*nMETRICSPerDay)+1):(i*nMETRICSPerDay)]
NAMES = paste("DELTA", substr(DAYS[i],4,nchar(DAYS[i])), substr(DAYS[j],4,nchar(DAYS[j])), substr(colnames(DELTA), start=6, stop=nchar(colnames(DELTA))), sep="")
# NAMES = paste("DELTA", substr(DAYS[i],4,nchar(DAYS[i])), substr(DAYS[j],4,nchar(DAYS[j])), substr(colnames(DELTA), start=6, stop=nchar(colnames(DELTA))), sep="")
NAMES = paste("DELTA", DAYS[i], DAYS[j], gsub(DAYS[j], "", colnames(DELTA)), sep="")
colnames(DELTA) = NAMES
DATA = cbind(DATA, DELTA)
}
......@@ -167,7 +168,7 @@ CROSS.VALIDATION <- function(DATA, METRICS, ITERATIONS, FOLDS, NEXPLANATORY="ONE
OUT = data.frame(her1, her2, metric, iter, fold, correct, wrong, performance)
colnames(OUT) = c("TRAIN_HERBI", "TEST_HERBI", "METRIC", "ITER", "FOLD", "CORRECT", "WRONG", "PERFORMANCE")
} else {print("INVALID ANALYSIS TYPE. CHOOSE FROM 'ALL', 'POPULATIONS' and 'HERBICIDES'"); OUT = 0}
############################
### ###
### USE ALL THE METRICS ###
......@@ -343,7 +344,7 @@ PLOT.GRAPHS.SAVE.PREDICTORS <- function(TRAINING_DATA, VALIDATION_DATA, METRICS,
if (length(pred)>5000) {shap.pvalue = shapiro.test(pred[1:5000])$p.value} else {shap.pvalue = shapiro.test(pred)$p.value}
plot(density(outliersRemoved), col="blue", main="Density Plot of Predicted Values", sub=paste("Shapiro-Wilks Test p-value = ", format(shap.pvalue, scientific=T, digits=3), sep=""))
qqnorm(pred, main="QQ Plot of Predicted Values"); qqline(pred, col="red")
pr = prediction(pred, VALIDATION_DATA$SURVI)
prf = performance(pr, measure="tpr", x.measure="fpr")
auc = performance(pr, measure="auc")@y.values[[1]]
......@@ -421,7 +422,8 @@ for (m in METRICS_ALL) {
#remove missing pots
#DATA = DATA[complete.cases(DATA), ] #remove rows with at least 1 NA #I'm removing too much!! DARN!
DATA = DATA[DATA$DAY00_AREA>0, ] #remove empty pots at day 0
# DATA = DATA[DATA$DAY00_AREA>0, ] #remove empty pots at day 0
DATA = eval(parse(text=paste0("DATA[DATA$", levels(dat$DAY)[1], "_AREA>0, ]"))) #remove empty pots at day 0
# DATA = DATA[DATA$DAY07_AREA!=0, ] #remove empty pots at day 7
# DATA = DATA[DATA$DAY14_AREA!=0, ] # remove empty pots at dat 14
DATA = DATA[, colSums(is.na(DATA))==0]
......@@ -490,7 +492,7 @@ for (pop1 in levels(DATA$POPUL)) {
}
for (her1 in levels(DATA$HERBI)) {
for (her2 in levels(DATA$HERBI)) {
tryCatch(PLOT.GRAPHS.SAVE.PREDICTORS(TRAINING_DATA=subset(DATA, HERBI==her1), VALIDATION_DATA=subset(DATA, HERBI==her2), METRICS=METRICS_ALL, training_name=her1, validation_name=her2),
tryCatch(PLOT.GRAPHS.SAVE.PREDICTORS(TRAINING_DATA=subset(DATA, HERBI==her1), VALIDATION_DATA=subset(DATA, HERBI==her2), METRICS=METRICS_ALL, training_name=her1, validation_name=her2),
error=function(e) {print(paste0(her1, " x ", her2, " - ERROR!"))})
}
}
......
......@@ -8,7 +8,10 @@ library(ROCR)
#import data
# dat = read.csv("EXP16_ANALYSIS_INPUT.csv")
QUANTITATIVE_RESISTANCE_ESTIMATE <- function(dat) {
QUANTITATIVE_RESISTANCE_ESTIMATE <- function(dat, METRICS=c("AREA", "BLUE", "GREEN", "RED", "GREEN_INDEX",
"GREEN_FRACTION", "AUC_BLUE", "AUC_GREEN", "AUC_RED",
"THRESH_GREEN1", "THRESH_GREEN2", "THRESH_RED1", "THRESH_RED2", "MEDIAN_BLUE",
"MEDIAN_GREEN", "MEDIAN_RED")) {
#order
dat = dat[order(dat$REPLI), ]
dat = dat[order(dat$PLANT), ]
......@@ -29,7 +32,7 @@ QUANTITATIVE_RESISTANCE_ESTIMATE <- function(dat) {
#decompose metrics per measurement time and cbind into the new data set
DAYS = levels(dat$DAY)
METRICS = colnames(dat)[7:ncol(dat)]
# METRICS = colnames(dat)[7:ncol(dat)]
for (i in DAYS) {
SUBSET = subset(dat, DAY==i)
for (j in METRICS) {
......@@ -48,7 +51,8 @@ QUANTITATIVE_RESISTANCE_ESTIMATE <- function(dat) {
for (i in 1:(nDAYS-1)) {
for (j in (i+1):nDAYS) {
DELTA = METRICS[, (((j-1)*nMETRICSPerDay)+1):(j*nMETRICSPerDay)] - METRICS[, (((i-1)*nMETRICSPerDay)+1):(i*nMETRICSPerDay)]
NAMES = paste("DELTA", substr(DAYS[i],4,nchar(DAYS[i])), substr(DAYS[j],4,nchar(DAYS[j])), substr(colnames(DELTA), start=6, stop=nchar(colnames(DELTA))), sep="")
# NAMES = paste("DELTA", substr(DAYS[i],4,nchar(DAYS[i])), substr(DAYS[j],4,nchar(DAYS[j])), substr(colnames(DELTA), start=6, stop=nchar(colnames(DELTA))), sep="")
NAMES = paste("DELTA", DAYS[i], DAYS[j], gsub(DAYS[j], "", colnames(DELTA)), sep="")
colnames(DELTA) = NAMES
DATA = cbind(DATA, DELTA)
}
......@@ -78,7 +82,8 @@ QUANTITATIVE_RESISTANCE_ESTIMATE <- function(dat) {
#remove missing pots
#DATA = DATA[complete.cases(DATA), ] #remove rows with at least 1 NA #I'm removing too much!! DARN!
DATA = DATA[DATA$DAY00_AREA>0, ] #remove empty pots at day 0
# DATA = DATA[DATA$DAY00_AREA>0, ] #remove empty pots at day 0
DATA = eval(parse(text=paste0("DATA[DATA$", levels(dat$DAY)[1], "_AREA>0, ]"))) #remove empty pots at day 0
DATA = DATA[!is.na(DATA$SURVI), ] #remove empty pots at day 0
# DATA = DATA[DATA$DAY07_AREA!=0, ] #remove empty pots at day 7
# DATA = DATA[DATA$DAY14_AREA!=0, ] # remove empty pots at dat 14
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment