Commit 4ced2caa authored by Sean Manzi's avatar Sean Manzi

updated files for upload

parent 1cb6fc3c
#[p]
#Inititate the required libraries
#[12]
library(psych)
library(plyr)
library(reshape2)
library(ggplot2)
#[F]
#Read in the data
#[4]
data <- read.csv("workshopRawData.csv")
#[E]
#Convert dates
#[11]
data$Referral_Date <- as.Date(data$Referral_Date)
data$Treat_Date <- as.Date(data$Treat_Date)
#[L]
#Check the data for errors
#Missing data
#[6]
id_na <- sum(is.na(data$ID))
age_na <- sum(is.na(data$Age))
ref_na <- sum(is.na(data$Referral_Date))
trt_na <- sum(is.na(data$Treat_Date))
ser_na <- sum(is.na(data$Service_Name))
#[A]
#Category typos, ommissions and duplications
#[16]
id_uni <- unique(data$ID)
age_uni <- unique(data$Age)
ser_uni <- unique(data$Service_Name)
id_dup <- count(data, 'ID')
id_dup_sub <- subset(id_dup,id_dup$freq > 1)
dups <- id_dup_sub$ID
dupRows <- data[data$ID %in% dups,]
#[C]
#Incorrectly entered date stamps
#[9]
dataTime <- data
dataTime$time_check <- dataTime$Treat_Date - dataTime$Referral_Date
time_sub <- subset(dataTime, dataTime$time_check < 0)
#[M]
#Swap incorrectly entered date stamps
#[2]
time_rows <- as.integer(row.names(time_sub))
for (i in 1:length(time_rows)){
# print(c("Before ",data[time_rows[i],]))
a <- data[time_rows[i],3]
b <- data[time_rows[i],4]
data[time_rows[i],3] <- b
data[time_rows[i],4] <- a
# print(c("After ",data[time_rows[i],]))
}
#[J]
#Remove patients without team entries
#[13]
omiss <- subset(data, data$Service_Name == "")
omiss_rows <- as.integer(row.names(omiss))
dataClean <- data[-c(omiss_rows),]
#[O]
#Calculate the patient wait times
#[8]
dataClean$waitTime <- as.integer(dataClean$Treat_Date - dataClean$Referral_Date)
waitDes <- describe(dataClean$waitTime)
#[H]
#Subset wait time data by month
#[7]
startDate <- min(dataClean$Referral_Date)
endDate <- max(dataClean$Treat_Date, na.rm=TRUE)
numMonths <- length(seq(from=startDate, to=endDate, by='month'))
months <- seq(from=startDate, to=endDate, by='month')
monthData <- list()
waitList <- list()
for (i in 1:(numMonths-1)){
a <- dataClean[which(dataClean$Treat_Date > months[i+1] &
dataClean$Referral_Date < months[i+1]),]
monthData[[i]] <- a
waitList[[i]] <- data.frame(a$waitTime,a$Service_Name)
}
#[N]
#Convert wait list into single multilevel dataframe
#[5]
dfWait <- melt(waitList)
dfWait <- droplevels(dfWait)
#[K]
#Patient numbers barchart and descriptives
#[14]
waitListTab <- table(dfWait$L1)
waitListDf <- as.data.frame(waitListTab)
waitListDes <- describe(waitListDf$Freq)
barplot(waitListTab, xlab="Month", ylab="Frequency",
main="Number of patients on the waiting list",
cex.names = 0.8)
#[G]
#Patient numbers by team barchart and descriptives
#[10]
teamWaitListTab <- table(dfWait$a.Service_Name,dfWait$L1)
teamWaitListDf <- as.data.frame(teamWaitListTab)
teamWaitListDes <- describeBy(teamWaitListDf$Freq,teamWaitListDf$Var1)
barplot(teamWaitListTab,beside=TRUE, xlab="Month", ylab="Frequency",
main="Number of patients on the waiting list by team",
cex.names = 0.8, legend=TRUE,
args.legend=list(x="topright", legend=c("Team 1", "Team 2")))
abline(h=mean(teamWaitListTab[1,]))
abline(h=mean(teamWaitListTab[2,]), lty=2)
#[I]
#Patient waiting times boxplot and descriptives by month
#[1]
timeWaitTimePlot <- qplot(factor(L1), value, data=dfWait, geom='boxplot')
timeWaitTimePlot + xlab("Month") + ylab("Waiting time (days)") +
ggtitle("Patient waiting times for service X over time")
waitTimeDes <- describeBy(dfWait$value,group=dfWait$L1)
#[B]
#Patient waiting times boxplot and descriptives by team
#[3]
teamWaitTimePlot <- qplot(a.Service_Name,value, data=dfWait, geom='boxplot')
teamWaitTimePlot + xlab("Month") + ylab("Waiting time (days)") +
ggtitle("Patient waiting times for service by team")
teamWaitTimeDes <- describeBy(dfWait$value, group=dfWait$a.Service_Name)
#[D]
#Patient waiting times boxplot and descriptives by month and team
#[15]
dfWait$ServTime <- interaction(dfWait$a.Service_Name,dfWait$L1)
breaks <- c("Team1.1","Team1.2","Team1.3","Team1.4","Team1.5",
"Team1.6","Team1.7","Team1.8","Team1.9","Team1.10",
"Team1.11","Team1.12","Team1.13","Team1.14","Team1.15",
"Team1.16","Team1.17","Team1.18","Team1.19","Team1.20",
"Team1.21","Team1.22","Team1.23")
lab <- as.character(seq(1,23))
teamTimeWaitTimePlot <- qplot(factor(ServTime), value, data=dfWait, fill=a.Service_Name,
geom='boxplot')
teamTimeWaitTimePlot + scale_x_discrete(name="Month", breaks=breaks, labels=lab) +
ylab("Waiting Time")
teamTimeWaitTimeDes <- describeBy(dfWait$value, group=list(dfWait$a.Service_Name,dfWait$L1))
#[B]
#Inititate the required libraries
#[7]
library(psych)
library(plyr)
library(reshape2)
library(ggplot2)
#[C]
#Read in the data
#[13]
data <- read.csv("workshopRawData.csv")
#[G]
#Get list of column names
#[2]
name_list <- function(data){
nList <- as.list(colnames(data))
print("The structure of the data is as follows")
print("Any dates will need to be converted from factor to Date format")
str(data)
return(nList)
}
nList <- name_list(data) #Inputs: dataframe
#[I]
#Convert dates
#[15]
convertDates <- function(data, cols){
for (i in 1:length(cols)){
data[[cols[i]]] <- as.Date(data[[cols[i]]])
print(paste("data column",cols[i],"converted to Date format"))
}
return(data)
}
data <- convertDates(data, c(3,4))
#Inputs: dataframe, columns to be converted to date format
#[F]
#Check the data for errors
#Missing data
#[3]
missing_data <- function(data){
colsN <- colnames(data)
cols <- seq(1, length(colsN))
missList <- list()
for (i in 1:length(cols)){
missList[i] <- sum(is.na(data[[cols[i]]]))
if (missList[i] > 0){
print(paste("In column",cols[i],"there are",missList[i],"missing data points"))
}
}
return(missList)
}
missList <- missing_data(data) #Inputs: dataframe
#[K]
#Category typos, ommissions and duplications
#[9]
typo_check <- function(data,cols){
typoList <- list()
for (i in 1:length(cols)){
typoList[[i]] <- unique(data[[cols[i]]])
print(paste("There are",length(typoList[[i]]),"unique entries in column",cols[i]))
}
return(typoList)
}
typoList <- typo_check(data, c(1,2,5))
#Inputs: dataframe, columns to check for number of unique categories
dup_check <- function(data,nList,idCol){
idName <- nList[[idCol]]
id_dup <- count(data, idName)
id_dup_sub <- subset(id_dup,id_dup$freq > 1)
dups <- id_dup_sub[[idCol]]
dupRows <- data[data[[idCol]] %in% dups,]
print("The rows with duplicated ID's are...")
print(dupRows)
return(dupRows)
}
dupRows <- dup_check(data,nList,1)
#Inputs: dataframe, list of column names, column containing unique ID's
#[M]
#Incorrectly entered date stamps
#[11]
date_check <- function(data){
dataTime <- data
dataTime$time_check <- dataTime$Treat_Date - dataTime$Referral_Date
time_sub <- subset(dataTime, dataTime$time_check < 0)
print("The following rows likely have incorrectly entered date stamps...")
print(time_sub)
return(time_sub)
}
time_sub <- date_check(data) #Inputs: dataframe
#[L]
#Swap incorrectly entered date stamps
#[14]
correct_dates <- function(data,time_sub,dColOne,dColTwo){
time_rows <- as.integer(row.names(time_sub))
for (i in 1:length(time_rows)){
print("The entries")
print(data[time_rows[i],])
a <- data[time_rows[i],dColOne]
b <- data[time_rows[i],dColTwo]
data[time_rows[i],dColOne] <- b
data[time_rows[i],dColTwo] <- a
print("Have been changed to")
print(data[time_rows[i],])
}
return(data)
}
data <- correct_dates(data,time_sub,3,4)
#Inputs: dataframe,
#subset dataframe of rows with incorrectly entered timestamps,
#column with referral date, column with treatment date
#[H]
#Remove patients without team entries
#[1]
omission_removal <- function(data,cols,targets){
for (i in 1:length(cols)){
for (j in 1:length(targets)){
omiss <- subset(data, data[[cols[i]]] == targets[j])
omiss_rows <- as.integer(row.names(omiss))
dataClean <- data[-c(omiss_rows),]
print("The following rows have been removed")
print(data[omiss_rows,])
}
}
return(dataClean)
}
dataClean <- omission_removal(data,c(5),c(""))
#Inputs: dataframe, column(s) to check for targets, target(s)
#[E]
#Calculate the patient wait times
#[18]
calc_wait <- function(dataClean,dateCols){
dataClean$waitTime <- as.integer(dataClean[[dateCols[2]]] - dataClean[[dateCols[1]]])
print("The wait time column has been added to the dataframe dataClean")
return(dataClean)
}
dataClean <- calc_wait(dataClean,c(3,4))
#Inputs: dataframe, columns to calculate wait times
#[Q]
#Generate wait time descriptives and histogram - Aggregate
#[12]
top_level_des <- function(dataClean,colNum){
waitDes <- describe(dataClean[colNum])
write.csv(waitDes,"top_level_descriptives.csv")
jpeg("wait_time_hist.jpeg")
waitHist <- hist(dataClean[[colNum]],main="Frequency of wait times for all patients",
xlab="Wait time")
dev.off()
}
top_level_des(dataClean,6) #Inputs: dataframe, column to describe and plot
#[A]
#Subset wait time data by month
#(patients treated and still awaiting treatment during a given month)
#[17]
data_monthly <- function(dataClean,refDate,treatDate){
startDate <- min(dataClean[,refDate])
endDate <- max(dataClean[,treatDate], na.rm=TRUE)
months <- seq(from=startDate, to=endDate, by='month')
numMonths <- length(months)
waitList <- list()
errorCount <- 0
for (i in 1:(numMonths-1)){
a <- dataClean[which(dataClean[[treatDate]] > months[i] &
dataClean[[refDate]] < months[i+1]),]
actWait <- rep(0,length(a[[1]]))
for (j in 1:length(a[[1]])){
if (a[j,treatDate] < months[i+1]){
actWait[j] <- a[j,treatDate] - a[j,refDate]
}else {
actWait[j] <- months[i+1] - a[j,refDate]
}
if (actWait[j] < 0){
errorCount <- errorCount+1
print("Negative value in month",i,"row",j)
}
}
a$activeWaitTime <- actWait
waitList[[i]] <- data.frame(a$activeWaitTime,a$Service_Name)
}
if (errorCount > 0){
print(paste("Warning:",errorCount,"negative values detected check the output"))
} else{
print("Calculations completed. No negative values detected")
}
return(waitList)
}
waitList <- data_monthly(dataClean,3,4) #Inputs: dataframe, referral date column,
#treatment date column
#[D]
#Convert wait list into single multilevel dataframe
#[5]
convert_list <- function(waitList){
dfWait <- melt(waitList)
dfWait <- droplevels(dfWait)
print("The column names for the analysis are...")
print(colnames(dfWait))
return(dfWait)
}
dfWait <- convert_list(waitList) #Inputs: dataframe
#[P]
#Patient numbers barchart and descriptives
#[6]
patient_nums <- function(dfWait,addDate){
waitListTab <- table(dfWait$L1)
waitListDf <- as.data.frame(waitListTab)
waitListDes <- describe(waitListDf$Freq)
if (addDate == 1){
write.csv(waitListDes,paste(Sys.Date(),"wait_list_numbers_monthly_des.csv",
sep="_"))
jpeg(paste(Sys.Date(),"wait_list_numbers_monthly.csv",sep=""))
}else{
write.csv(waitListDes,"wait_list_numbers_monthly_des.csv")
jpeg("wait_list_numbers_monthly.jpeg")
}
barplot(waitListTab, xlab="Month", ylab="Frequency",
main="Number of patients on the waiting list",
cex.names = 0.8)
dev.off()
}
patient_nums(dfWait,1) #Inputs: dataframe, use 1 to add todays date to filenames
#[R]
#Patient numbers by team barchart and descriptives
#[16]
team_patient_nums <- function(dfWait){
teamWaitListTab <- table(dfWait$a.Service_Name,dfWait$L1)
teamWaitListDf <- as.data.frame(teamWaitListTab)
teamWaitListDes <- describeBy(teamWaitListDf$Freq,teamWaitListDf$Var1)
teamWaitListDes <- do.call(rbind.data.frame,teamWaitListDes)
jpeg("team_wait_list_numbers_monthly.jpeg")
barplot(teamWaitListTab,beside=TRUE, xlab="Month", ylab="Frequency",
main="Number of patients on the waiting list by team",
cex.names = 0.8, legend=TRUE,
args.legend=list(x="topright", legend=c("Team 1", "Team 2")))
abline(h=mean(teamWaitListTab[1,]))
abline(h=mean(teamWaitListTab[2,]), lty=2)
dev.off()
write.csv(teamWaitListDes,"team_wait_list_numbers_monthly_des.csv")
}
team_patient_nums(dfWait) #Inputs: dataframe
#[J]
#Patient waiting times boxplot and descriptives by month
#[8]
time_patient_wait_time <- function(dfWait){
jpeg("wait_times_monthly.jpeg")
timeWaitTimePlot <- qplot(factor(L1), value, data=dfWait, geom='boxplot')
timeWaitTimePlot + xlab("Month") + ylab("Waiting time (days)") +
ggtitle("Patient waiting times for service X over time")
dev.off()
waitTimeDes <- describeBy(dfWait$value,group=dfWait$L1)
waitTimeDes <- do.call(rbind.data.frame,waitTimeDes)
write.csv(waitTimeDes,"wait_times_monthly_des.csv")
}
time_patient_wait_time(dfWait) #Inputs: dataframe
#[N]
#Patient waiting times boxplot and descriptives by team
#[10]
team_patient_wait_time <- function(dfWait){
jpeg("wait_times_team.jpeg")
teamWaitTimePlot <- qplot(a.Service_Name,value, data=dfWait, geom='boxplot')
teamWaitTimePlot + xlab("Month") + ylab("Waiting time (days)") +
ggtitle("Patient waiting times for service by team")
dev.off()
teamWaitTimeDes <- describeBy(dfWait$value, group=dfWait$a.Service_Name)
teamWaitTimeDes <- do.call(rbind.data.frame,teamWaitTimeDes)
write.csv(teamWaitTimeDes,"wait_times_team_des.csv")
}
team_patient_wait_time(dfWait) #Inputs: dataframe
#[O]
#Patient waiting times boxplot and descriptives by month and team
#[4]
team_time_patient_wait_time <- function(dfWait){
dfWait$ServTime <- interaction(dfWait$a.Service_Name,dfWait$L1)
breaks <- dfWait$ServTime[seq(1,length(levels(dfWait$ServTime)),2)]
lab <- as.character(seq(1,length(breaks)))
jpeg("wait_times_monthly_team.jpeg")
teamTimeWaitTimePlot <- qplot(factor(ServTime), value, data=dfWait,
fill=a.Service_Name, geom='boxplot')
teamTimeWaitTimePlot + scale_x_discrete(name="Month", breaks=breaks, labels=lab) +
ylab("Waiting Time")
dev.off()
teamTimeWaitTimeDes <- describeBy(dfWait$value,
group=list(dfWait$a.Service_Name,dfWait$L1))
teamTimeWaitTimeDes <- do.call(rbind.data.frame,teamTimeWaitTimeDes)
write.csv(teamTimeWaitTimeDes,"wait_times_monthly_team.csv")
}
team_time_patient_wait_time(dfWait) #Inputs: dataframe
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment