Commit 524b2372 authored by Greg Sutcliffe's avatar Greg Sutcliffe

Refactor to put the chart generation in dedicated files

Some of the data manipulation and chart rendering was getting crazy,
so moving it out to a set of dedicated files makes it more readable.
parent 1eace781
......@@ -10,15 +10,7 @@ output:
```{r globals, include=FALSE}
knitr::opts_chunk$set(echo=FALSE,
dev='svg')
library(dplyr)
library(lubridate)
library(ggplot2)
library(gridExtra)
library(forcats)
```
```{r load_data}
raw_csv <- read.csv('./Foreman Community Survey 2018.csv')
source('./graphs.R')
```
As with previous years, we ran a Foreman Community Survey in order to give you
......@@ -50,32 +42,10 @@ The same page-by-page analysis still works, so let's get to it with:
## <a name="page1"></a>Community Metrics & Core
```{r how-long}
data <- tibble(howlong = factor(raw_csv$How.long.have.you.been.using.Foreman.,
levels = c('3 months ago','6 months ago','1 year ago','2 years ago','3 years ago or more'))) %>%
group_by(howlong) %>%
summarise(n=n()) %>%
mutate(perc = round((n / sum(n))*100,2))
levels(data$howlong) <- c('3 months','6 months','1 year','2 years','3 years')
plot1 <- ggplot(data, aes(x = howlong, y = perc, label = perc, fill = howlong)) +
geom_bar(stat = 'identity') +
geom_text(size = 3, position = position_stack(), vjust = -0.5) +
labs(x = "", y = "Percent of replies") +
ggtitle(" Time in community") + expand_limits(y=45) +
theme(legend.position = "none", plot.title = element_text(margin = margin(b = -20)))
data <- tibble(version = raw_csv$What.Foreman.version.are.you.using.) %>%
group_by(version) %>%
summarise(n=n()) %>%
mutate(perc = round((n / sum(n))*100,2))
plot2 <- ggplot(data, aes(x = version, y = perc, label = perc, fill = version)) +
geom_bar(stat = 'identity') +
geom_text(size = 3, position = position_stack(), vjust = -0.5) +
labs(x = "", y = "Percent of replies") +
ggtitle(" Current Foreman\n version") + expand_limits(y=55) +
theme(legend.position = "none", plot.title = element_text(margin = margin(b = -40)))
grid.arrange(plot1, plot2, ncol=2)
First, the community itself:
```{r community}
grid.arrange(howlong_plot, geo_plot, version_plot, overall_plot, ncol=2)
```
For age, we see a 10% jump in the 3+ year group here, and a corresponding drop (8% each)
......@@ -94,38 +64,16 @@ A more concrete measure is that the amount of people running an unsupported
version (`$latest.major-2` or older) has decreased by over half (27% last year
to 11% this year). That's good news!
The geography data is pretty much unchanged, as is the overall satisfaction with
the project - 78% of the community give us 4+ on this. Thanks for the positive
vibes, everyone!
```{r hardware}
# TODO - need to melt the raw_csv$What.hardware.do.you.run.Foreman.on. column into proper observations
```
```{r nodes}
data <- tibble(nodes = factor(raw_csv$How.many.nodes.do.you.manage.with.Foreman.,
levels = c('0 - 9','10 - 49','50 - 199','200 - 599',
'600 - 999','1,000 - 9,999','10,000 - 19,999','20,000+'))) %>%
group_by(nodes) %>%
summarise(n=n()) %>%
mutate(perc = round((n / sum(n))*100,2))
levels(data$nodes) <- c('0 - 9','10+','50+','200+','600+','1,000+','10,000+','20,000+')
plot1 <- ggplot(data, aes(x = nodes, y = perc, label = perc, fill = nodes)) +
geom_bar(stat = 'identity') +
geom_text(size = 3, position = position_stack(), vjust = -0.5) +
labs(x = "", y = "Percent of replies") +
ggtitle(" Number of\n hosts") + expand_limits(y=30) +
theme(legend.position = "none", plot.title = element_text(margin = margin(b = -40)))
data <- tibble(ppl = factor(raw_csv$How.many.people.use.Foreman.in.your.organization,
levels = c('1 - 4','5 - 9','10 - 49','50+'))) %>%
group_by(ppl) %>%
summarise(n=n()) %>%
mutate(perc = round((n / sum(n))*100,2))
plot2 <- ggplot(data, aes(x = ppl, y = perc, label = perc, fill = ppl)) +
geom_bar(stat = 'identity') +
geom_text(size = 3, position = position_stack(), vjust = -0.5) +
labs(x = "", y = "Percent of replies") +
ggtitle("Number of users ") + expand_limits(y=55) +
theme(legend.position = "none", plot.title = element_text(hjust = 1,margin = margin(b = -20)))
grid.arrange(plot1, plot2, ncol=2)
grid.arrange(arrangeGrob(users_plot, hosts_plot, ncol=2), os_plot, ncol=1)
```
Nodes are interesting. We see a 10% *drop* in the 10-49 group, and a
......@@ -139,75 +87,7 @@ reflected (however minutely) in the results.
The users graph is less interesting - broadly this is the same as last year.
```{r}
# Foreman servers
data1 <- raw_csv$In.your.Foreman.deployment..what.operating.system.do.most.of.your.Foreman.servers.use.
data1 <- as.factor(unlist(strsplit(as.character(data1),split = ";"))) %>% # Split semicolon data
fct_collapse(
Windows = c('Windows','Windows Server 2016','windows 2012'),
Ubuntu = 'Ubuntu Server') %>% # Collapse variants
fct_lump(n=6)
data1 <- as_tibble(data1) %>%
group_by(value) %>%
summarise(n=n()) %>%
mutate(perc = round((n / sum(n))*100,2), type = 'Foreman')
# Hosts
data2 <- raw_csv$In.your.Foreman.deployment..what.operating.system.do.most.of.your.Hosts.use.
data2 <- as.factor(unlist(strsplit(as.character(data2),split = ";"))) %>%
fct_collapse(
Windows = c('Windows','Windows Server 2016','Windows Server / Deskop','Microsoft Windows'),
Ubuntu = c('Ubuntu Server','Ubuntu Desktop','Ubuntu Desktop/Workstation')) %>%
fct_lump(n=6)
data2 <- as_tibble(data2) %>%
group_by(value) %>%
summarise(n=n()) %>%
mutate(perc = round((n / sum(n))*100,2), type = 'Host')
data <- rbind(data1,data2)
ggplot(data, aes(x = value, y = perc, label = perc, fill = type)) +
geom_bar(stat = 'identity', position = 'dodge') +
geom_text(size = 3, position = position_dodge(0.9), vjust = -0.5) +
labs(x = "", y = "Percent") +
ggtitle("OS choices ") + expand_limits(y=45) +
theme(legend.position = "none", plot.title = element_text(hjust = 1,margin = margin(b = -20)))
```
```{r geography}
# Need to convert "sweden" and "Europe and North America" to the more useful groups
# We have 1 of each, so add 2 'Europe' rows and 1 'North America'
data <- tibble(geo = raw_csv$What.geographical.region.are.you.in.) %>%
add_row(geo = c('Europe','Europe','North America')) %>%
filter(geo != 'sweden' & geo != 'Europe and North America') %>%
group_by(geo) %>%
summarise(n=n()) %>%
mutate(perc = round((n / sum(n))*100,2))
ggplot(data, aes(x = geo, y = perc, label = perc, fill = geo)) +
geom_bar(stat = 'identity') +
geom_text(size = 3, position = position_stack(), vjust = -0.5) +
labs(x = "", y = "Percent of replies") +
ggtitle(" Geographical Region") + expand_limits(y=75) +
theme(legend.position = "none", plot.title = element_text(margin = margin(b = -20), hjust = 1))
```
This one isn't a surprise, we know our userbase is mostly in Europe. Apparently
our team in India didn't fill out the survey... ;)
```{r overall}
overall <- as_tibble(raw_csv$Overall..how.well.does.Foreman.meet.your.requirements.)
overall <- as_tibble(overall$value[!is.na(overall$value)])
overall <- overall %>%
group_by(value) %>%
summarise(n=n()) %>%
add_row(value = 1, n = 0, .before = 1) %>%
mutate(perc = round((n / sum(n))*100,2))
ggplot(overall, aes(x = value, y = perc, label = perc, fill = value)) +
geom_bar(stat = 'identity') +
geom_text(size = 3, position = position_stack(), vjust = -0.5) +
labs(x = "", y = "Percent of replies") +
ggtitle(" Overall Rating 1-5\n (5 is best)") + expand_limits(y=55) +
theme(legend.position = "none", plot.title = element_text(margin = margin(b = -40)))
```
Very similar to last year, 78% of the community give us 4+ on this. Thanks for
the positive vibes, everyone!
The OS chart isn't directly comparable to last year, as I've correctly broken
down the multi-choice answers into separate results, so the totals actually make
100% now. However, we do see a similar picture - strong preference for CentOS &
RHEL, backed up by Debian & Ubuntu. Nothing new here, I feel.
\ No newline at end of file
This diff is collapsed.
......@@ -9,8 +9,6 @@ output:
As with previous years, we ran a Foreman Community Survey in order to give you
all the opportunity to tell us how we're doing - where it's good, and where
it's bad. That survey closed a while ago, and I'm here to show you the results.
......@@ -40,7 +38,9 @@ The same page-by-page analysis still works, so let's get to it with:
## <a name="page1"></a>Community Metrics & Core
![](InitialAnalysis_files/figure-html/how-long-1.svg)<!-- -->
First, the community itself:
![](InitialAnalysis_files/figure-html/community-1.svg)<!-- -->
For age, we see a 10% jump in the 3+ year group here, and a corresponding drop (8% each)
in the 3 and 6 month groups. This is worrying, it suggests that we need to look
......@@ -58,6 +58,10 @@ A more concrete measure is that the amount of people running an unsupported
version (`$latest.major-2` or older) has decreased by over half (27% last year
to 11% this year). That's good news!
The geography data is pretty much unchanged, as is the overall satisfaction with
the project - 78% of the community give us 4+ on this. Thanks for the positive
vibes, everyone!
![](InitialAnalysis_files/figure-html/nodes-1.svg)<!-- -->
......@@ -73,15 +77,7 @@ reflected (however minutely) in the results.
The users graph is less interesting - broadly this is the same as last year.
![](InitialAnalysis_files/figure-html/unnamed-chunk-1-1.svg)<!-- -->
![](InitialAnalysis_files/figure-html/geography-1.svg)<!-- -->
This one isn't a surprise, we know our userbase is mostly in Europe. Apparently
our team in India didn't fill out the survey... ;)
![](InitialAnalysis_files/figure-html/overall-1.svg)<!-- -->
Very similar to last year, 78% of the community give us 4+ on this. Thanks for
the positive vibes, everyone!
The OS chart isn't directly comparable to last year, as I've correctly broken
down the multi-choice answers into separate results, so the totals actually make
100% now. However, we do see a similar picture - strong preference for CentOS &
RHEL, backed up by Debian & Ubuntu. Nothing new here, I feel.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
# Community graphs

# Time in community: percentage of replies per "how long have you been using
# Foreman" answer. The survey answers are turned into an ordered factor and
# given shorter axis labels in the same step.
data <- tibble(
  howlong = factor(
    raw_csv$How.long.have.you.been.using.Foreman.,
    levels = c("3 months ago", "6 months ago", "1 year ago", "2 years ago", "3 years ago or more"),
    labels = c("3 months", "6 months", "1 year", "2 years", "3 years")
  )
) %>%
  count(howlong) %>%
  mutate(perc = round(n / sum(n) * 100, 2))

# Bars are labelled with their percentage; the legend is redundant with the
# x axis, and the negative title margin pulls the title into the plot area.
howlong_plot <- ggplot(data, aes(x = howlong, y = perc, label = perc, fill = howlong)) +
  geom_col() +
  geom_text(size = 3, position = position_stack(), vjust = -0.5) +
  expand_limits(y = 45) +
  labs(title = " Time in community", x = "", y = "Percent") +
  theme(
    legend.position = "none",
    plot.title = element_text(margin = margin(b = -20))
  )
# Current Foreman version: percentage of replies per reported version.
data <- tibble(version = raw_csv$What.Foreman.version.are.you.using.) %>%
  count(version) %>%
  mutate(perc = round(n / sum(n) * 100, 2))

# Two-line title, hence the larger negative bottom margin than the
# single-line titles use.
version_plot <- ggplot(data, aes(x = version, y = perc, label = perc, fill = version)) +
  geom_col() +
  geom_text(size = 3, position = position_stack(), vjust = -0.5) +
  expand_limits(y = 55) +
  labs(title = " Current Foreman\n version", x = "", y = "Percent") +
  theme(
    legend.position = "none",
    plot.title = element_text(margin = margin(b = -40))
  )
# Geography
# Two free-text answers ("sweden" and "Europe and North America", one of each)
# do not fit the standard regions. They are re-expressed by appending two
# 'Europe' rows and one 'North America' row, then dropping the originals.
data <- tibble(geo = raw_csv$What.geographical.region.are.you.in.) %>%
  add_row(geo = c("Europe", "Europe", "North America")) %>%
  filter(
    geo != "sweden",
    geo != "Europe and North America"
  ) %>%
  count(geo) %>%
  mutate(perc = round(n / sum(n) * 100, 2))

# Title is right-aligned (hjust = 1) and pulled into the plot area.
geo_plot <- ggplot(data, aes(x = geo, y = perc, label = perc, fill = geo)) +
  geom_col() +
  geom_text(size = 3, position = position_stack(), vjust = -0.5) +
  expand_limits(y = 80) +
  labs(title = " Geographical Region", x = "", y = "Percent") +
  theme(
    legend.position = "none",
    plot.title = element_text(margin = margin(b = -20), hjust = 1)
  )
# Overall
# Percentage of replies per overall rating. Missing answers are excluded
# before the percentages are computed.
# The column is named `value` explicitly instead of relying on the implicit
# name produced by calling as_tibble() on a bare vector (superseded in tibble).
overall <- tibble(value = raw_csv$Overall..how.well.does.Foreman.meet.your.requirements.) %>%
  filter(!is.na(value)) %>%
  group_by(value) %>%
  summarise(n = n())

# Keep an (empty) bar for rating 1 so the x axis shows the low end of the
# scale, but only when nobody actually chose 1 - otherwise the category would
# appear twice.
if (!1 %in% overall$value) {
  overall <- add_row(overall, value = 1, n = 0, .before = 1)
}

overall <- overall %>%
  mutate(perc = round((n / sum(n)) * 100, 2))

overall_plot <- ggplot(overall, aes(x = value, y = perc, label = perc, fill = value)) +
  geom_bar(stat = "identity") +
  geom_text(size = 3, position = position_stack(), vjust = -0.5) +
  labs(x = "", y = "Percent") +
  ggtitle(" Overall Rating\n (5 is best)") +
  expand_limits(y = 55) +
  theme(
    legend.position = "none",
    plot.title = element_text(margin = margin(b = -40))
  )
# Hardware graphs

# Number of hosts: percentage of replies per managed-node bracket. Brackets
# are ordered by size and relabelled to compact "N+" forms for the axis.
data <- tibble(
  nodes = factor(
    raw_csv$How.many.nodes.do.you.manage.with.Foreman.,
    levels = c(
      "0 - 9", "10 - 49", "50 - 199", "200 - 599",
      "600 - 999", "1,000 - 9,999", "10,000 - 19,999", "20,000+"
    ),
    labels = c("0 - 9", "10+", "50+", "200+", "600+", "1,000+", "10,000+", "20,000+")
  )
) %>%
  count(nodes) %>%
  mutate(perc = round(n / sum(n) * 100, 2))

# Right-aligned title pulled into the plot area.
hosts_plot <- ggplot(data, aes(x = nodes, y = perc, label = perc, fill = nodes)) +
  geom_col() +
  geom_text(size = 3, position = position_stack(), vjust = -0.5) +
  expand_limits(y = 35) +
  labs(title = "Number of hosts ", x = "", y = "Percent") +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 1, margin = margin(b = -40))
  )
# Number of users: percentage of replies per organisation-size bracket,
# with the brackets kept in ascending order.
data <- tibble(
  ppl = factor(
    raw_csv$How.many.people.use.Foreman.in.your.organization,
    levels = c("1 - 4", "5 - 9", "10 - 49", "50+")
  )
) %>%
  count(ppl) %>%
  mutate(perc = round(n / sum(n) * 100, 2))

# Right-aligned title pulled into the plot area.
users_plot <- ggplot(data, aes(x = ppl, y = perc, label = perc, fill = ppl)) +
  geom_col() +
  geom_text(size = 3, position = position_stack(), vjust = -0.5) +
  expand_limits(y = 60) +
  labs(title = "Number of users ", x = "", y = "Percent") +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 1, margin = margin(b = -20))
  )
# OS choices for Foreman servers and for managed hosts, shown side by side.
# Both questions are multi-choice with answers joined by ";", so every selected
# option becomes its own observation and the percentages are shares of all
# selections within each question.
# The counted column is named `value` explicitly rather than relying on the
# implicit name from calling as_tibble() on a bare factor (superseded in tibble).

# Foreman servers
data1 <- raw_csv$In.your.Foreman.deployment..what.operating.system.do.most.of.your.Foreman.servers.use.
data1 <- as.factor(unlist(strsplit(as.character(data1), split = ";"))) %>% # Split semicolon data
  fct_collapse(
    Windows = c("Windows", "Windows Server 2016", "windows 2012"),
    Ubuntu = "Ubuntu Server"
  ) %>% # Collapse variants
  fct_lump(n = 6) # Keep the 6 most common systems, lump the rest together
data1 <- tibble(value = data1) %>%
  group_by(value) %>%
  summarise(n = n()) %>%
  mutate(perc = round((n / sum(n)) * 100, 2), type = "Foreman")

# Hosts
data2 <- raw_csv$In.your.Foreman.deployment..what.operating.system.do.most.of.your.Hosts.use.
data2 <- as.factor(unlist(strsplit(as.character(data2), split = ";"))) %>%
  fct_collapse(
    # 'Deskop' is spelled as it appears in the survey answers - do not correct
    Windows = c("Windows", "Windows Server 2016", "Windows Server / Deskop", "Microsoft Windows"),
    Ubuntu = c("Ubuntu Server", "Ubuntu Desktop", "Ubuntu Desktop/Workstation")
  ) %>%
  fct_lump(n = 6)
data2 <- tibble(value = data2) %>%
  group_by(value) %>%
  summarise(n = n()) %>%
  mutate(perc = round((n / sum(n)) * 100, 2), type = "Host")

# Stack both summaries; `type` distinguishes the two series in the dodged bars
data <- rbind(data1, data2)
os_plot <- ggplot(data, aes(x = value, y = perc, label = perc, fill = type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(size = 3, position = position_dodge(0.9), vjust = -0.5) +
  labs(x = "", y = "Percent") +
  ggtitle("OS choices ") +
  expand_limits(y = 47) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 1, margin = margin(b = -20))
  )
library(dplyr)
library(lubridate)
library(ggplot2)
library(gridExtra)
library(forcats)
# Load data
raw_csv <- read.csv('./Foreman Community Survey 2018.csv')

# Generate graphs
# Every script in ./graph-code/ is sourced into the current environment; each
# one reads `raw_csv` and defines the *_plot objects used by the report.
# `pattern` is a regular expression, not a shell glob, so the dot is escaped
# and the suffix anchored: only file names ending in ".R" are picked up.
files <- list.files(path = './graph-code/', pattern = '\\.R$')
for (f in files) {
  source(file.path('./graph-code/', f))
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment