Commit d4354ed6 authored by Frédéric Santos's avatar Frédéric Santos

Prepare files for revision R1

parent 74a67a8e
FROM ubuntu:19.10
# Basic config:
### Set non-interactive mode:
ENV DEBIAN_FRONTEND noninteractive
RUN echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf
RUN apt-get update && apt-get install software-properties-common
RUN apt-get install -y \
curl \
git \
gnupg \
gpm \
imagemagick \
ispell \
libacl1 \
libasound2 \
libcanberra-gtk3-module \
libcurl4 \
libcurl4-openssl-dev \
liblcms2-2 \
libdbus-1-3 \
libgif7 \
libgnutls30 \
libgtk-3-0 \
libjansson4 \
libjpeg8 \
libm17n-0 \
libpng16-16 \
librsvg2-2 \
libsm6 \
libssl-dev \
libtiff5 \
libx11-xcb1 \
libxml2 \
libxml++2.6-dev \
libxpm4 \
openssh-client \
texinfo \
### Install basic system packages:
RUN apt-get update && apt-get install software-properties-common
RUN apt-get update && apt-get install -y \
curl \
git \
gnupg \
gpm \
imagemagick \
ispell \
libacl1 \
libasound2 \
libcanberra-gtk3-module \
libcurl4 \
libcurl4-openssl-dev \
liblcms2-2 \
libdbus-1-3 \
libgif7 \
libgnutls30 \
libgtk-3-0 \
libjansson4 \
libjpeg8 \
libm17n-0 \
libpng16-16 \
librsvg2-2 \
libsm6 \
libssl-dev \
libtiff5 \
libx11-xcb1 \
libxml2 \
libxml++2.6-dev \
libxpm4 \
openssh-client \
texinfo \
&& rm -rf /var/lib/apt/lists/*
# Install Latex:
### Install Latex and its dependencies:
RUN add-apt-repository universe
RUN apt-get update && apt-get install -y \
latexmk \
texlive
latexmk \
texlive \
texlive-fonts-extra \
texlive-publishers
# Install R and the R packages used in the article:
### Install R and the R packages used in the article:
RUN apt-get update && apt-get install r-base=3.6.1-4
RUN R -e "options(repos = \
list(CRAN = 'http://mran.revolutionanalytics.com/snapshot/2020-01-05/')); \
install.packages(c('aplpack', 'FactoMineR', 'mvoutlier', 'quantreg', 'remotes', 'robustbase', 'solitude', 'univOutl'))"
RUN R -e "remotes::install_git('https://gitlab.com/f.santos/anthrostat.git', ref = 'v0.1.2')"
list(CRAN = 'http://mran.revolutionanalytics.com/snapshot/2020-01-20/')); \
install.packages(c('aplpack', 'cellWise', 'FactoMineR', 'MASS', 'mvoutlier', 'quantreg', 'remotes', 'robustbase', 'scatterplot3d', 'solitude', 'univOutl'))"
RUN R -e "remotes::install_github('geanes/bioanth', ref = 'b179b396')"
RUN R -e "remotes::install_git('https://gitlab.com/f.santos/anthrostat.git', ref = 'v0.1.4')"
# Install Emacs and its packages:
### Install Emacs 26.3:
RUN apt-get install emacs=1:26.3+1-1ubuntu1
RUN emacs --script /home/article/org_manuscript/init_Santos2020.el
# Move the article files to the Docker (/!\ still have to add bibfile):
RUN mkdir /home/article
RUN mkdir /home/article/org_manuscript
COPY ./model5-names.bst /home/article/org_manuscript/model5-names.bst
COPY ./ox-extra.el /home/article/org_manuscript/ox-extra.el
COPY ./init_Santos2020.el /home/article/org_manuscript/init_Santos2020.el
COPY /manuscript_outliers_Santos_2020.org /home/article/org_manuscript/manuscript_outliers_Santos_2020.org
### Move the article files to the Docker:
RUN mkdir /home/article_jasrep/
RUN mkdir /home/article_jasrep/figures/
COPY ./model5-names.bst /home/article_jasrep/model5-names.bst
COPY ./ox-extra.el /root/.emacs.d/lisp/ox-extra.el
COPY ./init_Santos2020.el /home/article_jasrep/init_Santos2020.el
COPY ./manuscript_outliers_Santos_2020.org /home/article_jasrep/manuscript_outliers_Santos_2020.org
COPY ./complete_biblio.bib /home/article_jasrep/complete_biblio.bib
### Install Emacs packages by running the init file a first time:
RUN emacs --script /home/article_jasrep/init_Santos2020.el
# Launch Emacs:
### Launch Emacs:
COPY ./init_Santos2020.el /root/.emacs.d/init.el
CMD ["emacs"]
CMD ["cd", "/home/article_jasrep"]
CMD ["emacs", "home/article_jasrep/manuscript_outliers_Santos_2020.org"]
An Org source file to reproduce the whole manuscript
====================================================
In search for a perfect reproducibility, this manuscript have been entirely written in [Org mode for Emacs](https://orgmode.org/). The files provided in this folder should allow to rebuild the whole manuscript, in its exact form at the moment of the submission to the *Journal of Archaeological Science: Reports*. All R scripts will be re-evaluated during this process, and the R code chunks can also been evaluated manually in the Org source file.
The following instructions have been mainly tested on Linux, but should work for MS Windows or Mac OS users (maybe with some additional difficulty).
## Prerequisites
1. Please make sure that R and all the required R packages are installed. See [here](https://gitlab.com/f.santos/reproducibility-package-for-santos-2019-jasr/blob/master/README.md) for a complete list and detailed instructions.
2. A complete distribution of LaTeX must be available on your computer.
- **MS Windows users**: most Windows users prefer the [MiKTeX distribution](https://miktex.org/download). The better way is to download the *Net installer* to install a complete MiKTeX distribution, including all the packages available. If you install only a basic MiKTeX distribution, please make sure that the LaTeX package `latexmk` will be installed (this can be done downstream of the installation, through the MiKTeX package manager).
- **Linux users**: most Linux users prefer the [texlive distribution](https://tug.org/texlive/). If your Linux distribution proposes a complete texlive distribution, it is safe to do so. For example, openSuSE users can run the following shell instruction:
sudo zypper install texlive-scheme-full
The users willing to install only a subset of LaTeX packages should at least select the packages for scientific edition, generally grouped under the names `texlive-publishers` and `texlive-science` with most Linux distributions. For example, Manjaro users can run the following shell instruction:
sudo pacman -S texlive-publishers texlive-science
3. Perl must be installed on your computer. This programming language should be natively installed on Linux and Mac OS computers. For MS Windows users, I recommend to install [Strawberry Perl](http://strawberryperl.com/).
4. The last version of [Emacs](https://www.gnu.org/software/emacs/) is required.
5. **MS Windows users only**: please make sure that your *PATH* environment variable includes the paths towards the programs `Rterm.exe` (i.e. the `bin/` subfolder of your R installation) and `latexmk.exe` (i.e. a `bin/` subfolder of your MiKTeX installation).
## Steps to follow
In search for a perfect reproducibility, this manuscript have been entirely written in [Org mode for Emacs](https://orgmode.org/). A Docker image is also available on DockerHub to provide the full computational environment that allowed this study. Using this Docker image, you should be able to rebuild the whole manuscript in its exact form at the moment of the submission to the *Journal of Archaeological Science: Reports*. All R scripts will be re-evaluated during this process, and the R code chunks can also been evaluated manually in the Org source file.
The following instructions have been tested for Linux and Windows. Advanced Mac OS users should also be able to adapt them to their operating system.
## 1. Install prerequisites
### 1.1. Linux users
Simply install Docker on your computer. For example, Linux Manjaro users can execute the following command lines into the shell to install and activate it:
```shell
sudo pacman -Ss docker
sudo systemctl start docker
sudo systemctl enable docker
sudo usermod -aG docker $USER
```
Similar (or even identical) instructions apply for other Linux distributions. For instance, here is a [tutorial for Ubuntu users](https://phoenixnap.com/kb/how-to-install-docker-on-ubuntu-18-04).
### 1.2. Windows users
1. Install [Docker](https://www.docker.com/products/docker-desktop) on your computer from its official website.
2. To improve the way Emacs can be displayed on Windows 10, install [VcXsrv Windows X Server](https://sourceforge.net/projects/vcxsrv/).
## 2. Ways to reproduce the manuscript
The full text of the manuscript, along with the embedded R code which produced each figure and table, can be consulted by opening the file [manuscript_outliers_Santos_2020.org](./manuscript_outliers_Santos_2020.org) directly through the GitLab interface online.
To reproduce the manuscript (i.e., run all the R code and generate all tables and figures again), the easier way is to use a Docker image, as explained below.
### 2.1. Using the Docker image (recommended)
#### 2.1.1. Linux users
Execute the following command line into the shell:
```shell
docker run -it --rm -e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix fredsantos/compendium_santos2020_jasrep:R1
```
Emacs should open! Go to section 2.1.3. for more instructions.
#### 2.1.2. Windows users
1. Run Xlaunch from the start menu. Three configuration windows will pop up one after the other. In the first configuration window that shows up, check "Multiple windows", and click next. In the second window, chose the option "Start no client" and click next. In the last window, check all checkboxes (**including the checkbox "Disable access control" which is unchecked by default**). Your configuration is finished.
2. Find out your local ip address by running the following command into a Windows Powershell:
```shell
ipconfig
```
3. Set an environment variable by running the following command into Windows Powershell (replace `<yourip>` with the IP your retrieved from the previous step):
```shell
set-variable -name DISPLAY -value <yourip>:0.0
```
4. You can finally run Emacs by executing the following command into Windows Powershell:
```shell
docker run -it --rm -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix fredsantos/compendium_santos2020_jasrep:R1
```
Emacs should open! Go to section 2.1.3. for more instructions.
### 2.1.3. All users: reproduce the manuscript from Emacs
Once Emacs has opened, you can inspect the full manuscript and the embedded R code. To execute individually each chunk of R code and produce a given figure or table directly into the manuscript, simply press `C-c C-c` (i.e., `Ctrl-c Ctrl-c`) on a line beginning by `#+begin_src R`. R will be called, will execute the whole chunk of R code, and the corresponding result will be displayed just below.
To reproduce the whole manuscript (i.e., execute all chunks of R code at once and get a final pdf), execute the following keybinding: `C-c C-e l o` (i.e., `Ctrl-c Ctrl-e l o`), and press enter to accept the default value of the R working directory. The final pdf file should be displayed directly from within Emacs.
### 2.2. Compiling the Org file locally
This is clearly not the recommended method, and some complications may arise here. However, advanced Emacs users (who also have Perl, R and a complete LaTeX distribution installed) can simply download the Org file and try to compile it locally, without using Docker. The following steps should work:
1. Download and unzip the whole directory `Org_manuscript`.
2. Open a terminal and set this folder as the working directory. Then enter the following command into the shell:
emacs -q -l init_Santos2019.el manuscript_outliers_Santos_2019.org
3. Emacs should open, install the missing Lisp packages if needed, and finally execute a minimal init file. The org source file should show up.
4. Finally, execute the following keybinding (with the Emacs window on the foreground): `C-c C-e l o` (i.e., `Ctrl-c Ctrl-e l o`). Then press enter to accept the default value of the R working directory.
5. The final pdf file should be displayed, and created in the working directory.
## Disclaimer
1. Some files in this repository (e.g., `ox-extra.el` and `model5-names.bst`) are the property of their respective authors. They have made freely available by their authors, and are simply provided here for a greater ease of use.
2. If the previous steps do not work on your computer, feel free to contact me.
```shell
emacs -q -l init_Santos2020.el manuscript_outliers_Santos_2020.org
```
3. Emacs should open, install the missing Lisp packages if needed, and finally execute a minimal init file. The org source file should show up. Instructions given in the previous section still apply to compile the org file.
......@@ -5,13 +5,13 @@
'(("elsarticle" "review" "3p")))
(TeX-add-to-alist 'LaTeX-provided-package-options
'(("inputenc" "utf8") ("fontenc" "T1") ("ulem" "normalem") ("babel" "english") ("mathabx" "matha" "mathb")))
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "href")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperref")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperimage")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperbaseurl")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "nolinkurl")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "url")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "path")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "url")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "nolinkurl")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperbaseurl")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperimage")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperref")
(add-to-list 'LaTeX-verbatim-macros-with-braces-local "href")
(add-to-list 'LaTeX-verbatim-macros-with-delims-local "path")
(TeX-run-style-hooks
"latex2e"
......@@ -37,44 +37,52 @@
(TeX-add-symbols
"med")
(LaTeX-add-labels
"sec:orgeed441b"
"sec:org6cd6a96"
"sec:org3f437ab"
"fig:org403d450"
"sec:orgca31c46"
"sec:orgdcc713c"
"sec:org7d839bf"
"sec:orgb30cad3"
"fig:failure2sd"
"sec:org529599a"
"eq:formula_loc_scale_univ"
"sec:org52409e3"
"sec:orgd78e471"
"eq:mad"
"sec:orgcd6faef"
"eq:sn"
"sec:orgc851503"
"tab:org3e63403"
"sec:orgee27047"
"sec:orgf7c5259"
"tab:comparison_loc_scale_methods"
"sec:orgf6b9c01"
"sec:org8ae0b06"
"eq:boxplot"
"sec:org817fcdc"
"sec:orgff7f9bc"
"eq:adjusted_boxplot"
"sec:orga8d2337"
"fig:org47184de"
"sec:orga619d44"
"sec:orgfddc5ae"
"sec:org1ef0dd9"
"fig:asymGiza"
"sec:orgc2b09ee"
"sec:orgcbd17da"
"eq:maha"
"fig:plot3d_Sayala"
"fig:stripcharts-maha"
"sec:org4d62dd0"
"fig:anomaly_scores_sayala"
"sec:orgc8e1fc9"
"fig:anomaly_scores_dsp2"
"fig:ddc_dsp2"
"sec:orgee5949b"
"sec:org093a091"
"fig:type_outliers_reg"
"fig:robust-corr"
"fig:robust-regression"
"sec:org74165f4"
"fig:bagplot"
"sec:org7e90ad8"
"sec:org020f1b5"
"sec:orgff7b787"
"sec:org1beeccb"
"appendix:rob-scale-estimate"
"sec:org77e2cf0"
"sec:org9c27364"
"eq:mad"
"sec:org3506a35"
"eq:sn"
"sec:org8a30bac"
"appendix:robust-maha"
"eq:robust_maha"
"fig:org5a644e9"
"fig:org30b0333"
"sec:org2ca74d0"
"fig:org5505cf5"
"sec:org96f6bee"
"sec:org82f27cd"
"fig:org2655a96"
"fig:orgbcf90ca"
"fig:org1ed38d9"
"sec:org10fb522"
"fig:org501e243"
"sec:org2d38802"
"sec:org21fac4e"
"sec:orgf117676")
"sec:orgf2f3be7"
"appendix:r-packages")
(LaTeX-add-bibliographies
"../../../../../complete_biblio"))
:latex)
......
This diff is collapsed.
Org_manuscript/figures/anomaly_plot.png

17.4 KB | W: | H:

Org_manuscript/figures/anomaly_plot.png

21.1 KB | W: | H:

Org_manuscript/figures/anomaly_plot.png
Org_manuscript/figures/anomaly_plot.png
Org_manuscript/figures/anomaly_plot.png
Org_manuscript/figures/anomaly_plot.png
  • 2-up
  • Swipe
  • Onion skin
Org_manuscript/figures/bagplot.png

22.6 KB | W: | H:

Org_manuscript/figures/bagplot.png

26 KB | W: | H:

Org_manuscript/figures/bagplot.png
Org_manuscript/figures/bagplot.png
Org_manuscript/figures/bagplot.png
Org_manuscript/figures/bagplot.png
  • 2-up
  • Swipe
  • Onion skin
Org_manuscript/figures/maha-dd.png

8.98 KB | W: | H:

Org_manuscript/figures/maha-dd.png

11.8 KB | W: | H:

Org_manuscript/figures/maha-dd.png
Org_manuscript/figures/maha-dd.png
Org_manuscript/figures/maha-dd.png
Org_manuscript/figures/maha-dd.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -35,8 +35,9 @@
(set-keyboard-coding-system 'utf-8)
(prefer-coding-system 'utf-8)
;; Allow Emacs to execute lisp code in the current directory:
;; Allow Emacs to execute lisp code in given directories:
(add-to-list 'load-path "./")
(add-to-list 'load-path "/root/.emacs.d/lisp/")
;; Load Emacs Speaks Statistics:
(use-package ess
......
This source diff could not be displayed because it is too large. You can view the blob instead.
#+TITLE: First version of the manuscript
You can find in this directory the first version of the manuscript, before any process of peer-review.
Readers can compare it to the final version, and then appreciate the amendments that could be done thanks to the reviewers.
### Load required packages:
library(bioanth)
library(MASS)
library(quantreg)
### Load the Goldman Data Set:
data(goldman)
### Select the population sample "El Hesa":
hesa <- subset(goldman, NOTE == "Dynastic Egyptian, El Hesa")
hesa <- na.omit(hesa[ , c("RTML", "RFML")])
### Scatterplot:
par(cex = 1.12, mar = c(4, 4, 1, 1))
plot(RFML ~ RTML, data = hesa, asp = 1,
xlab = "RTML (mm)", ylab = "RFML (mm)")
## 1. Usual OLS regression line (with outlier):
abline(lm(RFML ~ RTML, data = hesa), lty = 2)
## 2. Usual OLS regression line (without outlier):
abline(lm(RFML ~ RTML, data = hesa[-23, ]), col = "black")
## 3. Robust regression:
abline(rlm(RFML ~ RTML, data = hesa), col = "red")
## 4. Quantile regression:
abline(rq(RFML ~ RTML, data = hesa), col = "blue")
## Add legend:
legend("topleft", lty = c(2, 1, 1, 1), col = c("black", "black", "red", "blue"),
legend = c("OLS (with outlier)",
"OLS (discarding outlier)",
"Robust iterative regression",
"Quantile regression"))
##########################
### Load required packages
##########################
library(aplpack)
library(bioanth)
library(FactoMineR)
#############################
### Load the Goldman Data Set
#############################
data(goldman, package = "bioanth")
## Select a subsample of individuals (Delaware pop. sample):
goldman <- as.data.frame(goldman[ , c("NOTE", "RTMLD", "RTML")])
goldman <- na.omit(subset(goldman, NOTE == "Delaware"))
rownames(goldman) <- 1:nrow(goldman) # relabel the rows
##################
### Draw a bagplot
##################
par(mar = c(4.5, 4.5, 1, 1), cex = 1.15)
bagplot(x = goldman$RTMLD, y = goldman$RTML,
na.rm = TRUE, cex = 1.25,
xlab = "RTMLD (mm)", ylab = "RTML (mm)",
show.center = FALSE, show.whiskers = FALSE)
set.seed(201909) # set seed to ensure reproducibility
autoLab(x = goldman$RTMLD, y = goldman$RTML,
labels = rownames(goldman), cex = 1.1)
## Load the required packages:
library(anthrostat)
library(bioanth)
## Load the Goldman Data Set:
data(goldman, package = "bioanth")
## Select the population sample from Ipituaq (males only):
dat <- subset(goldman, NOTE == 'Ipituaq - Point Hope, AK' & Sex == "M")
## Compute left-right asymmetry in humeral length:
asym <- na.omit(dat$LHML - dat$RHML)
names(asym) <- 1:length(asym) # each individual is given a label
## Set graphical parameters:
par(cex = 1.15, mar = c(4.5, 4.5, 1, 1))
## Perform outliers detection:
id_outl <- norm_outliers(asym, coef = 2)
## Kernel density plot, with decision thresholds for outliers:
plot(id_outl, method = "mean_std", number_id = 2)
##########################
### Load required packages
##########################
library(anthrostat)
library(bioanth)
############################
## Load the Goldman Data Set
############################
data(goldman, package = "bioanth")
## Select the population sample from Ipituaq (males only):
dat <- subset(goldman, NOTE == 'Ipituaq - Point Hope, AK' & Sex == "M")
##################################################
### Compute left-right asymmetry in humeral length
##################################################
asym <- na.omit(dat$LHML - dat$RHML)
names(asym) <- 1:length(asym) # each individual is given a label
#####################################
### Density plot + outliers detection
#####################################
## Set graphical parameters:
par(cex = 1.15, mar = c(4.5, 4.5, 1, 1))
## Perform outliers detection with anthrostat R package:
id_outl <- norm_outliers(asym, coef = 2)
## Kernel density plot, with decision thresholds for outliers:
plot(id_outl, method = "mean_std", number_id = 2)
## Load required packages:
library(bioanth)
library(univOutl)
## Load the Goldman Data Set:
data(goldman)
goldman <- as.data.frame(goldman) # tibble to data.frame
## Select the population sample of Giza:
dat <- subset(goldman, NOTE == "Pyramiden, Gizeh")
## Compute asymmetry in tibia medio-lateral diameter:
dat <- na.omit(dat[ , c("RTMLD", "LTMLD")])
asym <- dat$RTMLD - dat$LTMLD
names(asym) <- 1:length(asym)
## Kernel density estimation:
kde <- density(asym, adjust = 1.4)
## Density plot:
par(cex = 1.15, mar = c(4.5, 4.5, 1, 1))
plot(kde, main = "")
rug(asym, col = "red", lwd = 2)
## Add the names of the most extreme values on the right tail:
text(x = sort(asym, dec = TRUE)[1:4], y = 0, pos = c(3, 4, 2, 3),
labels = names(sort(asym, dec = TRUE)[1:4]), col = "red")
## Add thresholds for outlier detection:
abline(v = boxB(asym, method = "resistant")$fences, # standard fences
col = "darkgoldenrod", lty = 2, lwd = 2)
abline(v = boxB(asym, method = "asymmetric")$fences, # asymmetric fences
col = "purple", lty = 3, lwd = 2)
## Add a legend:
legend("topright", lty = c(2, 3),
col = c("darkgoldenrod", "purple"),
legend = c("Standard boxplot fences",
"Asymmetric boxplot fences")
)
##########################
### Load required packages
##########################
library(bioanth)
library(univOutl)
#############################
### Load the Goldman Data Set
#############################
data(goldman)
goldman <- as.data.frame(goldman) # tibble to data.frame
## Select the population sample of Giza:
dat <- subset(goldman, NOTE == "Pyramiden, Gizeh")
#####################################################
### Compute asymmetry in tibia medio-lateral diameter
#####################################################
dat <- na.omit(dat[ , c("RTMLD", "LTMLD")])
asym <- dat$RTMLD - dat$LTMLD
names(asym) <- 1:length(asym)
#####################################
### Density plot + outliers detection
#####################################
## Kernel density estimation:
kde <- density(asym, adjust = 1.4)
## Density plot:
par(cex = 1.15, mar = c(4.5, 4.5, 1, 1))
plot(kde, main = "")
rug(asym, col = "red", lwd = 2)
## Add the names of the most extreme values on the right tail:
text(x = sort(asym, dec = TRUE)[1:4], y = 0, pos = c(3, 4, 2, 3),
labels = names(sort(asym, dec = TRUE)[1:4]), col = "red")
## Add thresholds for outlier detection:
abline(v = boxB(asym, method = "resistant")$fences, # standard fences
col = "darkgoldenrod", lty = 2, lwd = 2)
abline(v = boxB(asym, method = "asymmetric")$fences, # asymmetric fences
col = "purple", lty = 3, lwd = 2)
## Add a legend:
legend("topright", lty = c(2, 3),
col = c("darkgoldenrod", "purple"),
legend = c("Standard boxplot fences",
"Asymmetric boxplot fences")
)
## Load required packages:
library(bioanth)
library(FactoMineR)
library(scatterplot3d)
## Load the Goldman Data Set:
data(goldman, package = "bioanth")
## Select the population sample "Sayala":
sayala <- subset(goldman, NOTE == "Sayala")
## Select appropriate variables (left bones, 3 max. lengths):
sayala <- na.omit(sayala[ , c("LFML", "LTML", "LHML")])
## Relabel the individuals (more convenient in graphical representation):
rownames(sayala) <- 1:nrow(sayala)
# 3D plot:
s3d <- scatterplot3d(x = sayala[, 1], y = sayala[, 2], z = sayala[, 3],
highlight.3d = TRUE, box = FALSE, type = "h",
pch = 16, lty.hplot = 3,
xlab = "LFML", ylab = "LTML", zlab = "LHML",
mar = c(2.5, 2.5, 0, 2))
text(s3d$xyz.convert(sayala), labels = rownames(sayala), pos = 3, cex = 0.9)
##########################
### Load required packages
##########################
library(bioanth)
library(scatterplot3d)
#############################
### Load the Goldman Data Set
#############################
data(goldman, package = "bioanth")
## Select the population sample "Sayala":
sayala <- subset(goldman, NOTE == "Sayala")
## Select appropriate variables (left bones, 3 max. lengths):
sayala <- na.omit(sayala[ , c("LFML", "LTML", "LHML")])
## Relabel the individuals (more convenient in graphical representation):
rownames(sayala) <- 1:nrow(sayala)
###########
### 3D plot
###########
s3d <- scatterplot3d(x = sayala[, 1], y = sayala[, 2], z = sayala[, 3],
highlight.3d = TRUE, box = FALSE, type = "h",
pch = 16, lty.hplot = 3,
xlab = "LFML (mm)", ylab = "LTML (mm)", zlab = "LHML (mm)",
mar = c(2.5, 2.5, 0, 2))
text(s3d$xyz.convert(sayala), labels = rownames(sayala),
pos = 3, cex = 0.9)
##########################
### Load required packages
##########################
library(bioanth)
library(robustbase)
#############################
### Load the Goldman Data Set
#############################
data(goldman, package = "bioanth")
goldman <- as.data.frame(goldman) # tibble to data.frame
## Select the population sample "Sayala" :
sayala <- subset(goldman, NOTE == "Sayala")
## Select appropriate variables (left bones, 3 max. lengths):
sayala <- na.omit(sayala[ , c("LFML", "LTML", "LHML")])
## Relabel the individuals (more convenient in graphical representation):
rownames(sayala) <- 1:nrow(sayala)
#################################
### Compute Mahalanobis distances
#################################
## Classic distance:
maha <- mahalanobis(sayala, center = colMeans(sayala),
cov = cov(sayala))
## Robust distances:
mcd <- covMcd(sayala, alpha = 0.75,
nsamp = "best")$mah
## Add individual IDs:
names(mcd) <- names(maha) <- rownames(sayala)
#####################################################
### Plot the classic and robust Mahalanobis distances
#####################################################
set.seed(12345) # arbitrary seed to ensure reproducbility
par(cex = 1.15, mar = c(2.5, 4, 1, 1))
stripchart(x = list(maha, mcd), method = "jitter",
vertical = TRUE, group.names = c("Classic", "Robust"),
pch = 16, jitter = 0.04, ylab = "Mahalanobis distances")
## Add thresholds (Pearson quantiles):
abline(h = qchisq(0.99, df = 3), lty = 2, col = "red")
abline(h = qchisq(0.95, df = 3), lty = 2, col = "orange")
## Add the names of the individuals detected as outliers:
text(x = 2, y = sort(mcd, decreasing = TRUE)[1:3],
labels = names(sort(mcd, decreasing = TRUE))[1:3], pos = 2)
text(x = c(0.95, 1.05), y = sort(maha, decreasing = TRUE)[1:2],
labels = names(sort(maha, decreasing = TRUE))[1:2], pos = 3)
## Add the legend:
legend("topleft", lty = 2, col = c("red", "orange"),
legend = c(expression(paste(alpha, " = ", 0.01)),
expression(paste(alpha, " = ", 0.05))))
## Load required packages:
library(bioanth)
library(robustbase)
## Load the Goldman Data Set:
data(goldman, package = "bioanth")
goldman <- as.data.frame(goldman) # tibble to data.frame
## Select the population sample "Sayala" :
sayala <- subset(goldman, NOTE == "Sayala")
## Select appropriate variables (left bones, 3 max. lengths):
sayala <- na.omit(sayala[ , c("LFML", "LTML", "LHML")])
## Relabel the individuals (more convenient in graphical representation):
rownames(sayala) <- 1:nrow(sayala)
# Compute Mahalanobis distances: