Commit 97423978 authored by Gjalt-Jorn Peters's avatar Gjalt-Jorn Peters

Updated pkgdown

parent 2ef8c08a
Pipeline #94418584 failed with stages
in 11 minutes and 44 seconds
......@@ -3,6 +3,7 @@
S3method(plot,rockParsedSources)
S3method(print,rockParsedSource)
S3method(print,rockParsedSources)
export(add_html_tags)
export(apply_graph_theme)
export(base30toNumeric)
export(cat0)
......@@ -13,6 +14,7 @@ export(code_sources)
export(collapse_occurrences)
export(collect_coded_fragments)
export(create_cooccurrence_matrix)
export(export_to_html)
export(extract_codings_by_coderId)
export(generate_uids)
export(load_source)
......
#' Add HTML tags to a source
#'
#' This function adds HTML tags to a source to allow pretty printing/viewing.
#'
#' @inheritParams parsing_sources
#' @param x A character vector with the source
#' @param codeClass,idClass,sectionClass,uidClass,utteranceClass The classes
#' to use for, respectively, codes, identifiers (such as case identifiers or
#' coder identifiers), section breaks, utterance identifiers, and full
#' utterances. All are applied to `<span>` elements, except for the class for
#' full utterances, which is applied to `<div>` elements.
#'
#' @return The character vector with the replacements made.
#' @export
#'
#' @examples add_html_tags("[[cid=participant1]]
#' This is something this participant may have said.
#' Just like this. [[thisIsACode]]
#' ---paragraph-break---
#' And another utterance.");
add_html_tags <- function(x,
codeRegexes = c(codes = "\\[\\[([a-zA-Z0-9._>-]+)\\]\\]"),
idRegexes = c(caseId = "\\[\\[cid[=:]([a-zA-Z0-9._-]+)\\]\\]",
......@@ -9,10 +28,20 @@ add_html_tags <- function(x,
codeClass = "code",
idClass = "identifier",
sectionClass = "sectionBreak",
uidClass = "uid") {
uidClass = "uid",
utteranceClass = "utterance") {
res <- x;
### First replace smaller than and bigger than symbols
### with the corresponding entities
res <- gsub("<", "&lt;", res, fixed=TRUE);
res <- gsub(">", "&gt;", res, fixed=TRUE);
### Also replace <> symbols in all codeRegexes
codeRegexes <- gsub("<", "&lt;", codeRegexes, fixed=TRUE);
codeRegexes <- gsub(">", "&gt;", codeRegexes, fixed=TRUE);
### Add html tags
for (currentCodeRegexName in names(codeRegexes)) {
currentCodeRegex <- codeRegexes[currentCodeRegexName];
......@@ -48,6 +77,10 @@ add_html_tags <- function(x,
}
}
### Also replace <> symbols in all sectionRegexes
sectionRegexes <- gsub("<", "&lt;", sectionRegexes, fixed=TRUE);
sectionRegexes <- gsub(">", "&gt;", sectionRegexes, fixed=TRUE);
### Add break tags
for (currentBreakRegexName in names(sectionRegexes)) {
currentBreakRegex <- sectionRegexes[currentBreakRegexName];
......@@ -75,6 +108,10 @@ add_html_tags <- function(x,
}
}
### Also replace <> symbols in all idRegexes
idRegexes <- gsub("<", "&lt;", idRegexes, fixed=TRUE);
idRegexes <- gsub(">", "&gt;", idRegexes, fixed=TRUE);
### Add identifier tags
for (currentIdRegexName in names(idRegexes)) {
currentIdRegex <- idRegexes[currentIdRegexName];
......@@ -96,6 +133,8 @@ add_html_tags <- function(x,
'">\\1</span>'),
res);
res <- paste0('<div class="', utteranceClass, '">', res, '</div>\n');
return(res);
}
......@@ -21,7 +21,7 @@
#' brackets).
#' @param indices A logical vector of the same length as `input`
#' that indicates to which utterance the code in `codes` should be
#' applied. Note that if `indiced` is provided, only the first
#' applied. Note that if `indices` is provided, only the first
#' element of `codes` is used, and its name is ignored.
#' @param codeDelimiters A character vector of two elements
#' specifying the opening and closing delimiters of codes (conform
......@@ -60,7 +60,7 @@
#' ### Show line 71
#' cat(codedExample[71]);
#'
#' ### Also add code "foo" to utteranced with code 'ipsum'
#' ### Also add code "foo" to utterances with code 'ipsum'
#' moreCodedExample <- code_source(codedExample,
#' c("[[ipsum]]" = "foo"));
#'
......
......@@ -87,8 +87,14 @@ export_to_html <- function(input,
"</div>\n";
if ("rockParsedSource" %in% class(input)) {
res <-
add_html_tags(input$rawSourceDf$utterances_raw);
add_html_tags(x = input$rawSourceDf$utterances_raw,
codeRegexes = input$arguments$codeRegexes,
idRegexes = input$arguments$idRegexes,
sectionRegexes = input$arguments$sectionRegexes,
uidRegex = input$arguments$uidRegex,
inductiveCodingHierarchyMarker = input$arguments$inductiveCodingHierarchyMarker);
res <- paste0(utterancePre, res, utterancePost);
res <- paste0(htmlPre,
fullCSS,
......@@ -133,6 +139,7 @@ export_to_html <- function(input,
dir.create(output,
recursive = TRUE);
}
res <-
lapply(filenames,
function(x) {
......@@ -140,7 +147,10 @@ export_to_html <- function(input,
export_to_html(input=input$parsedSources[[x]],
output=file.path(output,
paste0(basename(x), ".html")),
preventOverwriting = preventOverwriting);
template = template,
preventOverwriting = preventOverwriting,
encoding = encoding,
silent=silent);
});
} else {
stop("As argument 'input', only provide an object with parsed sources, ",
......
......@@ -164,7 +164,6 @@ extract_codings_by_coderId <- function(input,
stats::setNames(rawCodings, #list(res$codingsByCoder[[i]][[j]][[k]]),
i);
if (k %in% res$utterances) {
### If this uid already contains information, append the new info
if (j %in% names(res$utterances[[k]])) {
......@@ -185,9 +184,6 @@ extract_codings_by_coderId <- function(input,
}
}
return(res);
}
#' Merge source files by different coders
#'
#' This function takes sets of sources and merges them using the utterance
#' identifiers (UIDs) to match them.
#'
#' @inheritParams parse_sources
#' @param input The directory containing the input sources.
#' @param output The path to the directory where to store the merged sources.
#' This path will be created with a warning if it does not exist. An exception
#' is if "`same`" is specified - in that case, every file will be written to the
#' same directory it was read from.
#' @param outputPrefix,outputSuffix A pre- and/or suffix to add to the filename
#' when writing the merged sources (especially useful when writing them to the
#' same directory).
#' @param primarySourcesRegex A regular expression that specifies how to
#' recognize the primary sources (i.e. the files used as the basis, to which
#' the codes from other sources are added).
#' @param primarySourcesIgnoreRegex A regular expression that specifies which
#' files to ignore as primary files.
#' @param primarySourcesPath The path containing the primary sources.
#' @param coderId A regular expression specifying the coder identifier, specified
#' similarly to the codeRegexes.
#' @param idForOmittedCoderIds The identifier to use for utterances that do not
#' have a coder id (i.e. utterances that occur in a source that does not specify
#' a coder id, or above the line where a coder id is specified).
#' @param recursive,primarySourcesRecursive Whether to read files from
#' sub-directories (`TRUE`) or not.
#' @param filenameRegex Only files matching this regular expression are read.
#' @param overwrite Whether to overwrite existing files or not.
#' @param inheritSilence If not silent, whether to let functions called
#' by `merge_sources` inherit that setting.
#'
#' @return Invisibly, a list of the parsed, primary, and merged sources.
#' @export
#'
#' @examples
merge_sources <- function(input,
output,
outputPrefix = "",
......@@ -27,11 +62,12 @@ merge_sources <- function(input,
ignoreOddDelimiters=FALSE,
postponeDeductiveTreeBuilding = TRUE,
encoding="UTF-8",
silent=TRUE) {
silent=TRUE,
inheritSilence = FALSE) {
if (!dir.exists(primarySourcesPath)) {
stop("Directory specified to read primary sources from (",
primarySourcesPath, ")does not exist!");
primarySourcesPath, ") does not exist!");
}
### Store all arguments and delete the ones specific to this function
......@@ -45,11 +81,15 @@ merge_sources <- function(input,
'output',
'outputPrefix',
'outputSuffix',
'overwrite'))];
'overwrite',
'inheritSilence'))];
### Set 'silent' as function of both imperative for this function and the called functions
args$silent <- ifelse(inheritSilence, silent, TRUE);
if (!silent) {
cat0("\n\nStarting to extracting codings from sources in directory '",
input, "'.\n\n");
cat0("\n\nStarting to extract all codings from all sources in directory '",
input, "' that match regular expression '", filenameRegex, "'.");
}
### Then pass arguments along to extract_codings_by_coderId and store result
......@@ -60,6 +100,13 @@ merge_sources <- function(input,
allCodedUtterances <-
names(parsedSources$utterances);
if (!silent) {
cat0("\n\nProcessed codings for a total of ",
length(allCodedUtterances), " utterances, where the first six utterance identifiers are ",
vecTxtQ(utils::head(allCodedUtterances)), " and the last six utterance identifiers are ",
vecTxtQ(utils::tail(allCodedUtterances)), ".\n\n");
}
if (!silent) {
cat0("\n\nStarting to load primary sources in directory '",
primarySourcesPath, "'.\n\n");
......@@ -73,7 +120,11 @@ merge_sources <- function(input,
recursive=primarySourcesRecursive,
full.names=TRUE,
ignoreRegex=primarySourcesIgnoreRegex,
silent=silent);
silent=ifelse(inheritSilence, silent, TRUE));
if (!silent) {
cat0("\n\nLoaded ", length(primarySources), " primary sources.");
}
if (!(tolower(output) == "same")) {
if (!dir.exists(output)) {
......@@ -86,7 +137,7 @@ merge_sources <- function(input,
}
if (!silent) {
cat0("\n\nStarting to merge codes into primary sources.\n\n");
cat0("\n\nStarting to merge codes into primary sources.\n");
}
mergedSources <- list();
......@@ -94,43 +145,79 @@ merge_sources <- function(input,
for (i in names(primarySources)) {
if (!silent) {
cat0("\nStarting to process primary source '", i, "'.\n");
cat0("\n - Starting to process primary source '", basename(i), "'.");
}
### Check for matches with UID regex
uidRegexMatches <-
grepl(uidRegex,
primarySources[[i]],
perl=TRUE);
if (!silent) {
cat0("\n - Out of the ", length(uidRegexMatches), " lines in this source, ",
sum(uidRegexMatches), " match the uidRegex (i.e. contain an utterance identifier).");
}
### Extract the UIDs and store in a vector, where every element corresponds to a line in the source.
primarySourceUids[[i]] <-
ifelse(grepl(uidRegex,
primarySources[[i]],
perl=TRUE),
ifelse(uidRegexMatches,
gsub(paste0(".*", uidRegex, ".*"),
"\\1",
primarySources[[i]]),
"");
### Store primary source as basis for the merged source text
mergedSources[[i]] <- primarySources[[i]];
### This way, 'j' is both the index for the UID vector and
### for the corresponding line in the sources
nrOfMergedUtteranceCodings <- 0;
for (j in seq_along(primarySourceUids[[i]])) {
currentUID <- primarySourceUids[[i]][j];
if (getOption('rock.debug', FALSE)) {
cat0("\n - Processing UID '", currentUID, "'.");
}
if (currentUID %in% allCodedUtterances) {
nrOfMergedUtteranceCodings <-
nrOfMergedUtteranceCodings + 1;
### Check whether one of them is already applied
codings <-
unname(unlist(parsedSources$utterances[[currentUID]]));
if (getOption('rock.debug', FALSE)) {
cat0("\n - Found codings: ", vecTxtQ(codings), " in utterance:\n",
" '", mergedSources[[i]][j], "'");
}
alreadyAppliedCodings <-
unlist(lapply(codings,
grepl,
x = mergedSources[[i]][j]));
x = mergedSources[[i]][j],
fixed=TRUE));
if (getOption('rock.debug', FALSE)) {
cat0("\n - Already applied codings: ", vecTxtQ(codings[alreadyAppliedCodings]), ".");
}
### Add new codes
mergedSources[[i]][j] <-
paste0(mergedSources[[i]][j], " ",
paste0(codings[!alreadyAppliedCodings],
collapse = " "));
if (getOption('rock.debug', FALSE)) {
cat0("\n - Added codings: ", vecTxtQ(codings[!alreadyAppliedCodings]), ".");
}
}
}
if (!silent) {
cat0("\n - ", nrOfMergedUtteranceCodings, " utterances merged.");
}
newFilename <-
paste0(outputPrefix,
......
......@@ -36,22 +36,29 @@ dependency_url <-
pkgdown_url <-
paste0("https://r-packages.gitlab.io/", packagename);
```
# <img src='img/hex-logo.png' align="right" height="200" /> `r paste(packagename, "\U1F4E6")`
cran_url <-
paste0("https://cran.r-project.org/package=", packagename);
cranVersion_badge <-
paste0("https://www.r-pkg.org/badges/version/", packagename, "?color=brightgreen");
cranLastMonth_badge <-
paste0("https://cranlogs.r-pkg.org/badges/last-month/", packagename, "?color=brightgreen");
cranTotal_badge <-
paste0("https://cranlogs.r-pkg.org/badges/grand-total/", packagename, "?color=brightgreen");
## `r packageSubtitle`
<!-- badges: start -->
[![Pipeline status](`r gitLab_ci_badge`)](`r gitLab_ci_url`)
```
<!-- badges: start -->[![Pipeline status](`r gitLab_ci_badge`)](`r gitLab_ci_url`)
[![Coverage status](`r codecov_badge`)](`r codecov_url`)
[![Version on CRAN](`r cranVersion_badge`)](`r cran_url`)
[![Downloads last month](`r cranLastMonth_badge`)](`r cran_url`)
[![Total downloads](`r cranTotal_badge`)](`r cran_url`)
<!-- [![Dependency status](`r dependency_badge`)](`r dependency_url`) -->
<!-- badges: end -->
# <img src='img/hex-logo.png' align="right" height="200" /> `r paste(packagename, "\U1F4E6")`
## `r packageSubtitle`
The pkgdown website for this project is located at `r pkgdown_url`.
<!--------------------------------------------->
......@@ -70,6 +77,7 @@ construct taxonomy (DCT).
See the [introductory vignette](https://r-packages.gitlab.io/rock/articles/introduction_to_rock.html) for an introduction.
There is a FLOSS interface for the ROCK, iROCK, available at https://r-packages.gitlab.io/rock/iROCK/
<!--------------------------------------------->
<!-- End of a custom bit for every package -->
<!--------------------------------------------->
......
<!-- README.md is generated from README.Rmd. Please edit that file -->
# <img src='img/hex-logo.png' align="right" height="200" /> rock 📦
## Reproducible Open Coding Kit
<!-- badges: start -->
[![Pipeline
status](https://gitlab.com/r-packages/rock/badges/master/pipeline.svg)](https://gitlab.com/r-packages/rock/commits/master)
[![Coverage
status](https://codecov.io/gl/r-packages/rock/branch/master/graph/badge.svg)](https://codecov.io/gl/r-packages/rock?branch=master)
[![Version on
CRAN](https://www.r-pkg.org/badges/version/rock?color=brightgreen)](https://cran.r-project.org/package=rock)
[![Downloads last
month](https://cranlogs.r-pkg.org/badges/last-month/rock?color=brightgreen)](https://cran.r-project.org/package=rock)
[![Total
downloads](https://cranlogs.r-pkg.org/badges/grand-total/rock?color=brightgreen)](https://cran.r-project.org/package=rock)
<!-- [![Dependency status](https://tinyverse.netlify.com/badge/rock)](https://CRAN.R-project.org/package=rock) -->
<!-- badges: end -->
# <img src='img/hex-logo.png' align="right" height="200" /> rock 📦
## Reproducible Open Coding Kit
The pkgdown website for this project is located at
<https://r-packages.gitlab.io/rock>.
......@@ -41,8 +44,11 @@ for an introduction.
There is a FLOSS interface for the ROCK, iROCK, available at
<https://r-packages.gitlab.io/rock/iROCK/>
<!--------------------------------------------->
<!-- End of a custom bit for every package -->
<!--------------------------------------------->
## Installation
......
### Wrap every code in a source in its own HTML <span> element.
###
### For each regular expression in `codeRegexes`, every match in `x` is
### replaced by
###   <span class="CODECLASS REGEXNAME PARTS">MATCH</span>
### where PARTS is the text captured by the regular expression's capturing
### group, split on `inductiveCodingHierarchyMarker` and joined with spaces
### (so `[[parent>child]]` receives the classes `parent` and `child`).
### Elements of `x` that contain no code are returned unchanged, and every
### code on a line gets its own, properly closed, span.
###
### Arguments:
###   x            Character vector with the source.
###   codeRegexes  Named character vector of regular expressions, each with
###                one capturing group that captures the code. The names are
###                added as classes; because the loop runs over the names,
###                an unnamed vector results in no replacements.
###   inductiveCodingHierarchyMarker
###                Regular expression on which captured codes are split to
###                obtain one class per level.
###   codeClass    Class added to every code span.
###   idRegexes, sectionRegexes, uidRegex, idClass, sectionClass, uidClass
###                Accepted so the signature stays stable; this
###                implementation does not use them.
###
### Returns: a character vector of the same length as `x` with the span
### elements inserted.
add_html_tags <- function(x,
                          codeRegexes = c(codes = "\\[\\[([a-zA-Z0-9._>-]+)\\]\\]"),
                          idRegexes = c(caseId = "\\[\\[cid=([a-zA-Z0-9._-]+)\\]\\]",
                                        stanzaId = "\\[\\[sid=([a-zA-Z0-9._-]+)\\]\\]"),
                          sectionRegexes = c(paragraphs = "---paragraph-break---",
                                             secondary = "---<[a-zA-Z0-9]?>---"),
                          uidRegex = "\\[\\[uid=([a-zA-Z0-9._-]+)\\]\\]",
                          inductiveCodingHierarchyMarker = ">",
                          codeClass = "code",
                          idClass = "identifier",
                          sectionClass = "sectionBreak",
                          uidClass = "uid") {

  res <- x;

  ### Nothing to tag; also avoids calling the string functions on
  ### zero-length input
  if (length(res) == 0) {
    return(res);
  }

  ### Add html tags
  for (currentCodeRegexName in names(codeRegexes)) {
    currentCodeRegex <- codeRegexes[currentCodeRegexName];

    ### Takes the codes found on one line (including their delimiters) and
    ### returns each of them wrapped in its own span
    wrapCodesInSpans <- function(codeTags) {
      ### Each tag matches the regex in full, so this leaves only the
      ### captured code
      codeContent <- sub(currentCodeRegex, "\\1", codeTags);
      ### One class per level of the inductive coding hierarchy
      hierarchyClasses <-
        vapply(strsplit(codeContent,
                        inductiveCodingHierarchyMarker),
               paste0,
               character(1),
               collapse = " ");
      return(paste0('<span class="', codeClass,
                    ' ', currentCodeRegexName,
                    ' ', hierarchyClasses, '">',
                    codeTags, '</span>'));
    };

    ### Only touch the lines that actually contain a code, so that other
    ### lines do not receive a stray, unclosed span
    linesWithCodes <- grepl(currentCodeRegex, res);

    if (any(linesWithCodes)) {
      codedLines <- res[linesWithCodes];
      codePositions <- gregexpr(currentCodeRegex, codedLines);
      ### Replace every match in place, so opening and closing tags are
      ### always balanced, also when a line holds several codes
      regmatches(codedLines, codePositions) <-
        lapply(regmatches(codedLines, codePositions),
               wrapCodesInSpans);
      res[linesWithCodes] <- codedLines;
    }

  }

  return(res);

}
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -23,3 +23,6 @@
box-sizing: border-box;
}
.utterance {
display: block;
}
......@@ -14,3 +14,7 @@
background-color: #eeeeee;
color: #bbbbbb;
}
.utterance {
border-bottom: 0.5px dotted black;
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/add_html_tags.R
\name{add_html_tags}
\alias{add_html_tags}
\title{Add HTML tags to a source}
\usage{
add_html_tags(x, codeRegexes = c(codes =
"\\\\[\\\\[([a-zA-Z0-9._>-]+)\\\\]\\\\]"), idRegexes = c(caseId =
"\\\\[\\\\[cid[=:]([a-zA-Z0-9._-]+)\\\\]\\\\]", stanzaId =
"\\\\[\\\\[sid[=:]([a-zA-Z0-9._-]+)\\\\]\\\\]"),
sectionRegexes = c(paragraphs = "---paragraph-break---", secondary =
"---<[a-zA-Z0-9]?>---"),
uidRegex = "\\\\[\\\\[uid[=:]([a-zA-Z0-9._-]+)\\\\]\\\\]",
inductiveCodingHierarchyMarker = ">", codeClass = "code",
idClass = "identifier", sectionClass = "sectionBreak",
uidClass = "uid", utteranceClass = "utterance")
}
\arguments{
\item{x}{A character vector with the source}
\item{codeClass}{The classes to use for, respectively, codes,
identifiers (such as case identifiers or coder identifiers), section
breaks, utterance identifiers, and full utterances. All \code{<span>} elements
except for the full utterances, which are placed in \code{<div>} elements.}
}
\value{
The character vector with the replacements made.
}
\description{
This function adds HTML tags to a source to allow pretty printing/viewing.
}
......@@ -30,7 +30,7 @@ brackets).}
\item{indices}{A logical vector of the same length as \code{input}
that indicates to which utterance the code in \code{codes} should be
applied. Note that if \code{indiced} is provided, only the first
applied. Note that if \code{indices} is provided, only the first
element of \code{codes} is used, and its name is ignored.}
\item{codeDelimiters}{A character vector of two elements
......@@ -78,7 +78,7 @@ codedExample <- code_source(loadedExample,
### Show line 71
cat(codedExample[71]);
### Also add code "foo" to utteranced with code 'ipsum'
### Also add code "foo" to utterances with code 'ipsum'
moreCodedExample <- code_source(codedExample,
c("[[ipsum]]" = "foo"));
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/export_to_html.R
\name{export_to_html}
\alias{export_to_html}
\title{Export parsed sources to HTML}
\usage{
export_to_html(input, output = NULL, template = "default",
preventOverwriting = TRUE, encoding = "UTF-8", silent = TRUE)
}
\arguments{
\item{input}{An object of class \code{rockParsedSource} (as resulting from a call
to \code{\link[rock:parse_source]{rock::parse_source()}}) or of class \code{rockParsedSources} (as resulting from a call
to \code{\link[rock:parse_sources]{rock::parse_sources()}}).}
\item{output}{Either NULL to not write any files, or, if \code{input} is a single
\code{rockParsedSource}, the filename to write to, and if \code{input} is a \code{rockParsedSources}
object, the path to write to. This path will be created with a warning
if it does not exist.}
\item{preventOverwriting}{Whether to prevent overwriting of output files.}
\item{encoding}{The encoding to use when writing the exported source(s).}
\item{silent}{Whether to suppress messages.}
}
\value{
A list of character vectors.
}
\description{
This function can be used to convert one or more parsed sources to HTML.
}
\examples{
### Get path to example source
examplePath <-
system.file("extdata", package="rock");
### Parse all example sources in that directory
parsedExamples <- rock::parse_sources(examplePath);
### Export results to a temporary directory
tmpDir <- tempdir(check = TRUE);
prettySources <-
export_to_html(input = parsedExamples);
### Show first one
print(prettySources[[1]]);
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/merge_sources.R
\name{merge_sources}
\alias{merge_sources}
\title{Merge source files by different coders}
\usage{
merge_sources(input, output, outputPrefix = "",
outputSuffix = "_merged", primarySourcesRegex = ".*",
primarySourcesIgnoreRegex = outputSuffix, primarySourcesPath = input,
coderId = "\\\\[\\\\[coderId=([a-zA-Z0-9._-]+)\\\\]\\\\]",
idForOmittedCoderIds = "noCoderId", codeRegexes = c(codes =
"\\\\[\\\\[([a-zA-Z0-9._>-]+)\\\\]\\\\]"), idRegexes = c(caseId =
"\\\\[\\\\[cid=([a-zA-Z0-9._-]+)\\\\]\\\\]", stanzaId =
"\\\\[\\\\[sid=([a-zA-Z0-9._-]+)\\\\]\\\\]", coderId =
"\\\\[\\\\[coderId=([a-zA-Z0-9._-]+)\\\\]\\\\]"),
sectionRegexes = c(paragraphs = "---paragraph-break---", secondary =
"---<[a-zA-Z0-9]?>---"),
uidRegex = "\\\\[\\\\[uid=([a-zA-Z0-9._-]+)\\\\]\\\\]",
autoGenerateIds = c("stanzaId"), persistentIds = c("caseId",
"coderId"), noCodes = "^uid:|^uid=|^dct:|^ci:", recursive = TRUE,
primarySourcesRecursive = recursive, filenameRegex = ".*",
delimiterRegEx = "^---$", ignoreRegex = "^#", overwrite = FALSE,
ignoreOddDelimiters = FALSE, postponeDeductiveTreeBuilding = TRUE,
encoding = "UTF-8", silent = TRUE, inheritSilence = FALSE)
}
\arguments{
\item{input}{The directory containing the input sources.}
\item{output}{The path to the directory where to store the merged sources.
This path will be created with a warning if it does not exist. An exception
is if "\code{same}" is specified - in that case, every file will be written to the
same directory it was read from.}
\item{outputPrefix, outputSuffix}{A pre- and/or suffix to add to the filename
when writing the merged sources (especially useful when writing them to the
same directory).}
\item{primarySourcesRegex}{A regular expression that specifies how to
recognize the primary sources (i.e. the files used as the basis, to which
the codes from other sources are added).}
\item{primarySourcesIgnoreRegex}{A regular expression that specifies which
files to ignore as primary files.}
\item{primarySourcesPath}{The path containing the primary sources.}
\item{coderId}{A regular expression specifying the coder identifier, specified
similarly to the codeRegexes.}
\item{idForOmittedCoderIds}{The identifier to use for utterances that do not
have a coder id (i.e. utterances that occur in a source that does not specify
a coder id, or above the line where a coder id is specified).}
\item{codeRegexes}{These are named character vectors with one
or more regular expressions. For \code{codeRegexes}, these specify how to extract the codes
(that were used to code the sources). For \code{idRegexes}, these specify how to extract the
different types of identifiers. For \code{sectionRegexes}, these specify how to extract the
different types of sections. The \code{codeRegexes} and \code{idRegexes} must each contain one
capturing group to capture the codes and identifiers, respectively.}
\item{idRegexes}{These are named character vectors with one
or more regular expressions. For \code{codeRegexes}, these specify how to extract the codes
(that were used to code the sources). For \code{idRegexes}, these specify how to extract the
different types of identifiers. For \code{sectionRegexes}, these specify how to extract the
different types of sections. The \code{codeRegexes} and \code{idRegexes} must each contain one
capturing group to capture the codes and identifiers, respectively.}
\item{sectionRegexes}{These are named character vectors with one
or more regular expressions. For \code{codeRegexes}, these specify how to extract the codes
(that were used to code the sources). For \code{idRegexes}, these specify how to extract the
different types of identifiers. For \code{sectionRegexes}, these specify how to extract the
different types of sections. The \code{codeRegexes} and \code{idRegexes} must each contain one
capturing group to capture the codes and identifiers, respectively.}
\item{autoGenerateIds}{The names of the \code{idRegexes} that, if missing, should receive
autogenerated identifiers (which consist of 'autogenerated_' followed by an incrementing
number).}
\item{persistentIds}{The names of the \code{idRegexes} for the identifiers which, once
attached to an utterance, should be attached to all following utterances as well (until
a new identifier with the same name is encountered, after which that identifier will be
attached to all following utterances, etc).}
\item{noCodes}{This regular expression is matched with all codes after they have been
extracted using the \code{codeRegexes} regular expression (i.e. they're matched against the
codes themselves without, for example, the square brackets in the default code regex). Any
codes matching this \code{noCodes} regular expression will be \strong{ignored}, i.e., removed from the
list of codes.}
\item{recursive, primarySourcesRecursive}{Whether to read files from
sub-directories (\code{TRUE}) or not.}
\item{filenameRegex}{Only files matching this regular expression are read.}
\item{delimiterRegEx}{The regular expression that is used to extract the YAML fragments.}
\item{ignoreRegex}{The regular expression that is used to delete lines before any other
processing. This can be used to enable adding comments to sources, which are then ignored
during analysis.}
\item{overwrite}{Whether to overwrite existing files or not.}
\item{ignoreOddDelimiters}{If an odd number of YAML delimiters is encountered, whether this
should result in an error (\code{FALSE}) or just be silently ignored (\code{TRUE}).}
\item{postponeDeductiveTreeBuilding}{Whether to immediately try to build the deductive
tree(s) based on the information in this file (\code{FALSE}) or whether to skip that. Skipping
this is useful if the full tree information is distributed over multiple files (in which case
you should probably call \code{parse_sources} instead of \code{parse_source}).}
\item{encoding}{The encoding of the file to read (in \code{file}).}
\item{silent}{Whether to provide (\code{FALSE}) or suppress (\code{TRUE}) more detailed progress updates.}
\item{inheritSilence}{If not silent, whether to let functions called
by \code{merge_sources} inherit that setting.}
}
\value{
Invisibly, a list of the parsed, primary, and merged sources.
}
\description{
This function takes sets of sources and merges them using the utterance
identifiers (UIDs) to match them.
}
......@@ -73,7 +73,7 @@
<h1>Introduction to the Reproducible Open Coding Kit (ROCK)</h1>
<h4 class="author">Gjalt-Jorn Ygram Peters &amp; Szilvia Zörgő</h4>
<h4 class="date">2019-09-13</h4>
<h4 class="date">2019-11-07</h4>