...
 
Commits (5)
R/zzz.R
src/compact_enc_det
ci
# ced 0.1.0
- Initial release
......@@ -3,6 +3,11 @@ expect_true(is.numeric_version(ced_version()))
expect_true(ced_version() >= "2.2")
# test empty input
expect_error(ced_enc_detect(NA_integer_))
# test hint length validation: a hint argument (2nd or 3rd positional) of
# length 2 against 3 inputs must raise an error, while a length-1 hint
# must be accepted silently
expect_error(ced_enc_detect(character(3), character(2), NULL))
expect_silent(ced_enc_detect(character(3), character(1), NULL))
expect_error(ced_enc_detect(character(3), NULL, character(2)))
expect_silent(ced_enc_detect(character(3), NULL, character(1)))
expect_identical(ced_enc_detect(NULL), character(0))
expect_identical(ced_enc_detect(raw()), character(0))
expect_identical(ced_enc_detect(character()), character(0))
......@@ -10,11 +15,16 @@ expect_identical(ced_enc_detect(character(1)), NA_character_)
expect_identical(ced_enc_detect(NA_character_), NA_character_)
# test ASCII encoding
expect_identical(ced_enc_detect(letters), rep("US-ASCII", length(letters)))
expect_identical(ced_enc_detect("Hello"), "US-ASCII")
expect_identical(ced_enc_detect(c("Hello", "World")), c("US-ASCII", "US-ASCII"))
# test preserve names
expect_identical(ced_enc_detect(c(a = "test")), c(a = "US-ASCII"))
# test hints
expect_identical(ced_enc_detect("Hello", "ASCII", "EN"), "US-ASCII")
expect_identical(ced_enc_detect(c("Hello", "\u041f\u0440\u0438\u0432\u0435\u0442"), c("ASCII", "UTF-8"), c("EN", "RU")), c("US-ASCII", "UTF-8"))
test_file <- system.file("test.txt", package = "ced")
test_txt <- read.dcf(test_file, all = TRUE)
......@@ -34,3 +44,9 @@ expect_identical(ced_enc_detect(test_txt[["Arabic(3)"]]), "UTF-8")
# test non UTF-8 encodings
expect_identical(ced_enc_detect(iconv(test_txt[["Russian"]], "UTF-8", "WINDOWS-1251")), "windows-1251")
expect_identical(ced_enc_detect(iconv(test_txt[["Russian"]], "UTF-8", "IBM866")), "IBM866")
# test raw input
expect_identical(ced_enc_detect(charToRaw("Hello")), "US-ASCII")
expect_identical(ced_enc_detect(charToRaw("\u041f\u0440\u0438\u0432\u0435\u0442")), "UTF-8")
expect_identical(ced_enc_detect(charToRaw("Hello"), "ASCII", "EN"), "US-ASCII")
expect_identical(ced_enc_detect(charToRaw("\u041f\u0440\u0438\u0432\u0435\u0442"), "UTF-8", "RU"), "UTF-8")
......@@ -2,6 +2,7 @@ CED_DIR = ./compact_enc_det
CED_CC = compact_enc_det/compact_enc_det.cc compact_enc_det/compact_enc_det_hint_code.cc util/encodings/encodings.cc util/languages/languages.cc
CED_SRC = $(addprefix $(CED_DIR)/,$(CED_CC))
CED_OBJ = $(CED_SRC:.cc=.o)
CED_FLAGS = $(CXX11STD) $(CXXFLAGS) $(CXXPICFLAGS) -I$(CED_DIR) -Wno-narrowing
CED_LIB = $(CED_DIR)/libced.a
CXX_STD = CXX11
......@@ -12,7 +13,7 @@ PKG_LIBS = $(CED_LIB) -pthread
$(SHLIB): $(CED_LIB)
$(CED_OBJ): %.o : %.cc
$(CXX) $(CXXFLAGS) $(CXXPICFLAGS) -I$(CED_DIR) -Wno-narrowing -c $< -o $@
$(CXX) $(CED_FLAGS) -c $< -o $@
$(CED_LIB): $(CED_OBJ)
$(AR) cr $(CED_LIB) $(CED_OBJ)
......
# Directory holding the compact_enc_det sources, relative to this file.
CED_DIR = ./compact_enc_det
# Source files that make up the static library (paths relative to CED_DIR).
CED_CC = compact_enc_det/compact_enc_det.cc compact_enc_det/compact_enc_det_hint_code.cc util/encodings/encodings.cc util/languages/languages.cc
# Full source paths and the object files derived from them (.cc -> .o).
CED_SRC = $(addprefix $(CED_DIR)/,$(CED_CC))
CED_OBJ = $(CED_SRC:.cc=.o)
# Flags used when compiling the library objects: the C++11 standard flag, the
# regular C++ and PIC flags, the library include path, and -Wno-narrowing to
# silence narrowing-conversion diagnostics.
CED_FLAGS = $(CXX11STD) $(CXXFLAGS) $(CXXPICFLAGS) -I$(CED_DIR) -Wno-narrowing
# Static archive the package shared object links against.
CED_LIB = $(CED_DIR)/libced.a
# Request C++11 for the package's own sources.
CXX_STD = CXX11
# Make the library headers visible to the package sources.
PKG_CPPFLAGS = -I$(CED_DIR)
# Link the static archive and pthread into the shared object.
PKG_LIBS = $(CED_LIB) -pthread
# The shared object depends on the static archive, so the archive is built first.
$(SHLIB): $(CED_LIB)
# Static pattern rule: compile each library source into its object file.
$(CED_OBJ): %.o : %.cc
$(CXX) $(CED_FLAGS) -c $< -o $@
# Bundle the objects into the static archive, then index it with ranlib.
$(CED_LIB): $(CED_OBJ)
$(AR) cr $(CED_LIB) $(CED_OBJ)
$(RANLIB) $(CED_LIB)
# Remove the shared object, the package objects, and the library build products.
clean:
rm -f $(SHLIB) $(OBJECTS)
rm -f $(CED_OBJ) $(CED_LIB)
......@@ -23,7 +23,7 @@ Rcpp::String detect(const char* x, size_t n, const Encoding& enc, const Language
// if (!is_reliable) {
// return NA_STRING;
// }
return Rcpp::wrap(res);
return Rcpp::wrap(MimeEncodingName(res));
}
//' @title
......
#include "wrap.h"
namespace Rcpp {
// Specialization of wrap for a single ::Encoding: forwards the result of
// MimeEncodingName(x) to wrap and returns the resulting SEXP.
template <>
SEXP wrap(const ::Encoding& x) {
return wrap(MimeEncodingName(x));
}
// Specialization of wrap for a vector of ::Encoding values: builds an
// Rcpp::StringVector of the same length, filling each slot with
// MimeEncodingName applied to the corresponding element.
template <>
SEXP wrap(const std::vector<::Encoding>& x) {
Rcpp::StringVector res(x.size());
std::transform(x.begin(), x.end(), res.begin(), MimeEncodingName);
return res;
}
// Specialization of wrap for a vector of ::Language values: builds a
// StringVector of the same length, filling each slot with LanguageName
// applied to the corresponding element.
template <>
SEXP wrap(const std::vector<::Language>& x) {
StringVector res(x.size());
std::transform(x.begin(), x.end(), res.begin(), LanguageName);
return res;
}
template <>
::Encoding as(SEXP x) {
if (Rf_isNull(x)) {
......
......@@ -3,12 +3,6 @@
namespace Rcpp {
// Declarations of the explicit specializations that convert between the
// ::Encoding / ::Language types and SEXP.

// wrap: single ::Encoding -> SEXP.
template <>
SEXP wrap(const ::Encoding&);
// wrap: vector of ::Encoding -> SEXP.
template <>
SEXP wrap(const std::vector<::Encoding>&);
// wrap: vector of ::Language -> SEXP.
template <>
SEXP wrap(const std::vector<::Language>&);
// as: SEXP -> ::Encoding.
template <>
::Encoding as(SEXP);
// as: SEXP -> ::Language.
template <>
::Language as(SEXP);
......