...
 
Commits (3)
......@@ -6,7 +6,7 @@ Description: R bindings of the Google Compact Encoding Detection library
a source buffer of raw text bytes and probabilistically determines the
most likely encoding for that text. It was designed with accuracy,
robustness, small size, and speed in mind.
Version: 0.1.0
Version: 1.0.0
Authors@R: c(
person(given = "Artem",
family = "Klevtsov",
......
# ced 0.1.0
# ced 1.0.0
- Initial release
......@@ -31,7 +31,6 @@ ced_enc_detect(ex_txt[["Korean"]])
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "uhc"))
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "iso-2022-kr"))
# japanese text
print(ex_txt[["Japanese"]])
ced_enc_detect(ex_txt[["Japanese"]])
......@@ -39,8 +38,8 @@ ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "shift_jis"))
ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "iso-2022-jp"))
# detect the encoding of web page content
\donttest{
# detect the encoding of web page content
if (require("curl")) {
fetch_url <- function(u) curl_fetch_memory(u)$content
ced_enc_detect(fetch_url("https://www.corriere.it"))
......@@ -50,4 +49,3 @@ if (require("curl")) {
ced_enc_detect(fetch_url("https://www.incruit.com/"))
}
}
......@@ -53,7 +53,6 @@ ced_enc_detect(ex_txt[["Korean"]])
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "uhc"))
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "iso-2022-kr"))
# japanese text
print(ex_txt[["Japanese"]])
ced_enc_detect(ex_txt[["Japanese"]])
......@@ -61,8 +60,8 @@ ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "shift_jis"))
ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "iso-2022-jp"))
# detect the encoding of web page content
\donttest{
# detect the encoding of web page content
if (require("curl")) {
fetch_url <- function(u) curl_fetch_memory(u)$content
ced_enc_detect(fetch_url("https://www.corriere.it"))
......@@ -72,5 +71,4 @@ if (require("curl")) {
ced_enc_detect(fetch_url("https://www.incruit.com/"))
}
}
}
# Build configuration for the Compact Encoding Detection (CED) C++ sources.

# Root directory of the CED source tree.
CED_DIR = ./compact_enc_det
# CED translation units to compile, written relative to $(CED_DIR).
CED_CC = compact_enc_det/compact_enc_det.cc compact_enc_det/compact_enc_det_hint_code.cc util/encodings/encodings.cc util/languages/languages.cc
# The same source files with the $(CED_DIR)/ prefix applied.
CED_SRC = $(addprefix $(CED_DIR)/,$(CED_CC))
# One object file per source file: the .cc suffix is replaced by .o.
CED_OBJ = $(CED_SRC:.cc=.o)
# Flags passed to the explicit CED compile rule. -I$(CED_DIR) puts the CED
# tree on the include path; -Wno-narrowing disables the compiler's
# narrowing-conversion diagnostic.
# NOTE(review): CXX11STD, CXXFLAGS and CXXPICFLAGS are not defined in this
# file; presumably they come from R's build configuration. Confirm before
# changing this to an immediately-expanded (:=) assignment.
CED_FLAGS = $(CXX11STD) $(CXXFLAGS) $(CXXPICFLAGS) -I$(CED_DIR) -Wno-narrowing
# Static archive that the CED objects are collected into.
CED_LIB = $(CED_DIR)/libced.a
# C++ standard requested for the package's own sources.
CXX_STD = CXX11
# Preprocessor flags for the package's own sources: make CED headers visible.
PKG_CPPFLAGS = -I$(CED_DIR)
# Link the package against the CED archive, with pthread support enabled.
PKG_LIBS = $(CED_LIB) -pthread
# The package shared object is linked against $(CED_LIB) (see PKG_LIBS), so
# the archive must be built before $(SHLIB).
$(SHLIB): $(CED_LIB)

# Static pattern rule: every CED object is compiled from the .cc file that
# shares its stem, using the dedicated CED flag set.
$(CED_OBJ): %.o: %.cc
	$(CXX) $(CED_FLAGS) -c $< -o $@

# Collect the CED objects into the static archive, then refresh its index.
$(CED_LIB): $(CED_OBJ)
	$(AR) cr $@ $(CED_OBJ)
	$(RANLIB) $@
# Remove the package shared object and its objects, plus the CED objects and
# static archive produced by the rules in this file.
# Declared phony because `clean` names a command, not a file: without the
# declaration, a file called `clean` in this directory would make the target
# look up to date and the recipe would never run.
.PHONY: clean
clean:
	rm -f $(SHLIB) $(OBJECTS)
	rm -f $(CED_OBJ) $(CED_LIB)