Adding horizontal space for raw text.
Hello, thanks for this tool.
I found that pieces of text on the same line that are separated by horizontal space are parsed as a single continuous string, with the space between them lost.
This code might help with it:
return pageData.getTextContent(render_options)
.then(function(textContent) {
let last, text = '';
//https://github.com/mozilla/pdf.js/issues/8963
//https://github.com/mozilla/pdf.js/issues/2140
//https://gist.github.com/hubgit/600ec0c224481e910d2a0f883a7b98e3
//https://gist.github.com/hubgit/600ec0c224481e910d2a0f883a7b98e3
for (let item of textContent.items) {
//console.log(item)
debugger;
if (!last || last.transform[5] == item.transform[5]){
if(!last || last.transform[4] + last.width - item.transform[4] > -10){
text += item.str;
}
else{
text += " " + item.str;
}
}
else{
text += '\n' + item.str;
}
last = item;
}
//let strings = textContent.items.map(item => item.str);
//let text = strings.join("\n");
//text = text.replace(/[ ]+/ig," ");
//ret.text = `${ret.text} ${text} \n\n`;
return text;
});
I'm sorry for not opening a merge request, but I don't know GitLab that well yet.