Skip to content

Adding horizontal space for raw text.

Hello, thanks for this tool.

I found that pieces of text on the same line that are separated by horizontal space are parsed as a single continuous string, with no space between them.

This code might help with it:

return pageData.getTextContent(render_options)
        .then(function(textContent) {
            let last, text = '';
            //https://github.com/mozilla/pdf.js/issues/8963
            //https://github.com/mozilla/pdf.js/issues/2140
            //https://gist.github.com/hubgit/600ec0c224481e910d2a0f883a7b98e3
            //https://gist.github.com/hubgit/600ec0c224481e910d2a0f883a7b98e3
            for (let item of textContent.items) {
                //console.log(item)
                debugger;
                if (!last || last.transform[5] == item.transform[5]){
                  if(!last || last.transform[4] + last.width - item.transform[4] > -10){
                    text += item.str;
                  }
                  else{
                    text += " " + item.str;
                  }
                }
                else{
                    text += '\n' + item.str;
                }
                last = item;
            }
            //let strings = textContent.items.map(item => item.str);
            //let text = strings.join("\n");
            //text = text.replace(/[ ]+/ig," ");
            //ret.text = `${ret.text} ${text} \n\n`;
            return text;
        });

I'm sorry for not opening a merge request, but I don't know GitLab that well yet.