You can do this on the client side using this library
https://mozilla.github.io/pdf.js/getting_started/
I made an example based on this tutorial
https://ourcodeworld.com/articles/read/405/how-to-convert-pdf-to-text-extract-text-from-pdf-with-javascript
I placed the library's two files (pdf.js and pdf.worker.js) in the same folder as the HTML page; the script reads the PDF selected in the file input. The result is printed to the browser console as an array in which each item is the text of one page.
<!DOCTYPE html>
<html lang="en">
<head>
<title></title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<script src="pdf.js"></script>
</head>
<body>
<h1>PDF.js</h1>
<input id="inputFile" type="file" onchange="convertToBase64();" />
<script>
// pdf.js builds that expose the `pdfjsLib` global read the worker script
// location from GlobalWorkerOptions.workerSrc; a bare `pdfjsLib.workerSrc`
// property is ignored by the library.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'pdf.worker.js';
/**
 * Change handler for the #inputFile element.
 *
 * Reads the first selected file as a data URL and passes the result to
 * lerPDF. Does nothing when no file is selected.
 */
function convertToBase64() {
  const chosenFiles = document.getElementById("inputFile").files;
  if (chosenFiles.length === 0) {
    return;
  }

  const reader = new FileReader();
  // Once the read completes, the data URL is available on the event target.
  reader.onload = (loadEvent) => {
    lerPDF(loadEvent.target.result);
  };
  reader.readAsDataURL(chosenFiles[0]);
}
/**
 * Loads a PDF with pdf.js and logs the text of every page to the console.
 *
 * The logged value is an array with one string per page, in page order.
 * Any failure, whether loading the document or extracting a page, is
 * reported through console.error.
 *
 * @param {string} fileToLoad Source handed to pdfjsLib.getDocument (the caller
 *     in this file passes a data URL).
 * @returns {Promise<void>} Settles after the text has been logged or the
 *     error has been reported.
 */
function lerPDF(fileToLoad) {
  // getDocument returns a loading task; the document itself is delivered
  // through the task's `promise` property.
  return pdfjsLib.getDocument(fileToLoad).promise
    .then((pdf) => {
      const pagesPromises = [];
      // Pages are requested by 1-based page number.
      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        pagesPromises.push(getPageText(pageNumber, pdf));
      }
      return Promise.all(pagesPromises);
    })
    .then((pagesText) => {
      // Display text of all the pages in the console
      console.log(pagesText);
    })
    .catch((reason) => {
      // PDF loading or text extraction error
      console.error(reason);
    });
}
/**
 * Retrieves the text of a specific page within a PDF document obtained through pdf.js.
 *
 * @param {number} pageNum Number of the page to read.
 * @param {PDFDocument} PDFDocumentInstance The PDF document obtained from pdf.js.
 * @returns {Promise<string>} Resolves with the page's text items concatenated,
 *     each followed by a single space. Rejects if getPage or getTextContent
 *     fails, so callers are never left waiting on a promise that cannot settle.
 **/
function getPageText(pageNum, PDFDocumentInstance) {
  // Return the chain directly so failures propagate to the caller.
  return PDFDocumentInstance.getPage(pageNum)
    // The main trick to obtain the text of the PDF page: getTextContent.
    .then((pdfPage) => pdfPage.getTextContent())
    // Concatenate the string of every item, keeping a space after each one.
    .then((textContent) => textContent.items.map((item) => `${item.str} `).join(""));
}
</script>
</body>
</html>
Does this Answer your Question? Read content from a PDF in Javascript
– tvdias