const fs = require('fs');
const pdf = require('pdf-parse');
let dataBuffer = fs.readFileSync('path to PDF file...');
pdf(dataBuffer).then(function(data) {
	// number of pages
	console.log(data.numpages);
	// number of rendered pages
	console.log(data.numrender);
	// PDF info
	console.log(data.info);
	// PDF metadata
	console.log(data.metadata); 
	// PDF.js version
	// check https://mozilla.github.io/pdf.js/getting_started/
	console.log(data.version);
	// PDF text
	console.log(data.text); 
        
});const fs = require('fs');
const pdf = require('pdf-parse');
let dataBuffer = fs.readFileSync('path to PDF file...');
pdf(dataBuffer).then(function(data) {
	// use data
})
.catch(function(error){
	// handle exceptions
})- v1.0.9 and above break pagerender callback changelog
- If you need another output format, such as JSON, you can change the page-render behaviour by supplying a callback.
- Check out https://mozilla.github.io/pdf.js/
// default render callback
/**
 * Default page-render callback: turns one page into plain text.
 *
 * Text items are concatenated in the order they are returned. A newline is
 * inserted whenever an item's `transform[5]` value (tracked as the item's Y
 * position) differs from that of the previous item.
 *
 * @param {object} pageData - Page object exposing `getTextContent(options)`,
 *   which must return a promise resolving to `{ items }`, where every item
 *   has a `str` string and a `transform` array.
 * @returns {Promise<string>} The text of the page; `''` when there are no items.
 */
function render_page(pageData) {
    // check documents https://mozilla.github.io/pdf.js/
    const render_options = {
        // replaces all occurrences of whitespace with standard spaces (0x20). The default value is `false`.
        normalizeWhitespace: false,
        // do not attempt to combine same line TextItem's. The default value is `false`.
        disableCombineTextItems: false,
    };

    return pageData.getTextContent(render_options)
        .then(function(textContent) {
            let lastY;
            let text = '';
            for (const item of textContent.items) {
                const y = item.transform[5];
                // Compare against `undefined` explicitly: a previous Y of 0 is a
                // valid coordinate and must not be mistaken for "no previous item".
                if (lastY === undefined || lastY === y) {
                    text += item.str;
                } else {
                    text += '\n' + item.str;
                }
                lastY = y;
            }
            return text;
        });
}
let options = {
    pagerender: render_page
}
let dataBuffer = fs.readFileSync('path to PDF file...');
pdf(dataBuffer,options).then(function(data) {
	//use new format
});const DEFAULT_OPTIONS = {
	// internal page parser callback
	// you can set this option, if you need another format except raw text
	pagerender: render_page,
	
	// max page number to parse
	max: 0,
	
	//check https://mozilla.github.io/pdf.js/getting_started/
	version: 'v1.10.100'
}If you need another format except raw text.
Maximum number of pages to parse. If the value is less than or equal to 0, the parser renders all pages.
check pdf.js
- 'default'
- 'v1.9.426'
- 'v1.10.100'
- 'v1.10.88'
- 'v2.0.550'
The default version is v1.10.100.
mozilla.github.io/pdf.js
- `mocha` or
- `npm test`
- Check test folder and quickstart.js for extra usages.
MIT licensed, and all its dependencies are MIT or BSD licensed.