<?php
// Stop right away unless GETID3_INCLUDEPATH has already been defined by whatever
// included this file (presumably the getID3 bootstrap - confirm against the loader).
if (defined('GETID3_INCLUDEPATH') === false) {
	exit;
}
/**
 * getID3 handler for PDF documents.
 *
 * Recognises the "%PDF-x.y" header, locates the cross-reference (xref) table
 * through the "startxref" pointer near the end of the file and uses the object
 * offsets listed there to look for the page count of the document.
 *
 * All file access goes through the fseek()/fgets()/fread()/ftell()/feof()
 * methods inherited from the parent handler; results are written to
 * $this->getid3->info.
 */
class getid3_pdf extends getid3_handler
{
	/**
	 * When false (the default) the raw cross-reference data (offset, generation,
	 * entry and xref_offsets) is removed from the result once analysis is done.
	 *
	 * @var bool
	 */
	public $returnXREF = false;

	/**
	 * Analyze the file and populate the 'pdf' section of the info array.
	 *
	 * Writes $info['fileformat'], $info['pdf']['header']['version'],
	 * $info['pdf']['trailer']['startxref'], $info['pdf']['xref'][...] and, when
	 * the page-tree root object is found, $info['pdf']['pages'].
	 *
	 * @return bool false when the "%PDF-" header is missing, true otherwise
	 *              (also when the xref data could not be located; an error is
	 *              recorded in that case)
	 */
	public function Analyze() {
		$info = &$this->getid3->info;

		$this->fseek(0);
		if (!preg_match('#^%PDF-([0-9\\.]+)$#', rtrim($this->fgets()), $matches)) {
			$this->error('Did not find "%PDF" at the beginning of the PDF');
			return false;
		}
		$info['pdf']['header']['version'] = floatval($matches[1]);
		$info['fileformat'] = 'pdf';

		// the "startxref" keyword and the byte offset of the xref table are searched for in the last 40 bytes
		$this->fseek(-40, SEEK_END);
		if (preg_match('#[\r\n]startxref[ \r\n]+([0-9]+)[ \r\n]+#', $this->fread(40), $matches)) {
			$info['pdf']['trailer']['startxref'] = intval($matches[1]);
			$this->parseXREF($info['pdf']['trailer']['startxref']);
			if (!empty($info['pdf']['xref']['offset'])) {
				$this->scanForAdditionalXREF();
				$this->extractPageCount();
				if (!$this->returnXREF) {
					unset($info['pdf']['xref']['offset'], $info['pdf']['xref']['generation'], $info['pdf']['xref']['entry'], $info['pdf']['xref']['xref_offsets']);
				}
			} else {
				$this->error('Did not find "xref" at offset '.$info['pdf']['trailer']['startxref']);
			}
		} else {
			$this->error('Did not find "startxref" in the last 40 bytes of the PDF');
		}
		$this->warning('PDF parsing incomplete in this version of getID3() ['.$this->getid3->version().']');
		return true;
	}

	/**
	 * Brute-force search for xref tables that were not reached through "startxref".
	 *
	 * Runs for as long as the highest known object number exceeds the number of
	 * xref entries read so far, which suggests that another table exists.
	 *
	 * @return void
	 */
	private function scanForAdditionalXREF() {
		$info = &$this->getid3->info;

		while (!$this->feof() && (max(array_keys($info['pdf']['xref']['offset'])) > $info['pdf']['xref']['count'])) {
			// restart at the beginning of the file and stop at the first "xref" line that has not been parsed yet;
			// if none is left the inner loop runs into EOF, which also ends the outer loop
			$this->fseek(0);
			while (!$this->feof()) {
				$XREFoffset = $this->ftell();
				if (rtrim($this->fgets()) == 'xref') {
					if (empty($info['pdf']['xref']['xref_offsets']) || !in_array($XREFoffset, $info['pdf']['xref']['xref_offsets'])) {
						$this->parseXREF($XREFoffset);
						break;
					}
				}
			}
		}
	}

	/**
	 * Walk the known objects in file order looking for the page-tree root and
	 * store its /Count value in $info['pdf']['pages'].
	 *
	 * @return void
	 */
	private function extractPageCount() {
		$info = &$this->getid3->info;

		// an object can extend at most up to the start of the next object in the file
		asort($info['pdf']['xref']['offset']);
		$maxObjLengths = array();
		$prevOffset = 0;
		$prevObjNum = 0;
		foreach ($info['pdf']['xref']['offset'] as $objectNumber => $offset) {
			if ($prevObjNum) {
				$maxObjLengths[$prevObjNum] = $offset - $prevOffset;
			}
			$prevOffset = $offset;
			$prevObjNum = $objectNumber;
		}

		// the object with the highest offset has no following object to bound it;
		// use the nearest xref table located after it instead so that it is not silently skipped
		if ($prevObjNum && !empty($info['pdf']['xref']['xref_offsets'])) {
			$nextBoundary = null;
			foreach ($info['pdf']['xref']['xref_offsets'] as $knownXREFoffset) {
				if (($knownXREFoffset > $prevOffset) && (($nextBoundary === null) || ($knownXREFoffset < $nextBoundary))) {
					$nextBoundary = $knownXREFoffset;
				}
			}
			if ($nextBoundary !== null) {
				$maxObjLengths[$prevObjNum] = $nextBoundary - $prevOffset;
			}
		}

		foreach ($info['pdf']['xref']['offset'] as $objectNumber => $offset) {
			if ($info['pdf']['xref']['entry'][$objectNumber] == 'f') {
				// entry type "f": not an in-use object, nothing to read
				continue;
			}
			if (empty($maxObjLengths[$objectNumber]) || ($maxObjLengths[$objectNumber] >= $this->getid3->option_fread_buffer_size)) {
				// skip objects of unknown/zero size and objects that do not fit the read buffer
				continue;
			}

			$this->fseek($offset);
			$objBlob = $this->fread($maxObjLengths[$objectNumber]);
			if (!preg_match('#^'.$objectNumber.'[\\x00 \\r\\n\\t]*([0-9]+)[\\x00 \\r\\n\\t]*obj[\\x00 \\r\\n\\t]*(.*)(endobj)?[\\x00 \\r\\n\\t]*$#s', $objBlob, $matches)) {
				$this->error('Unexpected structure "'.substr($objBlob, 0, 100).'" at offset '.$offset);
				break;
			}
			$objectData = $matches[2];

			// a dictionary made up only of /Type, /Pages, /Parent, /Count and /Kids is taken to be the page-tree root
			if (preg_match('#^<<[\r\n\s]*(/Type|/Pages|/Parent [0-9]+ [0-9]+ [A-Z]|/Count [0-9]+|/Kids *\\[[0-9A-Z ]+\\]|[\r\n\s])+[\r\n\s]*>>#', $objectData)) {
				if (preg_match('#/Count ([0-9]+)#', $objectData, $matches)) {
					$info['pdf']['pages'] = (int) $matches[1];
					break;
				}
			}
		}
	}

	/**
	 * Parse the cross-reference table that starts at the given byte offset.
	 *
	 * A table consists of the "xref" keyword followed by one or more subsections,
	 * each introduced by a "<first object number> <entry count>" line and followed
	 * by that many "<offset> <generation> <n|f>" entries. All subsections are read.
	 * On return the file position is just after the last entry that was consumed.
	 *
	 * @param int $XREFoffset byte offset of the "xref" keyword
	 *
	 * @return bool true when at least one subsection was parsed completely
	 */
	private function parseXREF($XREFoffset) {
		$info = &$this->getid3->info;

		$this->fseek($XREFoffset);
		if (rtrim($this->fgets()) !== 'xref') {
			$this->warning('failed to find expected XREF structure at offset '.$XREFoffset);
			return false;
		}

		// remember the table even if parsing fails below, so the brute-force scan does not try it again
		$info['pdf']['xref']['xref_offsets'][$XREFoffset] = $XREFoffset;

		$subsectionsParsed = 0;
		while (!$this->feof()) {
			$headerOffset = $this->ftell();
			if (!preg_match('#^([0-9]+) +([0-9]+)$#', rtrim($this->fgets()), $matches)) {
				// not a subsection header (normally the "trailer" keyword): leave that line unread
				$this->fseek($headerOffset);
				break;
			}
			$firstObjectNumber = (int) $matches[1];
			$XREFcount         = (int) $matches[2];
			$info['pdf']['xref']['count'] = $XREFcount + (!empty($info['pdf']['xref']['count']) ? $info['pdf']['xref']['count'] : 0);

			for ($i = 0; $i < $XREFcount; $i++) {
				$line = rtrim($this->fgets());
				if (!preg_match('#^([0-9]+) ([0-9]+) ([nf])$#', $line, $matches)) {
					$this->error('failed to parse XREF entry #'.$i.' in XREF table at offset '.$XREFoffset);
					return false;
				}
				$objectNumber = $firstObjectNumber + $i;
				$info['pdf']['xref']['offset'][$objectNumber]     = (int) $matches[1];
				$info['pdf']['xref']['generation'][$objectNumber] = (int) $matches[2];
				$info['pdf']['xref']['entry'][$objectNumber]      = $matches[3];
			}
			$subsectionsParsed++;
		}

		if ($subsectionsParsed === 0) {
			$this->warning('failed to parse XREF subsection header in XREF table at offset '.$XREFoffset);
			return false;
		}

		sort($info['pdf']['xref']['xref_offsets']);
		return true;
	}
}