-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathParser.php
More file actions
47 lines (44 loc) · 1.24 KB
/
Parser.php
File metadata and controls
47 lines (44 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
<?php
namespace XXXcrawler;
/**
 * Page content extraction.
 *
 * Each field rule is a string of the form "<type>:<expression>". Only the
 * "xpath" type is handled here; rules with any other type are ignored.
 */
class Parser implements iParser
{
    /**
     * Evaluate every field rule against the given HTML and collect the matches.
     *
     * @param array  $fields Map of field name => rule string, e.g. "xpath://h1".
     * @param string $html   HTML markup to search.
     *
     * @return array Map of field name => extracted value. The value is a string
     *               when the expression matched exactly one node and a list of
     *               strings when it matched several. Fields that matched
     *               nothing (or only an empty value) are omitted.
     */
    public function find($fields, $html)
    {
        $re = [];

        // DOMDocument::loadHTML() rejects empty input (ValueError on PHP 8,
        // warning on older versions); there is nothing to extract anyway.
        $html = (string) $html;
        if ($html === '') {
            return $re;
        }

        $dom = new \DOMDocument();

        // Collect libxml parse errors internally instead of silencing every
        // possible error with "@"; real-world HTML is rarely well-formed.
        $previous = libxml_use_internal_errors(true);
        $loaded = $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            return $re;
        }

        $dom->normalize();
        $xpath = new \DOMXPath($dom);

        foreach ($fields as $k => $v) {
            // Split on the FIRST colon only, so colons inside the expression
            // (e.g. "following-sibling::td") are preserved.
            $parts = explode(':', (string) $v, 2);
            if (count($parts) < 2) {
                // No "<type>:" prefix, so the rule cannot be dispatched.
                continue;
            }
            list($type, $path) = $parts;

            switch (strtolower(trim($type))) {
                case 'xpath':
                    $r = $this->_getByXpath($path, $xpath);
                    // Strict emptiness test: a node whose text is "0" is a
                    // legitimate value and must not be discarded.
                    if ($r !== null && $r !== '' && $r !== []) {
                        $re[$k] = $r;
                    }
                    break;
            }
        }

        return $re;
    }

    /**
     * Fetch node values using an XPath expression.
     *
     * @param string    $path  XPath expression to evaluate.
     * @param \DOMXPath $xpath XPath evaluator bound to the parsed document.
     *
     * @return string|array|null The node value when exactly one node matches,
     *                           a list of node values when several match, and
     *                           an empty array when nothing matches or the
     *                           expression is invalid.
     */
    private function _getByXpath($path, $xpath)
    {
        $nodes = $xpath->query($path);

        // query() returns false for a malformed expression.
        if ($nodes === false) {
            return [];
        }

        // A single match is returned as a scalar rather than a one-item list.
        if ($nodes->length === 1) {
            return $nodes->item(0)->nodeValue;
        }

        $values = [];
        foreach ($nodes as $node) {
            $values[] = $node->nodeValue;
        }

        return $values;
    }
}