diff --git a/src/Document.php b/src/Document.php index 6db07663..65c299a1 100644 --- a/src/Document.php +++ b/src/Document.php @@ -28,11 +28,11 @@ public function __construct(Extractor $extractor) $encoding = null; $contentType = $extractor->getResponse()->getHeaderLine('content-type'); - preg_match('/charset="?(.*?)(?=$|\s|;|")/i', $contentType, $match); + preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $contentType, $match); if (!empty($match[1])) { $encoding = trim($match[1], ','); } elseif (!empty($html)) { - preg_match('/charset="?(.*?)(?=$|\s|;|")/i', $html, $match); + preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $html, $match); if (!empty($match[1])) { $encoding = trim($match[1], ','); }