Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix undef boolean attributes #36

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
tags-ignore:
- '*'
pull_request:
workflow_dispatch:
jobs:
perl:
runs-on: ubuntu-latest
Expand All @@ -14,18 +15,19 @@ jobs:
matrix:
perl-version:
- '5.38'
# - '5.34'
# - '5.32'
# - '5.30'
# - '5.28'
# - '5.26'
# - '5.24'
# - '5.22'
# - '5.20'
- '5.36'
- '5.34'
- '5.32'
- '5.30'
- '5.28'
- '5.26'
- '5.24'
- '5.22'
- '5.20'
- '5.18'
# - '5.16'
# - '5.14'
# - '5.12'
- '5.16'
- '5.14'
- '5.12'
- '5.10'
container:
image: perl:${{ matrix.perl-version }}
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ Makefile
MYMETA.*
.build/
HTML-Parser-*/

local/
71 changes: 64 additions & 7 deletions hparser.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,40 @@ static const char * const argname[] = {
/* ARG_FLAG_FLAT_ARRAY */
};

/*
 * Table of HTML boolean attribute names.
 * Source list: https://meiert.com/en/blog/boolean-attributes-of-html/
 *
 * Each entry stores the attribute name and its length in bytes (excluding
 * the trailing NUL).  The table is terminated by a sentinel entry whose
 * len is 0 and whose str is a null pointer, so it can be walked with
 *     for (i = 0; boolean_attributes[i].len; i++) ...
 *
 * Names are kept in lower case; NOTE(review): callers are expected to
 * lower-case the candidate before comparing -- confirm at each use site.
 *
 * The length is derived from the string literal with sizeof so the two
 * fields can never drift apart when an entry is added or edited.
 */
#define BOOLEAN_ATTRIBUTE(name) { (int)(sizeof(name) - 1), name }

/* "static" comes first: a storage-class specifier placed after a type
 * qualifier is an obsolescent form that some compilers warn about. */
static const struct boolean_attribute {
    int len;            /* strlen(str); 0 marks the end of the table */
    const char* str;    /* lower-case attribute name; null in the sentinel */
}
boolean_attributes[] = {
    BOOLEAN_ATTRIBUTE("allowfullscreen"),
    BOOLEAN_ATTRIBUTE("allowpaymentrequest"),
    BOOLEAN_ATTRIBUTE("async"),
    BOOLEAN_ATTRIBUTE("autofocus"),
    BOOLEAN_ATTRIBUTE("autoplay"),
    BOOLEAN_ATTRIBUTE("checked"),
    BOOLEAN_ATTRIBUTE("controls"),
    BOOLEAN_ATTRIBUTE("default"),
    BOOLEAN_ATTRIBUTE("disabled"),
    BOOLEAN_ATTRIBUTE("formnovalidate"),
    BOOLEAN_ATTRIBUTE("hidden"),
    BOOLEAN_ATTRIBUTE("ismap"),
    BOOLEAN_ATTRIBUTE("itemscope"),
    BOOLEAN_ATTRIBUTE("loop"),
    BOOLEAN_ATTRIBUTE("multiple"),
    BOOLEAN_ATTRIBUTE("muted"),
    BOOLEAN_ATTRIBUTE("nomodule"),
    BOOLEAN_ATTRIBUTE("novalidate"),
    BOOLEAN_ATTRIBUTE("open"),
    BOOLEAN_ATTRIBUTE("playsinline"),
    BOOLEAN_ATTRIBUTE("readonly"),
    BOOLEAN_ATTRIBUTE("required"),
    BOOLEAN_ATTRIBUTE("reversed"),
    BOOLEAN_ATTRIBUTE("selected"),
    BOOLEAN_ATTRIBUTE("truespeed"),
    {0, 0}              /* sentinel: end of table */
};

#undef BOOLEAN_ATTRIBUTE


#define CASE_SENSITIVE(p_state) \
((p_state)->xml_mode || (p_state)->case_sensitive)
#define STRICT_NAMES(p_state) \
Expand Down Expand Up @@ -438,8 +472,8 @@ report_event(PSTATE* p_state,
}

for (i = 1; i < num_tokens; i += 2) {
SV* attrname = newSVpvn(tokens[i].beg,
tokens[i].end-tokens[i].beg);
int attrlen = tokens[i].end-tokens[i].beg;
SV* attrname = newSVpvn(tokens[i].beg, attrlen);
SV* attrval;

if (utf8)
Expand All @@ -465,11 +499,34 @@ report_event(PSTATE* p_state,
}
}
else { /* boolean */
if (p_state->bool_attr_val)
attrval = newSVsv(p_state->bool_attr_val);
else
attrval = newSVsv(attrname);
}
int i;
int found = 0;
for ( i = 0; boolean_attributes[i].len; i++ ) {
if( attrlen == boolean_attributes[i].len ) {
char *attrname_s = SvPVbyte_nolen(attrname);
const char *t = boolean_attributes[i].str;
int len = attrlen;
while(len) {
if(toLOWER(*attrname_s) != *t)
break;
attrname_s++;
t++;
if(!--len) {
/* this is a boolean attribute */
if (p_state->bool_attr_val)
attrval = newSVsv(p_state->bool_attr_val);
else
attrval = newSVsv(attrname);
}
found = 1;
}
}
}
/* no matches were found, so set attr to undef */
if (!found)
attrval = newSV(0);

}

if (!CASE_SENSITIVE(p_state))
sv_lower(aTHX_ attrname);
Expand Down
13 changes: 7 additions & 6 deletions t/cases.t
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ my @result;
my ($self, $tag, $attr) = @_;
push @result, "START[$tag]";
for (sort keys %$attr) {
push @result, "\t$_: " . $attr->{$_};
push @result, "\t$_: " . ( defined $attr->{$_} ? $attr->{$_} : '<undef>' );
}
$start++;
}
Expand Down Expand Up @@ -57,10 +57,10 @@ my @result;
}

my @tests = (
'<a ">' => ['START[a]', "\t\": \""],
'<a ">' => ['START[a]', "\t\": <undef>"],
'<a/>' => ['START[a/]',],
'<a />' => ['START[a]', "\t/: /"],
'<a a/>' => ['START[a]', "\ta/: a/"],
'<a />' => ['START[a]', "\t/: <undef>"],
'<a a/>' => ['START[a]', "\ta/: <undef>"],
'<a a/=/>' => ['START[a]', "\ta/: /"],
'<a x="foo&nbsp;bar">' => ['START[a]', "\tx: foo\xA0bar"],
'<a x="foo&nbspbar">' => ['START[a]', "\tx: foo&nbspbar"],
Expand All @@ -73,7 +73,7 @@ my @tests = (
"2 <a href='foo bar'> 2" =>
['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
'2 <a href=foo bar> 2' =>
['TEXT[2 ]', 'START[a]', "\tbar: bar", "\thref: foo", 'TEXT[ 2]'],
['TEXT[2 ]', 'START[a]', "\tbar: <undef>", "\thref: foo", 'TEXT[ 2]'],
'2 <a href="foo bar"> 2' =>
['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
'2 <a href="foo\'bar"> 2' =>
Expand All @@ -84,7 +84,7 @@ my @tests = (
['TEXT[2 ]', 'START[a]', "\thref: foo\"bar", 'TEXT[ 2]'],
'2 <a.b> 2' => ['TEXT[2 ]', 'START[a.b]', 'TEXT[ 2]'],
'2 <a.b-12 a.b = 2 a> 2' =>
['TEXT[2 ]', 'START[a.b-12]', "\ta: a", "\ta.b: 2", 'TEXT[ 2]'],
['TEXT[2 ]', 'START[a.b-12]', "\ta: <undef>", "\ta.b: 2", 'TEXT[ 2]'],
'2 <a_b> 2' => ['TEXT[2 ]', 'START[a_b]', 'TEXT[ 2]'],
'<!ENTITY nbsp CDATA "&#160;" -- no-break space -->' =>
['DECLARATION[ENTITY nbsp CDATA "&#160;" -- no-break space --]'],
Expand All @@ -94,6 +94,7 @@ my @tests = (
'<!-- comment <!-- not comment --> comment -->' =>
['COMMENT[ comment <!]', 'COMMENT[> comment ]'],
'<!-- <a href="foo"> -->' => ['COMMENT[ <a href="foo"> ]'],
'<input type="checkbox" checked disabled foo>' => ['START[input]', "\tchecked: checked", "\tdisabled: disabled", "\tfoo: <undef>", "\ttype: checkbox" ],
);

plan tests => @tests / 2;
Expand Down
2 changes: 1 addition & 1 deletion t/msie-compat.t
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ $p->eof;

is($TEXT, <<'EOT');
[start_document,<undef>,,]
[start,a,<a name=`foo bar`>,name:`foo:bar`:bar`]
[start,a,<a name=`foo bar`>,name:`foo:bar`:<undef>]
[end_document,<undef>,,]
EOT

Expand Down
2 changes: 1 addition & 1 deletion t/parser.t
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ HTML

sub start {
my ($self, $tag, $attr) = @_;
$attr = join("/", map "$_=$attr->{$_}", sort keys %$attr);
$attr = join("/", map { "$_=" . ( defined $attr->{$_} ? $attr->{$_} : '<undef>' ) } sort keys %$attr);
$attr = "/$attr" if length $attr;
$OUT .= "<<$tag$attr>>|";
}
Expand Down