diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index bcd7c26..1a250ab 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -6,6 +6,7 @@ on:
     tags-ignore:
       - '*'
   pull_request:
+  workflow_dispatch:
 jobs:
   perl:
     runs-on: ubuntu-latest
@@ -14,18 +15,19 @@ jobs:
       matrix:
         perl-version:
           - '5.38'
-          # - '5.34'
-          # - '5.32'
-          # - '5.30'
-          # - '5.28'
-          # - '5.26'
-          # - '5.24'
-          # - '5.22'
-          # - '5.20'
+          - '5.36'
+          - '5.34'
+          - '5.32'
+          - '5.30'
+          - '5.28'
+          - '5.26'
+          - '5.24'
+          - '5.22'
+          - '5.20'
           - '5.18'
-          # - '5.16'
-          # - '5.14'
-          # - '5.12'
+          - '5.16'
+          - '5.14'
+          - '5.12'
           - '5.10'
     container:
       image: perl:${{ matrix.perl-version }}
diff --git a/.gitignore b/.gitignore
index 053e57a..2d80996 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,4 @@ Makefile
 MYMETA.*
 .build/
 HTML-Parser-*/
-
+local/
diff --git a/hparser.c b/hparser.c
index 769e6e0..a906c51 100644
--- a/hparser.c
+++ b/hparser.c
@@ -85,6 +85,43 @@ static const char * const argname[] = {
     /* ARG_FLAG_FLAT_ARRAY */
 };
 
+/* Attribute names that HTML treats as boolean, each with its length.
+ * Names are lowercase because lookups compare against toLOWER()ed input;
+ * the {0, 0} entry terminates the table.
+ * https://meiert.com/en/blog/boolean-attributes-of-html/ */
+static const struct boolean_attribute {
+    int len;
+    const char* str;
+}
+boolean_attributes[] = {
+    {15, "allowfullscreen"},
+    {19, "allowpaymentrequest"},
+    {5, "async"},
+    {9, "autofocus"},
+    {8, "autoplay"},
+    {7, "checked"},
+    {8, "controls"},
+    {7, "default"},
+    {8, "disabled"},
+    {14, "formnovalidate"},
+    {6, "hidden"},
+    {5, "ismap"},
+    {9, "itemscope"},
+    {4, "loop"},
+    {8, "multiple"},
+    {5, "muted"},
+    {8, "nomodule"},
+    {10, "novalidate"},
+    {4, "open"},
+    {11, "playsinline"},
+    {8, "readonly"},
+    {8, "required"},
+    {8, "reversed"},
+    {8, "selected"},
+    {9, "truespeed"},
+    {0, 0}
+};
+
 #define CASE_SENSITIVE(p_state) \
          ((p_state)->xml_mode || (p_state)->case_sensitive)
 #define STRICT_NAMES(p_state) \
@@ -438,8 +475,8 @@ report_event(PSTATE* p_state,
         }
 
         for (i = 1; i < num_tokens; i += 2) {
-            SV* attrname = newSVpvn(tokens[i].beg,
-                                    tokens[i].end-tokens[i].beg);
+            int attrlen = tokens[i].end-tokens[i].beg;
+            SV* attrname = newSVpvn(tokens[i].beg, attrlen);
             SV* attrval;
 
             if (utf8)
@@ -465,11 +502,34 @@ report_event(PSTATE* p_state,
             }
         }
         else { /* boolean */
-            if (p_state->bool_attr_val)
-                attrval = newSVsv(p_state->bool_attr_val);
-            else
-                attrval = newSVsv(attrname);
-        }
+            /* Only names on the boolean_attributes list get a value;
+             * any other valueless attribute is reported as undef. */
+            int j;
+            int found = 0;
+            for (j = 0; !found && boolean_attributes[j].len; j++) {
+                /* Compare the raw token bytes rather than the SV's
+                 * buffer, so attrname itself is never modified. */
+                const char *s = tokens[i].beg;
+                const char *t = boolean_attributes[j].str;
+                int len = attrlen;
+                if (len != boolean_attributes[j].len)
+                    continue;
+                while (len && toLOWER(*s) == *t) {
+                    s++;
+                    t++;
+                    len--;
+                }
+                /* a match requires every byte to have compared equal */
+                if (!len)
+                    found = 1;
+            }
+            if (!found)
+                attrval = newSV(0);
+            else if (p_state->bool_attr_val)
+                attrval = newSVsv(p_state->bool_attr_val);
+            else
+                attrval = newSVsv(attrname);
+        }
 
         if (!CASE_SENSITIVE(p_state))
             sv_lower(aTHX_ attrname);
diff --git a/t/cases.t b/t/cases.t
index 4331a27..7d8a66b 100644
--- a/t/cases.t
+++ b/t/cases.t
@@ -24,7 +24,7 @@ my @result;
         my ($self, $tag, $attr) = @_;
         push @result, "START[$tag]";
         for (sort keys %$attr) {
-            push @result, "\t$_: " . $attr->{$_};
+            push @result, "\t$_: " . ( defined $attr->{$_} ? $attr->{$_} : '<undef>' );
         }
         $start++;
     }
@@ -57,10 +57,10 @@ my @result;
 }
 
 my @tests = (
-    '<a ">' => ['START[a]', "\t\": \""],
+    '<a ">' => ['START[a]', "\t\": <undef>"],
     '<a/>' => ['START[a/]',],
-    '<a />' => ['START[a]', "\t/: /"],
-    '<a a/>' => ['START[a]', "\ta/: a/"],
+    '<a />' => ['START[a]', "\t/: <undef>"],
+    '<a a/>' => ['START[a]', "\ta/: <undef>"],
     '<a a/=/>' => ['START[a]', "\ta/: /"],
     '<a x="foo bar">' => ['START[a]', "\tx: foo\xA0bar"],
     '<a x="foo bar">' => ['START[a]', "\tx: foo bar"],
@@ -73,7 +73,7 @@ my @tests = (
     "2 <a href='foo bar'> 2" =>
         ['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
     '2 <a href=foo bar> 2' =>
-        ['TEXT[2 ]', 'START[a]', "\tbar: bar", "\thref: foo", 'TEXT[ 2]'],
+        ['TEXT[2 ]', 'START[a]', "\tbar: <undef>", "\thref: foo", 'TEXT[ 2]'],
     '2 <a href="foo bar"> 2' =>
         ['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
     '2 <a href="foo\'bar"> 2' =>
@@ -84,7 +84,7 @@ my @tests = (
         ['TEXT[2 ]', 'START[a]', "\thref: foo\"bar", 'TEXT[ 2]'],
     '2 <a.b> 2' => ['TEXT[2 ]', 'START[a.b]', 'TEXT[ 2]'],
     '2 <a.b-12 a.b = 2 a> 2' =>
-        ['TEXT[2 ]', 'START[a.b-12]', "\ta: a", "\ta.b: 2", 'TEXT[ 2]'],
+        ['TEXT[2 ]', 'START[a.b-12]', "\ta: <undef>", "\ta.b: 2", 'TEXT[ 2]'],
     '2 <a_b> 2' => ['TEXT[2 ]', 'START[a_b]', 'TEXT[ 2]'],
     '<!ENTITY nbsp CDATA " " -- no-break space -->' =>
         ['DECLARATION[ENTITY nbsp CDATA " " -- no-break space --]'],
@@ -94,6 +94,8 @@ my @tests = (
     '<!-- comment <!-- not comment --> comment -->' =>
         ['COMMENT[ comment <!]', 'COMMENT[> comment ]'],
     '<!-- <a href="foo"> -->' => ['COMMENT[ <a href="foo"> ]'],
+    '<input type="checkbox" checked disabled foo>' => ['START[input]', "\tchecked: checked", "\tdisabled: disabled", "\tfoo: <undef>", "\ttype: checkbox" ],
+    '<a apple>' => ['START[a]', "\tapple: <undef>"],
 );
 
 plan tests => @tests / 2;
diff --git a/t/msie-compat.t b/t/msie-compat.t
index 3c170c5..8ba6279 100644
--- a/t/msie-compat.t
+++ b/t/msie-compat.t
@@ -62,7 +62,7 @@ $p->eof;
 
 is($TEXT, <<'EOT');
 [start_document,<undef>,,]
-[start,a,<a name=`foo bar`>,name:`foo:bar`:bar`]
+[start,a,<a name=`foo bar`>,name:`foo:bar`:<undef>]
 [end_document,<undef>,,]
 EOT
 
diff --git a/t/parser.t b/t/parser.t
index ea5a3a4..a947dc0 100644
--- a/t/parser.t
+++ b/t/parser.t
@@ -71,7 +71,7 @@ HTML
     sub start
     {
         my ($self, $tag, $attr) = @_;
-        $attr = join("/", map "$_=$attr->{$_}", sort keys %$attr);
+        $attr = join("/", map { "$_=" . ( defined $attr->{$_} ? $attr->{$_} : '<undef>' ) } sort keys %$attr);
         $attr = "/$attr" if length $attr;
         $OUT .= "<<$tag$attr>>|";
     }