Skip to content

Commit 30b6089

Browse files
authored
Allow :is(), :has(), and :where() to forgive empty slots (#226)
Resolves #122
1 parent ac996fe commit 30b6089

File tree

10 files changed

+194
-62
lines changed

10 files changed

+194
-62
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
.DS_Store
2+
13
# Byte-compiled / optimized / DLL files
24
__pycache__/
35
*.py[cod]

docs/src/markdown/about/changelog.md

+9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Changelog
22

3+
## 2.3
4+
5+
- **NEW**: `:has()`, `:is()`, and `:where()` now use a forgiving selector list. While not as forgiving of
6+
syntax errors as CSS might be, it will forgive such things as empty sets and empty slots due to multiple consecutive
7+
commas, leading commas, or trailing commas. Essentially, these pseudo-classes will match all non-empty selectors and
8+
ignore empty ones. As the scraping environment is different than a browser environment, it was chosen not to
9+
aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform
10+
as expected.
11+
312
## 2.2.1
413

514
- **FIX**: Fix an issue with namespaces when one of the keys is `self`.

mkdocs.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,6 @@ plugins:
105105
- search:
106106
separator: '[:\s\-]+'
107107
- git-revision-date-localized
108-
- minify:
109-
minify_html: true
108+
# - minify:
109+
# minify_html: true
110110
- mkdocs_pymdownx_material_extras

requirements/docs.txt

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
mkdocs_pymdownx_material_extras==1.2.2
22
mkdocs-git-revision-date-localized-plugin
3-
mkdocs-minify-plugin
43
pyspelling

soupsieve/__meta__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -188,5 +188,5 @@ def parse_version(ver):
188188
return Version(major, minor, micro, release, pre, post, dev)
189189

190190

191-
__version_info__ = Version(2, 2, 1, "final")
191+
__version_info__ = Version(2, 3, 0, ".dev")
192192
__version__ = __version_info__._get_canonical()

soupsieve/css_match.py

+3
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,9 @@ def match_relations(self, el, relation):
784784

785785
found = False
786786

787+
if isinstance(relation[0], ct.SelectorNull) or relation[0].rel_type is None:
788+
return found
789+
787790
if relation[0].rel_type.startswith(':'):
788791
found = self.match_future_relations(el, relation)
789792
else:

soupsieve/css_parser.py

+77-34
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@
196196
FLG_IN_RANGE = 0x80
197197
FLG_OUT_OF_RANGE = 0x100
198198
FLG_PLACEHOLDER_SHOWN = 0x200
199+
FLG_FORGIVE = 0x400
199200

200201
# Maximum cached patterns to store
201202
_MAXCACHE = 500
@@ -715,11 +716,14 @@ def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
715716
flags = FLG_PSEUDO | FLG_OPEN
716717
if name == ':not':
717718
flags |= FLG_NOT
718-
if name == ':has':
719-
flags |= FLG_RELATIVE
719+
elif name == ':has':
720+
flags |= FLG_RELATIVE | FLG_FORGIVE
721+
elif name in (':where', ':is'):
722+
flags |= FLG_FORGIVE
720723

721724
sel.selectors.append(self.parse_selectors(iselector, index, flags))
722725
has_selector = True
726+
723727
return has_selector
724728

725729
def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
@@ -731,12 +735,9 @@ def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index)
731735
if combinator == COMMA_COMBINATOR:
732736
if not has_selector:
733737
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
734-
# or following another comma, both of which are unexpected. Commas must split selectors.
735-
raise SelectorSyntaxError(
736-
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
737-
self.pattern,
738-
index
739-
)
738+
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
739+
sel.no_match = True
740+
740741
sel.rel_type = rel_type
741742
selectors[-1].relations.append(sel)
742743
rel_type = ":" + WS_COMBINATOR
@@ -757,41 +758,50 @@ def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index)
757758
self.pattern,
758759
index
759760
)
761+
760762
# Set the leading combinator for the next selector.
761763
rel_type = ':' + combinator
762-
sel = _Selector()
763764

765+
sel = _Selector()
764766
has_selector = False
765767
return has_selector, sel, rel_type
766768

767-
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
769+
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index):
768770
"""Parse combinator tokens."""
769771

770772
combinator = m.group('relation').strip()
771773
if not combinator:
772774
combinator = WS_COMBINATOR
773775
if not has_selector:
774-
raise SelectorSyntaxError(
775-
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
776-
self.pattern,
777-
index
778-
)
776+
if not is_forgive or combinator != COMMA_COMBINATOR:
777+
raise SelectorSyntaxError(
778+
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
779+
self.pattern,
780+
index
781+
)
779782

780-
if combinator == COMMA_COMBINATOR:
781-
if not sel.tag and not is_pseudo:
782-
# Implied `*`
783-
sel.tag = ct.SelectorTag('*', None)
784-
sel.relations.extend(relations)
785-
selectors.append(sel)
786-
del relations[:]
783+
# If we are in a forgiving pseudo class, just make the selector a "no match"
784+
if combinator == COMMA_COMBINATOR:
785+
sel.no_match = True
786+
del relations[:]
787+
selectors.append(sel)
787788
else:
788-
sel.relations.extend(relations)
789-
sel.rel_type = combinator
790-
del relations[:]
791-
relations.append(sel)
792-
sel = _Selector()
789+
if combinator == COMMA_COMBINATOR:
790+
if not sel.tag and not is_pseudo:
791+
# Implied `*`
792+
sel.tag = ct.SelectorTag('*', None)
793+
sel.relations.extend(relations)
794+
selectors.append(sel)
795+
del relations[:]
796+
else:
797+
sel.relations.extend(relations)
798+
sel.rel_type = combinator
799+
del relations[:]
800+
relations.append(sel)
793801

802+
sel = _Selector()
794803
has_selector = False
804+
795805
return has_selector, sel
796806

797807
def parse_class_id(self, sel, m, has_selector):
@@ -862,12 +872,15 @@ def parse_pseudo_dir(self, sel, m, has_selector):
862872
def parse_selectors(self, iselector, index=0, flags=0):
863873
"""Parse selectors."""
864874

875+
# Initialize important variables
865876
sel = _Selector()
866877
selectors = []
867878
has_selector = False
868879
closed = False
869880
relations = []
870881
rel_type = ":" + WS_COMBINATOR
882+
883+
# Setup various flags
871884
is_open = bool(flags & FLG_OPEN)
872885
is_pseudo = bool(flags & FLG_PSEUDO)
873886
is_relative = bool(flags & FLG_RELATIVE)
@@ -878,7 +891,9 @@ def parse_selectors(self, iselector, index=0, flags=0):
878891
is_in_range = bool(flags & FLG_IN_RANGE)
879892
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
880893
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
894+
is_forgive = bool(flags & FLG_FORGIVE)
881895

896+
# Print out useful debug stuff
882897
if self.debug: # pragma: no cover
883898
if is_pseudo:
884899
print(' is_pseudo: True')
@@ -900,7 +915,10 @@ def parse_selectors(self, iselector, index=0, flags=0):
900915
print(' is_out_of_range: True')
901916
if is_placeholder_shown:
902917
print(' is_placeholder_shown: True')
918+
if is_forgive:
919+
print(' is_forgive: True')
903920

921+
# The algorithm for relative selectors requires an initial selector in the selector list
904922
if is_relative:
905923
selectors.append(_Selector())
906924

@@ -929,11 +947,13 @@ def parse_selectors(self, iselector, index=0, flags=0):
929947
is_html = True
930948
elif key == 'pseudo_close':
931949
if not has_selector:
932-
raise SelectorSyntaxError(
933-
"Expected a selector at postion {}".format(m.start(0)),
934-
self.pattern,
935-
m.start(0)
936-
)
950+
if not is_forgive:
951+
raise SelectorSyntaxError(
952+
"Expected a selector at postion {}".format(m.start(0)),
953+
self.pattern,
954+
m.start(0)
955+
)
956+
sel.no_match = True
937957
if is_open:
938958
closed = True
939959
break
@@ -950,7 +970,7 @@ def parse_selectors(self, iselector, index=0, flags=0):
950970
)
951971
else:
952972
has_selector, sel = self.parse_combinator(
953-
sel, m, has_selector, selectors, relations, is_pseudo, index
973+
sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
954974
)
955975
elif key == 'attribute':
956976
has_selector = self.parse_attribute_selector(sel, m, has_selector)
@@ -969,13 +989,15 @@ def parse_selectors(self, iselector, index=0, flags=0):
969989
except StopIteration:
970990
pass
971991

992+
# Handle selectors that are not closed
972993
if is_open and not closed:
973994
raise SelectorSyntaxError(
974995
"Unclosed pseudo-class at position {}".format(index),
975996
self.pattern,
976997
index
977998
)
978999

1000+
# Cleanup completed selector piece
9791001
if has_selector:
9801002
if not sel.tag and not is_pseudo:
9811003
# Implied `*`
@@ -987,8 +1009,28 @@ def parse_selectors(self, iselector, index=0, flags=0):
9871009
sel.relations.extend(relations)
9881010
del relations[:]
9891011
selectors.append(sel)
990-
else:
1012+
1013+
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
1014+
elif is_forgive:
1015+
if is_relative:
1016+
# Handle relative selectors pseudo-classes with empty slots like `:has()`
1017+
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
1018+
sel.rel_type = rel_type
1019+
sel.no_match = True
1020+
selectors[-1].relations.append(sel)
1021+
has_selector = True
1022+
else:
1023+
# Handle normal pseudo-classes with empty slots
1024+
if not selectors or not relations:
1025+
# Others like `:is()` etc.
1026+
sel.no_match = True
1027+
del relations[:]
1028+
selectors.append(sel)
1029+
has_selector = True
1030+
1031+
if not has_selector:
9911032
# We will always need to finish a selector when `:has()` is used as it leads with combining.
1033+
# May apply to others as well.
9921034
raise SelectorSyntaxError(
9931035
'Expected a selector at position {}'.format(index),
9941036
self.pattern,
@@ -1009,6 +1051,7 @@ def parse_selectors(self, iselector, index=0, flags=0):
10091051
if is_placeholder_shown:
10101052
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
10111053

1054+
# Return selector list
10121055
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
10131056

10141057
def selector_iter(self, pattern):

tests/test_level3/test_not.py

+21
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Test not selectors."""
22
from .. import util
33
from bs4 import BeautifulSoup as BS
4+
from soupsieve import SelectorSyntaxError
45

56

67
class TestNot(util.TestCase):
@@ -55,3 +56,23 @@ def test_none_inputs(self):
5556
soup = BS('<span foo="something">text</span>', 'html.parser')
5657
soup.span['foo'] = None
5758
self.assertEqual(len(soup.select('span:not([foo])')), 0)
59+
60+
def test_invalid_pseudo_empty(self):
61+
"""Test pseudo class group with empty set."""
62+
63+
self.assert_raises(':not()', SelectorSyntaxError)
64+
65+
def test_invalid_pseudo_trailing_comma(self):
66+
"""Test pseudo class group with trailing comma."""
67+
68+
self.assert_raises(':not(.class,)', SelectorSyntaxError)
69+
70+
def test_invalid_pseudo_leading_comma(self):
71+
"""Test pseudo class group with leading comma."""
72+
73+
self.assert_raises(':not(,.class)', SelectorSyntaxError)
74+
75+
def test_invalid_pseudo_multi_comma(self):
76+
"""Test pseudo class group with multiple commas."""
77+
78+
self.assert_raises(':not(.this,,.that)', SelectorSyntaxError)

tests/test_level4/test_has.py

+39-19
Original file line numberDiff line numberDiff line change
@@ -129,20 +129,50 @@ def test_has_nested_pseudo(self):
129129
flags=util.HTML
130130
)
131131

132-
def test_invalid_incomplete_has(self):
133-
"""Test `:has()` fails with just a combinator."""
132+
def test_has_empty(self):
133+
"""Test has with empty slot due to multiple commas."""
134134

135-
self.assert_raises(':has(>)', SelectorSyntaxError)
135+
self.assert_selector(
136+
self.MARKUP2,
137+
'div:has()',
138+
[],
139+
flags=util.HTML
140+
)
136141

137-
def test_invalid_has_empty(self):
138-
"""Test `:has()` fails with empty function parameters."""
142+
def test_has_multi_commas(self):
143+
"""Test has with empty slot due to multiple commas."""
139144

140-
self.assert_raises(':has()', SelectorSyntaxError)
145+
self.assert_selector(
146+
self.MARKUP2,
147+
'div:has(> .bbbb, .ffff, , .jjjj)',
148+
['0', '4', '8'],
149+
flags=util.HTML
150+
)
141151

142-
def test_invalid_has_double_comma(self):
143-
"""Test `:has()` fails with consecutive commas."""
152+
def test_has_leading_commas(self):
153+
"""Test has with empty slot due to leading commas."""
144154

145-
self.assert_raises(':has(> has,, a)', SelectorSyntaxError)
155+
self.assert_selector(
156+
self.MARKUP2,
157+
'div:has(, > .bbbb, .ffff, .jjjj)',
158+
['0', '4', '8'],
159+
flags=util.HTML
160+
)
161+
162+
def test_has_trailing_commas(self):
163+
"""Test has with empty slot due to trailing commas."""
164+
165+
self.assert_selector(
166+
self.MARKUP2,
167+
'div:has(> .bbbb, .ffff, .jjjj, )',
168+
['0', '4', '8'],
169+
flags=util.HTML
170+
)
171+
172+
def test_invalid_incomplete_has(self):
173+
"""Test `:has()` fails with just a combinator."""
174+
175+
self.assert_raises(':has(>)', SelectorSyntaxError)
146176

147177
def test_invalid_has_double_combinator(self):
148178
"""Test `:has()` fails with consecutive combinators."""
@@ -155,13 +185,3 @@ def test_invalid_has_trailing_combinator(self):
155185
"""Test `:has()` fails with trailing combinator."""
156186

157187
self.assert_raises(':has(> has >)', SelectorSyntaxError)
158-
159-
def test_invalid_has_trailing_comma(self):
160-
"""Test `:has()` fails with trailing comma."""
161-
162-
self.assert_raises(':has(> has,)', SelectorSyntaxError)
163-
164-
def test_invalid_has_start_comma(self):
165-
"""Test `:has()` fails with trailing comma."""
166-
167-
self.assert_raises(':has(, p)', SelectorSyntaxError)

0 commit comments

Comments
 (0)