Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cassandra/c_shard_info.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ cdef class ShardingInfo():

def __init__(self, shard_id, shards_count, partitioner, sharding_algorithm, sharding_ignore_msb, shard_aware_port,
shard_aware_port_ssl):
self.shards_count = int(shards_count)
self.shards_count = int(shards_count) if shards_count else 0
self.partitioner = partitioner
self.sharding_algorithm = sharding_algorithm
self.sharding_ignore_msb = int(sharding_ignore_msb)
self.sharding_ignore_msb = int(sharding_ignore_msb) if sharding_ignore_msb else 0
self.shard_aware_port = int(shard_aware_port) if shard_aware_port else 0
self.shard_aware_port_ssl = int(shard_aware_port_ssl) if shard_aware_port_ssl else 0

Expand Down
11 changes: 5 additions & 6 deletions cassandra/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3888,14 +3888,13 @@ def _try_connect(self, endpoint):
"registering watchers and refreshing schema and topology",
connection)

# Indirect way to determine if conencted to a ScyllaDB cluster, which does not support peers_v2
# If sharding information is available, it's a ScyllaDB cluster, so do not use peers_v2 table.
if connection.features.sharding_info is not None:
# ScyllaDB does not support peers_v2. Use is_scylla (not sharding_info)
# so that clusters with shard-awareness disabled are still detected correctly.
if connection.features.is_scylla:
self._uses_peers_v2 = False

# Only ScyllaDB supports "USING TIMEOUT"
# Sharding information signals it is ScyllaDB
self._metadata_request_timeout = None if connection.features.sharding_info is None or not self._cluster.metadata_request_timeout \
# Only ScyllaDB supports "USING TIMEOUT". Use is_scylla for the same reason.
self._metadata_request_timeout = None if not connection.features.is_scylla or not self._cluster.metadata_request_timeout \
else datetime.timedelta(seconds=self._cluster.metadata_request_timeout)

self._tablets_routing_v1 = connection.features.tablets_routing_v1
Expand Down
10 changes: 8 additions & 2 deletions cassandra/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2578,8 +2578,14 @@ class SchemaParserV3(SchemaParserV22):
_SELECT_VIEWS = "SELECT * FROM system_schema.views"

def _is_not_scylla(self):
"""Check if NOT connected to ScyllaDB by checking for shard awareness."""
return getattr(getattr(self.connection, 'features', None), 'shard_id', None) is None
"""Check if NOT connected to ScyllaDB.

Uses the is_scylla flag from ProtocolFeatures, which is set from the
presence of Scylla-specific extension keys in the SUPPORTED response
(e.g. SCYLLA_LWT_ADD_METADATA_MARK, SCYLLA_RATE_LIMIT_ERROR) and
therefore remains True even when shard-awareness is disabled.
"""
return not getattr(getattr(self.connection, 'features', None), 'is_scylla', False)
Comment on lines +2581 to +2588

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ProtocolFeatures always have is_scylla attribute after your changes, why do you need getattr here?

Also, is it possible for connection to not have features?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was mostly a copy-paste from the original code, IIRC. I'll look into it.
(I'm also unsure - what if self.connection is null?)


_table_name_col = 'table_name'

Expand Down
34 changes: 30 additions & 4 deletions cassandra/protocol_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,43 @@ class ProtocolFeatures(object):
sharding_info = None
tablets_routing_v1 = False
lwt_info = None
is_scylla = False

def __init__(self, rate_limit_error=None, shard_id=0, sharding_info=None, tablets_routing_v1=False, lwt_info=None):
def __init__(self, rate_limit_error=None, shard_id=0, sharding_info=None, tablets_routing_v1=False, lwt_info=None, is_scylla=False):
self.rate_limit_error = rate_limit_error
self.shard_id = shard_id
self.sharding_info = sharding_info
self.tablets_routing_v1 = tablets_routing_v1
self.lwt_info = lwt_info
self.is_scylla = is_scylla

@staticmethod
def parse_from_supported(supported):
rate_limit_error = ProtocolFeatures.maybe_parse_rate_limit_error(supported)
shard_id, sharding_info = ProtocolFeatures.parse_sharding_info(supported)
tablets_routing_v1 = ProtocolFeatures.parse_tablets_info(supported)
lwt_info = ProtocolFeatures.parse_lwt_info(supported)
return ProtocolFeatures(rate_limit_error, shard_id, sharding_info, tablets_routing_v1, lwt_info)
is_scylla = ProtocolFeatures.detect_scylla(supported, sharding_info)
return ProtocolFeatures(rate_limit_error, shard_id, sharding_info, tablets_routing_v1, lwt_info, is_scylla)

@staticmethod
def detect_scylla(supported, sharding_info):
"""Detect ScyllaDB from SUPPORTED extensions, independent of shard awareness.

ScyllaDB is identified by the presence of any known Scylla-specific
extension key in the SUPPORTED response. Checking only shard-related
fields (SCYLLA_NR_SHARDS, etc.) is insufficient because those are
absent when shard-awareness is disabled on the server side
(allow_shard_aware_drivers: false), which would cause the driver to
misidentify a ScyllaDB cluster as Cassandra and, for example, try
to query the peers_v2 table that ScyllaDB does not support.
"""
return (
LWT_ADD_METADATA_MARK in supported
or RATE_LIMIT_ERROR_EXTENSION in supported
or TABLETS_ROUTING_V1 in supported
or sharding_info is not None
)

@staticmethod
def maybe_parse_rate_limit_error(supported):
Expand Down Expand Up @@ -73,8 +95,12 @@ def parse_sharding_info(options):
sharding_algorithm == "biased-token-round-robin" or sharding_ignore_msb):
return 0, None

return int(shard_id), _ShardingInfo(shard_id, shards_count, partitioner, sharding_algorithm, sharding_ignore_msb,
shard_aware_port, shard_aware_port_ssl)
# SCYLLA_SHARD may be absent even when other shard fields are present
# (e.g. the connection landed on shard 0 and the server omits the field).
# Default to 0 to avoid int(None) crash.
resolved_shard_id = int(shard_id) if shard_id is not None else 0
return resolved_shard_id, _ShardingInfo(shard_id, shards_count, partitioner, sharding_algorithm, sharding_ignore_msb,
shard_aware_port, shard_aware_port_ssl)
Comment on lines +101 to +103
Comment on lines +98 to +103

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can SCYLLA_SHARD be absent, as this comment indicates?
protocol-extensions.md doesn't mention it being optional. Scylla code also seems to insert it unconditionally: https://github.com/scylladb/scylladb/blob/b9d508d4097a91181da582305900c9318a9fec50/transport/server.cc#L2046-L2058

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the whole issue that started this thread - if Scylla server-side disables sharding, it doesn't advertise it whatsoever.

@Lorak-mmk Lorak-mmk Jun 30, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is not what I'm asking.

SCYLLA_SHARD may be absent even when other shard fields are present

Afaik disabling sharding on scylla-side will make it not send ANY sharding-related values in SUPPORTED, which makes this comment false. See the code fragment I linked - all sharding related values are guarded by _server._config.allow_shard_aware_drivers. I don't see a possible scenario where SCYLLA_SHARD is absent but e.g. SCYLLA_NR_SHARDS is present.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. OK.



@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions cassandra/shard_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
class _ShardingInfo(object):

def __init__(self, shard_id, shards_count, partitioner, sharding_algorithm, sharding_ignore_msb, shard_aware_port, shard_aware_port_ssl):
self.shards_count = int(shards_count)
self.shards_count = int(shards_count) if shards_count else 0
self.partitioner = partitioner
self.sharding_algorithm = sharding_algorithm
self.sharding_ignore_msb = int(sharding_ignore_msb)
self.sharding_ignore_msb = int(sharding_ignore_msb) if sharding_ignore_msb else 0
self.shard_aware_port = int(shard_aware_port) if shard_aware_port else None
self.shard_aware_port_ssl = int(shard_aware_port_ssl) if shard_aware_port_ssl else None
Comment on lines 22 to 29

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it is possible for shards_count / sharding_ignore_msb when other values are present. Either sharding info is enabled, in which those values are present, or it is not, in which case we should not attempt to construct _ShardingInfo object. In both cases, those new guards are unnecessary.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So do you want me to change the code to check for everything works with or without _ShardingInfo? I thought it might be a slightly bigger change. I can look into it.


Expand Down
90 changes: 90 additions & 0 deletions tests/unit/test_protocol_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,93 @@ class OptionsHolder(object):
assert protocol_features.rate_limit_error == 123
assert protocol_features.shard_id == 0
assert protocol_features.sharding_info is None

# -----------------------------------------------------------------
# Tests for is_scylla detection (independent of shard awareness)
# Regression for: ScyllaDB misidentified as Cassandra when sharding
# is disabled (allow_shard_aware_drivers: false).
# -----------------------------------------------------------------

def test_is_scylla_detected_via_lwt(self):
"""ScyllaDB is recognised from SCYLLA_LWT_ADD_METADATA_MARK alone."""
pf = ProtocolFeatures.parse_from_supported({
'SCYLLA_LWT_ADD_METADATA_MARK': ['LWT_OPTIMIZATION_META_BIT_MASK=8'],
})
assert pf.is_scylla is True
assert pf.shard_id == 0
assert pf.sharding_info is None # no shard-aware connections expected

def test_is_scylla_detected_via_rate_limit(self):
"""ScyllaDB is recognised from SCYLLA_RATE_LIMIT_ERROR alone."""
pf = ProtocolFeatures.parse_from_supported({
'SCYLLA_RATE_LIMIT_ERROR': ['ERROR_CODE=42'],
})
assert pf.is_scylla is True
assert pf.shard_id == 0
assert pf.sharding_info is None

def test_is_scylla_detected_via_tablets(self):
"""ScyllaDB is recognised from TABLETS_ROUTING_V1 alone."""
pf = ProtocolFeatures.parse_from_supported({
'TABLETS_ROUTING_V1': [''],
})
assert pf.is_scylla is True
assert pf.shard_id == 0
assert pf.sharding_info is None

def test_is_scylla_detected_via_sharding(self):
"""ScyllaDB with full sharding is recognised and sharding_info is populated."""
pf = ProtocolFeatures.parse_from_supported({
'SCYLLA_SHARD': ['3'],
'SCYLLA_NR_SHARDS': ['12'],
'SCYLLA_PARTITIONER': ['org.apache.cassandra.dht.Murmur3Partitioner'],
'SCYLLA_SHARDING_ALGORITHM': ['biased-token-round-robin'],
'SCYLLA_SHARDING_IGNORE_MSB': ['12'],
'SCYLLA_LWT_ADD_METADATA_MARK': ['LWT_OPTIMIZATION_META_BIT_MASK=8'],
})
assert pf.is_scylla is True
assert pf.shard_id == 3
assert pf.sharding_info is not None
assert pf.sharding_info.shards_count == 12

def test_cassandra_is_not_scylla(self):
"""Pure Cassandra SUPPORTED response must not set is_scylla."""
pf = ProtocolFeatures.parse_from_supported({
'CQL_VERSION': ['3.0.0'],
'COMPRESSION': ['lz4', 'snappy'],
})
assert pf.is_scylla is False
assert pf.sharding_info is None

def test_scylla_without_sharding_no_crash(self):
"""
Regression test for F1: SCYLLA_PARTITIONER present but SCYLLA_NR_SHARDS
and SCYLLA_SHARDING_IGNORE_MSB absent must not raise TypeError.
Mirrors the scenario where only some shard fields are advertised.
"""
# Should not raise even though shards_count / sharding_ignore_msb are None.
pf = ProtocolFeatures.parse_from_supported({
'SCYLLA_PARTITIONER': ['org.apache.cassandra.dht.Murmur3Partitioner'],
'SCYLLA_LWT_ADD_METADATA_MARK': ['LWT_OPTIMIZATION_META_BIT_MASK=8'],
})
assert pf.is_scylla is True
# SCYLLA_PARTITIONER passes the sharding guard, so sharding_info is populated
# with zero defaults rather than crashing.
assert pf.shard_id == 0
assert pf.sharding_info is not None
assert pf.sharding_info.shards_count == 0
assert pf.sharding_info.sharding_ignore_msb == 0

def test_scylla_sharding_algorithm_only_no_crash(self):
"""
Regression: SCYLLA_SHARDING_ALGORITHM present without SCYLLA_NR_SHARDS
must not raise TypeError.
"""
pf = ProtocolFeatures.parse_from_supported({
'SCYLLA_SHARDING_ALGORITHM': ['biased-token-round-robin'],
'SCYLLA_RATE_LIMIT_ERROR': ['ERROR_CODE=42'],
})
assert pf.is_scylla is True
assert pf.shard_id == 0
assert pf.sharding_info is not None
assert pf.sharding_info.shards_count == 0
Loading