Skip to content

Commit 9157c7b

Browse files
nsdeschenesClaude Sonnet 4codexclaude
authored
fix(attributes): Validate user tag attributes exist in storage (#110745)
The goal of this PR is to actually validate that the keys the user is querying for actually exist in their projects. The problem with the initial implementation is that it seems to have just checked to see if the passed keys "fit" what an attribute key would look like and not if it actually existed 🤦 To do this validation we check each of the different types (`boolean`, `number`, and `string`) behind a thread pool kicking one RPC call for each time to a max of 3 so that we can validate in parallel. Also made some modifications to the endpoint: - Added support for `statsPeriod` so we can search smaller time windows and ensure that the attributes exist when the user thinks they should - Moved `itemType` from the post body to a query param. It's not something that we're validating against so i think it's better to live in the query params. --------- Co-authored-by: Claude Sonnet 4 <noreply@example.com> Co-authored-by: OpenAI Codex <noreply@openai.com> Co-authored-by: Claude Sonnet 4 <noreply@anthropic.com>
1 parent 1e6cf10 commit 9157c7b

3 files changed

Lines changed: 267 additions & 63 deletions

File tree

src/sentry/api/endpoints/organization_trace_item_attributes.py

Lines changed: 116 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212
TraceItemAttributeNamesResponse,
1313
TraceItemAttributeValuesRequest,
1414
)
15-
from sentry_protos.snuba.v1.request_common_pb2 import PageToken, RequestMeta
15+
from sentry_protos.snuba.v1.request_common_pb2 import (
16+
PageToken,
17+
RequestMeta,
18+
)
1619
from sentry_protos.snuba.v1.request_common_pb2 import (
1720
TraceItemType as ProtoTraceItemType,
1821
)
1922
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
20-
from sentry_protos.snuba.v1.trace_item_filter_pb2 import TraceItemFilter
23+
from sentry_protos.snuba.v1.trace_item_filter_pb2 import ExistsFilter, OrFilter, TraceItemFilter
2124

2225
from sentry import features, options
2326
from sentry.api.api_owners import ApiOwner
@@ -840,10 +843,13 @@ def adjust_start_end_window(start_date: datetime, end_date: datetime) -> tuple[d
840843
return start_date, end_date
841844

842845

843-
class OrganizationTraceItemAttributeValidateSerializer(serializers.Serializer):
846+
class OrganizationTraceItemAttributeValidateQuerySerializer(serializers.Serializer):
844847
itemType = serializers.ChoiceField(
845848
[e.value for e in SupportedTraceItemType], required=True, source="item_type"
846849
)
850+
851+
852+
class OrganizationTraceItemAttributeValidateBodySerializer(serializers.Serializer):
847853
attributes = serializers.ListField(
848854
child=serializers.CharField(max_length=300),
849855
min_length=1,
@@ -862,6 +868,72 @@ def serialize_type(search_type: constants.SearchType) -> str:
862868
return "number"
863869

864870

871+
def _check_attributes_by_type(
872+
meta: RequestMeta,
873+
attr_type: AttributeKey.Type.ValueType,
874+
names: list[str],
875+
) -> set[tuple[AttributeKey.Type.ValueType, str]]:
876+
"""Check which typed attribute names exist in storage for the active window."""
877+
if not names:
878+
return set()
879+
880+
requested_names = set(names)
881+
names_request = TraceItemAttributeNamesRequest(
882+
meta=meta,
883+
limit=10000,
884+
type=attr_type,
885+
intersecting_attributes_filter=TraceItemFilter(
886+
or_filter=OrFilter(
887+
filters=[
888+
TraceItemFilter(
889+
exists_filter=ExistsFilter(key=AttributeKey(type=attr_type, name=name))
890+
)
891+
for name in requested_names
892+
]
893+
)
894+
),
895+
)
896+
names_response = snuba_rpc.attribute_names_rpc(names_request)
897+
return {
898+
(attr_type, attribute.name)
899+
for attribute in names_response.attributes
900+
if attribute.name in requested_names
901+
}
902+
903+
904+
# We want to limit the number of threads to the number of attribute types to avoid overwhelming the RPC server.
905+
MAX_ATTRIBUTE_VALIDATION_THREADS = 3
906+
907+
908+
def _check_attributes_exist(
909+
resolver: SearchResolver,
910+
item_type: SupportedTraceItemType,
911+
attrs_by_type: dict[AttributeKey.Type.ValueType, list[str]],
912+
) -> set[tuple[AttributeKey.Type.ValueType, str]]:
913+
"""Check which typed attribute internal names exist in storage."""
914+
if not attrs_by_type:
915+
return set()
916+
917+
meta = resolver.resolve_meta(referrer=Referrer.API_TRACE_ITEM_ATTRIBUTE_VALIDATE.value)
918+
meta.trace_item_type = constants.SUPPORTED_TRACE_ITEM_TYPE_MAP.get(
919+
item_type, ProtoTraceItemType.TRACE_ITEM_TYPE_SPAN
920+
)
921+
922+
found: set[tuple[AttributeKey.Type.ValueType, str]] = set()
923+
with ContextPropagatingThreadPoolExecutor(
924+
thread_name_prefix="attr_validate",
925+
max_workers=MAX_ATTRIBUTE_VALIDATION_THREADS,
926+
) as pool:
927+
futures = [
928+
pool.submit(_check_attributes_by_type, meta, attr_type, names)
929+
for attr_type, names in attrs_by_type.items()
930+
]
931+
for future in futures:
932+
found.update(future.result())
933+
934+
return found
935+
936+
865937
@cell_silo_endpoint
866938
class OrganizationTraceItemAttributeValidateEndpoint(OrganizationTraceItemAttributesEndpointBase):
867939
publish_status = {
@@ -873,13 +945,16 @@ def post(self, request: Request, organization: Organization) -> Response:
873945
if not self.has_feature(organization, request):
874946
return Response(status=404)
875947

876-
serializer = OrganizationTraceItemAttributeValidateSerializer(data=request.data)
948+
query_serializer = OrganizationTraceItemAttributeValidateQuerySerializer(data=request.GET)
949+
if not query_serializer.is_valid():
950+
return Response(query_serializer.errors, status=400)
951+
952+
serializer = OrganizationTraceItemAttributeValidateBodySerializer(data=request.data)
877953
if not serializer.is_valid():
878954
return Response(serializer.errors, status=400)
879955

880-
validated = serializer.validated_data
881-
item_type = SupportedTraceItemType(validated["item_type"])
882-
attribute_names: list[str] = validated["attributes"]
956+
item_type = SupportedTraceItemType(query_serializer.validated_data["item_type"])
957+
attribute_names: list[str] = serializer.validated_data["attributes"]
883958

884959
try:
885960
snuba_params = self.get_snuba_params(request, organization)
@@ -897,17 +972,47 @@ def post(self, request: Request, organization: Organization) -> Response:
897972
)
898973

899974
results: dict[str, dict[str, Any]] = {}
975+
# Collect unknown (user tag) attributes that need storage validation
976+
unknown_attrs: list[tuple[str, Any]] = []
977+
900978
for attr_name in attribute_names:
901979
try:
902980
resolved, _context = resolver.resolve_attribute(attr_name)
903-
results[attr_name] = {
904-
"valid": True,
905-
"type": serialize_type(resolved.search_type),
906-
}
981+
if attr_name in definitions.contexts or attr_name in definitions.columns:
982+
# Known column or virtual context — always valid
983+
results[attr_name] = {
984+
"valid": True,
985+
"type": serialize_type(resolved.search_type),
986+
}
987+
else:
988+
# User tag — need to verify it exists in storage
989+
unknown_attrs.append((attr_name, resolved))
907990
except InvalidSearchQuery as e:
908991
results[attr_name] = {
909992
"valid": False,
910993
"error": str(e),
911994
}
912995

996+
if unknown_attrs:
997+
# Group by proto type because the storage check is keyed on
998+
# (proto_type, internal_name) — the same display name can exist
999+
# as both a string and a number attribute simultaneously.
1000+
attrs_by_type: dict[AttributeKey.Type.ValueType, list[str]] = {}
1001+
for _, resolved in unknown_attrs:
1002+
attrs_by_type.setdefault(resolved.proto_type, []).append(resolved.internal_name)
1003+
with handle_query_errors():
1004+
existing = _check_attributes_exist(resolver, item_type, attrs_by_type)
1005+
1006+
for attr_name, resolved in unknown_attrs:
1007+
if (resolved.proto_type, resolved.internal_name) in existing:
1008+
results[attr_name] = {
1009+
"valid": True,
1010+
"type": serialize_type(resolved.search_type),
1011+
}
1012+
else:
1013+
results[attr_name] = {
1014+
"valid": False,
1015+
"error": f"Unknown attribute: {attr_name}",
1016+
}
1017+
9131018
return Response({"attributes": results})

src/sentry/snuba/referrer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,7 @@ class Referrer(StrEnum):
575575
API_PREPROD_TAG_VALUES_RPC = "api.preprod.tags-values.rpc"
576576
API_PROCESSING_ERRORS_TAG_KEYS_RPC = "api.processing-errors.tags-keys.rpc"
577577
API_PROCESSING_ERRORS_TAG_VALUES_RPC = "api.processing-errors.tags-values.rpc"
578+
API_TRACE_ITEM_ATTRIBUTE_VALIDATE = "api.trace-item.attribute-validate"
578579
API_SPANS_TAG_KEYS = "api.spans.tags-keys"
579580
API_SPANS_TAG_KEYS_RPC = "api.spans.tags-keys.rpc"
580581
API_SPANS_TAG_VALUES = "api.spans.tags-values"

0 commit comments

Comments
 (0)