From 11fb400d37591e931ab44860c747acea8c08ede4 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Fri, 5 Jun 2026 17:00:02 +0200 Subject: [PATCH 1/6] docs - add relational evidence RFC roadmap --- docs/rfcs/027_relational_evidence_program.md | 166 ++++++++++++++ docs/rfcs/028_semantic_identity_targets.md | 139 +++++++++++ docs/rfcs/029_metadata_attachments.md | 118 ++++++++++ docs/rfcs/030_prism_lineage_graph.md | 139 +++++++++++ docs/rfcs/031_inspection_artifacts.md | 127 +++++++++++ docs/rfcs/032_execution_observations.md | 109 +++++++++ .../rfcs/033_adapter_requirements_coverage.md | 128 +++++++++++ .../034_quality_assertions_observations.md | 117 ++++++++++ ..._governed_attributes_policy_checkpoints.md | 119 ++++++++++ docs/rfcs/036_governed_plan_bundle.md | 128 +++++++++++ .../rfcs/037_plan_diff_blast_radius_inputs.md | 130 +++++++++++ docs/rfcs/038_evidence_export_bridges.md | 110 +++++++++ .../040_interoperability_semantic_profiles.md | 215 ++++++++++++++++++ docs/rfcs/041_prism_plan_ingress_frontends.md | 167 ++++++++++++++ docs/rfcs/README.md | 28 +++ 15 files changed, 1940 insertions(+) create mode 100644 docs/rfcs/027_relational_evidence_program.md create mode 100644 docs/rfcs/028_semantic_identity_targets.md create mode 100644 docs/rfcs/029_metadata_attachments.md create mode 100644 docs/rfcs/030_prism_lineage_graph.md create mode 100644 docs/rfcs/031_inspection_artifacts.md create mode 100644 docs/rfcs/032_execution_observations.md create mode 100644 docs/rfcs/033_adapter_requirements_coverage.md create mode 100644 docs/rfcs/034_quality_assertions_observations.md create mode 100644 docs/rfcs/035_governed_attributes_policy_checkpoints.md create mode 100644 docs/rfcs/036_governed_plan_bundle.md create mode 100644 docs/rfcs/037_plan_diff_blast_radius_inputs.md create mode 100644 docs/rfcs/038_evidence_export_bridges.md create mode 100644 docs/rfcs/040_interoperability_semantic_profiles.md create mode 100644 docs/rfcs/041_prism_plan_ingress_frontends.md diff 
--git a/docs/rfcs/027_relational_evidence_program.md b/docs/rfcs/027_relational_evidence_program.md new file mode 100644 index 0000000..d5a04e2 --- /dev/null +++ b/docs/rfcs/027_relational_evidence_program.md @@ -0,0 +1,166 @@ +# InQL RFC 027: Relational evidence program + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 000 (core language model and layer boundaries) + - InQL RFC 002 (Apache Substrait integration) + - InQL RFC 004 (execution context) + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 012 (unified scalar expression surface) + - InQL RFC 013 (function catalog program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 031 (local inspection APIs and artifacts) + - InQL RFC 032 (execution observations) + - InQL RFC 033 (adapter requirements and coverage) + - InQL RFC 034 (quality assertions and observations) + - InQL RFC 035 (governed attributes and policy checkpoints) + - InQL RFC 036 (governed plan bundle) + - InQL RFC 037 (plan diff and blast-radius inputs) + - InQL RFC 038 (evidence export bridges) + - InQL RFC 040 (interoperability semantic profiles) + - InQL RFC 041 (Prism plan ingress and external client frontends) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC is the umbrella tracking RFC for InQL's relational evidence program. The program defines the local, open semantic evidence contracts that make typed relational computation inspectable before execution and reviewable after execution: stable semantic targets, metadata attachments, Prism lineage, inspection artifacts, execution observations, adapter coverage, quality observations, governed attributes, plan bundles, plan diffs, export bridges, interoperability semantic profiles, and Prism plan ingress. 
This RFC is complete only when the child RFCs are implemented, rejected, or explicitly superseded by design decision. + +## Core model + +1. InQL owns typed relational evidence, not enterprise governance operations. +2. Prism is the semantic checkpoint for authored and rewritten relational meaning. +3. Substrait remains a portable interchange boundary, not the only semantic evidence store. +4. Session and adapter execution may attach observations to semantic targets, but they must not redefine authored relational meaning. +5. Local evidence must be useful without any hosted control plane, catalog service, approval workflow, or proprietary governance product. +6. Downstream systems may consume InQL evidence, but those systems are outside the InQL contract. +7. Interoperability profiles provide evidence context for target environments, not alternate semantic owners for InQL. + +## Motivation + +InQL already has the pieces of a stronger relational evidence layer: typed carriers, Prism planning, Substrait lowering, registry-backed expressions, aggregate/window/generator semantics, and a session boundary. What is missing is a coherent contract for the evidence that tools need to answer questions such as which source fields produced an output field, which plan rewrite changed a relation, which backend capability was required, which quality assertion failed, and which execution attempt produced a result. + +Without this program, lineage, governance, quality, observability, and change-impact work will grow as disconnected features. Some tools will reconstruct meaning from Substrait, some from backend plans, some from session logs, and some from user-facing helper names. That would repeat the same failure InQL exists to avoid: typed relational meaning would be present during authoring, then weakened or reinterpreted at the next boundary. + +## Goals + +- Establish relational evidence as one coordinated InQL program. 
+- Define the child RFC set required for semantic identity, metadata attachments, lineage, inspection, observations, coverage, quality, governed attributes, plan bundles, plan diffs, exports, interoperability profiles, and Prism plan ingress. +- Keep the program open, local, and backend-neutral. +- Make Prism-authored relational meaning the source of local lineage and schema-flow evidence. +- Define target-environment profile evidence without making any external engine, dialect, or interchange format the semantic owner. +- Ensure execution observations and adapter coverage attach to semantic targets without redefining semantics. +- Allow higher-level governance, catalog, orchestration, audit, and approval systems to consume InQL evidence without becoming part of the InQL contract. + +## Non-Goals + +- Defining a hosted control plane, managed catalog, approval workflow, policy registry, stewardship UI, certification lifecycle, or proprietary blast-radius service. +- Defining organization-wide governance policy semantics. +- Defining pipeline orchestration, scheduling, retries, checkpointing, or cross-step lifecycle state. +- Making Substrait extension metadata the authoritative evidence store. +- Making a specific backend adapter the semantic owner of lineage, quality, policy, or coverage. +- Defining every external export mapping directly in this umbrella RFC. + +## Guide-level explanation (how authors think about it) + +Authors and tools should be able to inspect an InQL plan as structured evidence rather than formatted prose: + +```incan +from pub::inql.inspect import inspect_lineage + +summary = ( + orders + .filter(col("status") == "paid") + .group_by([col("customer_id")]) + .agg([sum(col("amount")).alias("total_amount")]) +) + +lineage = inspect_lineage(summary) +total = lineage.field("total_amount") +``` + +The exact API is defined in the child RFCs. 
The important user model is stable: InQL can explain typed relational computation locally, before a backend runs it and without requiring an external governance service. + +## Reference-level explanation (precise rules) + +The relational evidence program must consist of the following child RFCs unless this RFC is amended or superseded: + +- InQL RFC 028 (semantic identity and target model) +- InQL RFC 029 (typed metadata attachments) +- InQL RFC 030 (Prism lineage graph) +- InQL RFC 031 (local inspection APIs and artifacts) +- InQL RFC 032 (execution observations) +- InQL RFC 033 (adapter requirements and coverage) +- InQL RFC 034 (quality assertions and observations) +- InQL RFC 035 (governed attributes and policy checkpoints) +- InQL RFC 036 (governed plan bundle) +- InQL RFC 037 (plan diff and blast-radius inputs) +- InQL RFC 038 (evidence export bridges) +- InQL RFC 040 (interoperability semantic profiles) +- InQL RFC 041 (Prism plan ingress and external client frontends) + +This umbrella RFC must not be marked Implemented while any required child RFC remains Draft, Planned, In Progress, Blocked, or otherwise unresolved. A child RFC may be removed from the required completion set only by a design decision recorded in this RFC or by a superseding RFC. + +Child RFCs must preserve the layer boundary established by this RFC. They may define local InQL evidence contracts and generic export shapes. They must not define proprietary product behavior, hosted storage behavior, managed approval semantics, or organization-wide policy lifecycle rules. + +Relational evidence must derive from InQL semantic sources where possible. Prism-authored and Prism-rewritten plans are the authoritative source for local relational lineage. Session and backend adapter observations may report execution facts, diagnostics, and capability coverage, but they must not decide that an authored lineage edge exists or does not exist. 
+ +Evidence that affects correctness must not be encoded only as ignorable interchange metadata. If a downstream consumer must understand evidence for correctness, the plan must require a real supported capability, reject execution, or report unknown/uncovered coverage. + +## Design details + +### Syntax + +This umbrella RFC introduces no new syntax. Child RFCs should prefer APIs and artifact contracts before proposing new authoring syntax. + +### Semantics + +This RFC is normative for program structure, lifecycle, and layer boundaries. Individual semantic contracts are normative only in the child RFC that owns the corresponding evidence family. + +### Interaction with other InQL surfaces + +Evidence must be independent of authoring surface. Equivalent method chains, `query {}` blocks, and future relational surfaces should produce equivalent semantic targets and lineage where they express equivalent relational intent. + +External client frontends must follow the same rule. A Spark Connect, SQL, or other plan ingress frontend may preserve client-origin evidence, but Prism remains responsible for analyzed relational meaning. + +The function catalog program remains relevant because function identity, aggregate measures, windows, generators, nested functions, format functions, approximate functions, and extensions all affect lineage and adapter coverage. The evidence program must consume function registry metadata rather than hardcoding function semantics in a separate evidence catalog. + +### Compatibility / migration + +This program should be additive. Existing plans may lack evidence artifacts until child RFCs are implemented. Serialized evidence artifacts must carry version metadata so consumers can distinguish unsupported evidence from empty evidence. 
+ +## Alternatives considered + +- **One giant governance RFC.** Rejected because governance, lineage, quality, execution evidence, adapter coverage, and exports are too broad to specify responsibly in one normative document. +- **One RFC per artifact file.** Rejected because artifacts are downstream views of semantic contracts; the RFC boundary should be the concept, not the filename. +- **Use Substrait metadata as the evidence store.** Rejected because Substrait consumers may ignore extension metadata and because InQL needs richer local semantic targets than portable interchange can guarantee. +- **Let each downstream integration reconstruct evidence.** Rejected because it would make lineage and quality inconsistent across tools. + +## Drawbacks + +- The program creates several RFCs before implementation begins. +- Stable identity and artifact versioning add design surface that simple execution does not need. +- Some evidence will initially be conservative or unknown, which may feel less satisfying than overconfident lineage. +- The umbrella RFC may remain Draft or Planned for a long time while children land. + +## Layers affected + +- **InQL specification** — the RFC set must define a coherent relational evidence model across existing expression, planning, execution, and function-catalog RFCs. +- **InQL library package** — public inspection, quality, and artifact APIs must follow the child RFCs rather than growing as unrelated helpers. +- **Incan compiler** — compiler-facing support is affected only where child RFCs require typed metadata, stable symbols, or package inspection. +- **Execution / interchange** — Session, Substrait lowering, and adapters must attach execution evidence and capability coverage without owning relational semantics. +- **Documentation** — public docs must distinguish InQL local evidence contracts from downstream governance, catalog, and orchestration products. 
+ +## Unresolved questions + +- Should this program include an explicit project brief or tracking issue before child RFCs move from Draft to Planned? +- Should any child RFC be split further before implementation begins? +- Should the umbrella completion set include future syntax RFCs if evidence-driven authoring syntax is later proposed? + + diff --git a/docs/rfcs/028_semantic_identity_targets.md b/docs/rfcs/028_semantic_identity_targets.md new file mode 100644 index 0000000..88d21db --- /dev/null +++ b/docs/rfcs/028_semantic_identity_targets.md @@ -0,0 +1,139 @@ +# InQL RFC 028: Semantic identity and target model + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 000 (core language model and layer boundaries) + - InQL RFC 004 (execution context) + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 027 (relational evidence program) + - InQL RFC 041 (Prism plan ingress and external client frontends) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines the semantic identity and target model required by InQL's relational evidence program. It establishes stable, typed targets for plans, Prism nodes, plan ingress requests, client sessions, relation outputs, fields, scalar expressions, aggregate measures, window expressions, generator outputs, read roots, quality assertions, policy decisions, adapter requirements, coverage records, and execution attempts. + +## Motivation + +Lineage, metadata attachments, quality observations, policy decisions, execution observations, adapter coverage, and plan diffs all need something precise to attach to. If evidence attaches only to display names or backend plan fragments, it becomes fragile under aliases, rewrites, projections, and execution adapter differences. InQL needs a semantic target model before it can produce trustworthy evidence. 
+ +## Goals + +- Define the required semantic target categories for local InQL evidence. +- Define which identities are plan-local, artifact-stable, and execution-local. +- Require deterministic identities within one plan, ingress, or profile snapshot where possible. +- Distinguish authored Prism targets from rewritten Prism targets and execution observations. +- Provide a common target vocabulary for child evidence RFCs. + +## Non-Goals + +- Defining a global registry of all project assets. +- Defining organization-wide identity, ownership, certification, or publication lifecycle. +- Defining physical backend plan identifiers as semantic identities. +- Defining the exact serialization format for every downstream artifact. + +## Guide-level explanation (how authors think about it) + +An InQL author should not usually construct semantic IDs manually. They appear when tools inspect a plan: + +```incan +lineage = inspect_lineage(summary) +field = lineage.output_field("total_amount") + +assert field.target.kind == "field" +assert field.target.plan_id == lineage.plan_id +``` + +The target lets tools attach evidence to the field even if the field was produced by an aggregate measure, renamed by a projection, lowered through Substrait, and executed by a backend adapter. 
+ +## Reference-level explanation (precise rules) + +InQL must define semantic targets for at least the following categories: + +- plan +- ingress request +- client session +- ingress node +- ingress expression +- analyzer binding +- authored Prism node +- rewritten Prism node +- relation output +- field +- scalar expression +- aggregate measure +- window expression +- generator output +- read root +- quality assertion +- quality observation +- policy decision +- adapter requirement +- coverage record +- semantic profile +- profile assessment +- execution attempt + +A semantic target must identify its category, its containing plan, ingress request, client session, profile, or execution scope, and the minimum structural path needed to distinguish it from sibling targets in the same scope. + +Plan, ingress request, ingress node, ingress expression, analyzer binding, authored node, rewritten node, relation output, field, scalar expression, aggregate measure, window expression, generator output, read-root, semantic profile, and profile assessment identities must be deterministic within one plan, ingress request, or profile snapshot where possible. Client sessions, execution attempts, quality observations, and runtime coverage records may be unique per frontend, session, or execution lifecycle. + +InQL must distinguish authored targets from rewritten targets. If an optimizer rewrite removes, fuses, or replaces a target, the rewritten target must preserve an authored-origin relationship rather than reusing an authored identity for a different structure. + +Field identities must not be based only on output display names. Renames, aliases, generated names, and duplicate field names must still produce unambiguous field targets. + +Backend adapter identifiers may be attached as metadata or observations, but they must not replace InQL semantic targets. 
+ +External client protocol identifiers may be attached as ingress-origin evidence, but they must not replace InQL semantic targets. + +A client session target identifies external client session state that can affect plan ingress analysis, such as current catalog or namespace, session configuration, temporary relation names, function registrations, profile selection, case-sensitivity mode, or dialect flags. Client session targets are not execution session identities unless an RFC or implementation explicitly binds the external client session to an InQL execution session. + +## Design details + +### Syntax + +This RFC introduces no authoring syntax. + +### Semantics + +Semantic targets are evidence anchors. They do not by themselves define lineage, policy, quality, or execution behavior. Those contracts belong to child RFCs that reference this target model. + +### Interaction with other InQL surfaces + +Method chains and `query {}` blocks that express equivalent relational intent should produce equivalent target categories. They are not required to produce byte-identical IDs if their authored syntax differs, but the resulting semantic graph must preserve comparable targets for downstream tools. + +### Compatibility / migration + +Existing plans without semantic targets remain valid for execution. Tools that require relational evidence must report missing semantic identity as unsupported evidence rather than inferring identity from names alone. + +## Alternatives considered + +- **Use display names as identities.** Rejected because aliases, duplicate names, rewrites, and generated columns make display names insufficient. +- **Use backend plan node identifiers.** Rejected because backend adapters are not semantic owners and may change plans physically. +- **Require global durable IDs immediately.** Rejected because local plan evidence should work before global registry or publication semantics exist. 
+ +## Drawbacks + +- Stable identity increases the complexity of planning and inspection. +- Some targets may need conservative IDs until Prism has richer authored-origin tracking. +- Deterministic local IDs can be mistaken for global identity unless docs are clear. + +## Layers affected + +- **InQL specification** — semantic target categories must become shared terminology for relational evidence RFCs. +- **InQL library package** — inspection and artifact APIs must expose targets rather than unstructured strings. +- **Execution / interchange** — Session, Substrait lowering, and adapters must preserve references to semantic targets where they emit related evidence. +- **Documentation** — docs must explain local plan identity versus global asset identity. + +## Unresolved questions + +- Which targets require artifact-stable IDs beyond one plan snapshot in the first release? +- Should target IDs be human-readable, opaque, or both? +- How should target identity behave when an optimizer rewrite duplicates one authored expression into multiple rewritten positions? + + diff --git a/docs/rfcs/029_metadata_attachments.md b/docs/rfcs/029_metadata_attachments.md new file mode 100644 index 0000000..7009740 --- /dev/null +++ b/docs/rfcs/029_metadata_attachments.md @@ -0,0 +1,118 @@ +# InQL RFC 029: Typed metadata attachments + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines typed metadata attachments for InQL semantic targets. 
Attachments provide a common way to associate lifecycle, source, visibility, typed payloads, provenance, and evidence references with plans, fields, expressions, requirements, observations, and other semantic targets without hardcoding every evidence family into one model. + +## Motivation + +Relational evidence needs more than lineage edges. A field may carry a redacted label, a source assertion, a planner diagnostic, a session observation, an adapter capability result, or an exported catalog reference. Without a typed attachment model, each feature will invent its own string map and lifecycle rules, making evidence inconsistent and difficult to export. + +## Goals + +- Define a common attachment shape for semantic targets. +- Require lifecycle, source, visibility, and typed payload metadata. +- Preserve provenance and evidence references. +- Support sensitive and redacted metadata without forcing it into portable plans. +- Let child RFCs define specialized payload schemas while sharing one attachment contract. + +## Non-Goals + +- Defining every possible metadata key. +- Defining business glossary, certification, stewardship, or ownership lifecycle. +- Defining a hosted metadata store. +- Making arbitrary untyped string maps the public evidence model. + +## Guide-level explanation (how authors think about it) + +Most authors should see attachments through inspection results: + +```incan +plan = inspect_plan(summary) +field = plan.output_field("customer_id") + +for attachment in field.attachments(): + print(attachment.namespace, attachment.key, attachment.visibility) +``` + +The attachment shape lets tools distinguish a user-authored label, a planner-derived fact, an adapter-reported observation, and a redacted sensitive value. 
+ +## Reference-level explanation (precise rules) + +An InQL metadata attachment must include: + +- target semantic identity +- namespace +- key +- typed payload +- lifecycle +- source +- visibility +- evidence references + +Attachment lifecycle must distinguish at least authored, planned, analyzed, rewritten, lowered, bound, executed, exported, and imported states. + +Attachment source must distinguish at least InQL, user, Prism, Session, adapter, function registry, quality engine, policy engine, external catalog, and imported artifact. + +Attachment visibility must distinguish at least public, internal, sensitive, and redacted. Sensitive attachments must not be emitted into portable artifacts by default. Redacted attachments may preserve the existence, target, and reason code of a hidden fact without exposing the payload. + +Typed payloads must be schema-versioned when serialized. Consumers must be able to reject unknown payload schemas without treating the attachment as absent. + +Attachments must not override semantic structure. A metadata attachment may describe a field, but it must not create a field identity or lineage edge by itself. Structural evidence belongs in the semantic target and lineage models. + +## Design details + +### Syntax + +This RFC introduces no syntax. Future helper APIs may expose attachments, but authoring syntax is not required. + +### Semantics + +Attachments are evidence records, not semantic authority by default. A child RFC may define an authoritative attachment kind only when the authority, lifecycle, and conflict behavior are explicit. + +### Interaction with other InQL surfaces + +Function registry metadata may produce attachments when functions affect lineage, adapter requirements, null behavior, determinism, or extension support. Those attachments must derive from registry facts rather than duplicating function names or signatures in a separate evidence catalog. 
+ +### Compatibility / migration + +Existing APIs may continue returning simple inspection data. New evidence APIs should expose the attachment model so clients can migrate away from ad hoc metadata maps. + +## Alternatives considered + +- **One model per evidence family with no shared attachment layer.** Rejected because lifecycle, visibility, provenance, and evidence references would drift. +- **Arbitrary string-key maps.** Rejected because untyped payloads are hard to validate and unsafe to export. +- **Put all metadata into Substrait extensions.** Rejected because Substrait extension metadata is not a reliable authoritative store for local evidence. + +## Drawbacks + +- Attachments introduce generic machinery before all payload families exist. +- Visibility rules require discipline from export adapters. +- Poorly scoped namespaces could still become clutter if review is weak. + +## Layers affected + +- **InQL specification** — metadata attachments must become the shared extension point for evidence families. +- **InQL library package** — inspection and artifact APIs must preserve typed attachment payloads and visibility. +- **Execution / interchange** — lowering and adapters may carry attachment references but must not leak sensitive payloads by default. +- **Documentation** — docs must show which attachment namespaces are stable public contracts. + +## Unresolved questions + +- Which attachment namespaces should be reserved by InQL core? +- Should users be able to author arbitrary attachments directly, or only through typed helper APIs? +- What is the first serialized payload schema format for attachments? 
+ + diff --git a/docs/rfcs/030_prism_lineage_graph.md b/docs/rfcs/030_prism_lineage_graph.md new file mode 100644 index 0000000..50509ee --- /dev/null +++ b/docs/rfcs/030_prism_lineage_graph.md @@ -0,0 +1,139 @@ +# InQL RFC 030: Prism lineage graph + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 002 (Apache Substrait integration) + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 012 (unified scalar expression surface) + - InQL RFC 019 (window functions) + - InQL RFC 020 (nested data functions) + - InQL RFC 021 (generator and table-valued functions) + - InQL RFC 022 (semi-structured and format functions) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 041 (Prism plan ingress and external client frontends) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines the Prism lineage graph for InQL. The graph records relation-level, field-level, and expression-level dependencies over authored and rewritten Prism plans, including reads, projections, filters, joins, aggregates, windows, generators, ordering, limits, nested data access, and semi-structured or format parsing. + +## Motivation + +Lineage reconstructed from backend SQL, backend plans, or Substrait alone is too late and too lossy. Prism sees typed relational intent before backend lowering and before execution. That makes Prism the right source for local lineage evidence, provided lineage is modeled explicitly instead of inferred later from display names or emitted plans. + +## Goals + +- Define native InQL lineage edges over semantic targets. +- Distinguish value, control, grouping, join, sort, window, policy, and quality dependencies. +- Preserve authored-origin and ingress-origin relationships through rewrites. 
+- Represent exact, conservative, and unknown lineage confidence. +- Cover current relational operations, including functions added by the function catalog program. + +## Non-Goals + +- Defining global cross-workspace lineage storage. +- Defining business meaning, certification, or ownership. +- Replacing Substrait relation lowering. +- Guaranteeing that every external lineage tool can represent every native InQL edge kind. + +## Guide-level explanation (how authors think about it) + +Given a grouped relation: + +```incan +summary = ( + orders + .filter(col("status") == "paid") + .group_by([col("customer_id")]) + .agg([sum(col("amount")).alias("total_amount")]) +) +``` + +InQL should be able to explain that `total_amount` has value lineage from `orders.amount`, grouping lineage from `orders.customer_id`, and control lineage from `orders.status`. + +## Reference-level explanation (precise rules) + +The Prism lineage graph must contain lineage edges between semantic targets. Each edge must include: + +- source target +- destination target +- relationship kind +- transformation kind +- optional expression reference +- confidence +- evidence references + +Relationship kind must distinguish at least value, control, grouping, join, sort, window, policy, and quality relationships. + +Transformation kind must distinguish at least identity, expression, aggregate, window, generator, filter, join, mask, format_parse, nested_access, semi_structured_access, opaque, and unknown transformations. + +Confidence must distinguish exact, conservative, and unknown lineage. Exact lineage means InQL can identify the relevant source target set. Conservative lineage means InQL can identify a safe over-approximation. Unknown lineage means InQL cannot determine the dependency and must report that uncertainty explicitly. + +Projection of an input field without transformation must produce value lineage from the input field to the output field. 
Projection of an expression must produce value lineage from every scalar input expression dependency to the output field. + +Filtering must produce control lineage from filter input fields to the filtered relation output. A filter must not be represented as value lineage to every output field unless a child RFC explicitly defines that projection. + +Joins must produce join lineage from join key and predicate fields to the joined relation output. Output fields must preserve value lineage from the side that produced the field. + +Aggregates must produce grouping lineage from group keys to aggregate result rows and value lineage from aggregate measure inputs to aggregate output fields. + +Window functions must produce window lineage from partition, order, frame, function input, and default expressions to the window output field. A window output remains row-level in the surrounding projection. + +Generator outputs must have generator output targets. Generated fields must preserve lineage from generator input expressions and any declared schema information. + +Nested and semi-structured access must be exact when the accessed schema is typed and known. It must be conservative or unknown when dynamic payload shape prevents exact field lineage. + +Rewrites must preserve authored origins. A rewritten edge may point to rewritten targets, but tools must be able to recover the authored targets that explain the lineage. + +Plans produced through external client frontends must preserve ingress-origin relationships from client request nodes to Prism targets where available. Ingress-origin relationships explain protocol provenance, but they must not be treated as value, control, grouping, join, sort, window, policy, or quality lineage by themselves. + +## Design details + +### Syntax + +This RFC introduces no syntax. + +### Semantics + +Lineage is a semantic graph over Prism targets. It is not an execution log and not a formatted explanation string. 
+ +### Interaction with other InQL surfaces + +Function registry metadata must provide lineage-relevant facts for functions whose behavior is not derivable from argument structure alone. Opaque user-defined or extension functions must produce conservative or unknown lineage unless they provide explicit metadata. + +### Compatibility / migration + +Plans without lineage remain executable. Inspection APIs must distinguish unsupported lineage from empty lineage. + +## Alternatives considered + +- **Use OpenLineage column lineage as the internal model.** Rejected because InQL needs richer relationship kinds than common post-hoc run events. +- **Infer lineage from Substrait only.** Rejected because Prism has authored-origin and type information that may be lost during interchange lowering. +- **Only expose field-level direct lineage.** Rejected because governance, quality, and blast-radius analysis need control, grouping, join, and sort relationships. + +## Drawbacks + +- Rich lineage requires careful tests for every relation and expression family. +- Conservative lineage may produce noisy downstream impact results. +- Rewrites become more constrained because they must preserve origin evidence. + +## Layers affected + +- **InQL specification** — lineage relationship and transformation kinds become normative vocabulary. +- **InQL library package** — inspection APIs must expose typed lineage edges. +- **Execution / interchange** — Substrait and adapters may carry references to lineage targets but must not redefine them. +- **Documentation** — docs must explain the difference between value, control, grouping, join, and sort lineage. + +## Unresolved questions + +- Should sort lineage attach to relation outputs, fields, or a separate ordering target? +- Which nested and semi-structured operations can claim exact lineage in the first release? +- How should lineage represent set operations once InQL adds them? 
+ + diff --git a/docs/rfcs/031_inspection_artifacts.md b/docs/rfcs/031_inspection_artifacts.md new file mode 100644 index 0000000..96d791b --- /dev/null +++ b/docs/rfcs/031_inspection_artifacts.md @@ -0,0 +1,127 @@ +# InQL RFC 031: Local inspection APIs and artifacts + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 040 (interoperability semantic profiles) + - InQL RFC 041 (Prism plan ingress and external client frontends) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines local inspection APIs and deterministic evidence artifacts for InQL plans. The APIs expose plan structure, schema flow, lineage, metadata attachments, semantic profile evidence, ingress evidence, and diagnostics as typed records, while artifacts provide versioned serialized views suitable for CI, IDEs, agents, documentation, and downstream integrations. + +## Motivation + +Relational evidence is only useful if authors and tools can inspect it without scraping logs or formatted explanations. InQL needs local APIs and artifacts that work without a hosted service, without a catalog product, and without executing the plan. This keeps plan inspection open, reproducible, and testable. + +## Goals + +- Define local inspection APIs over InQL plans. +- Define deterministic artifact families for plan graph, lineage graph, schema flow, metadata attachments, semantic profiles, ingress mappings, client session context, and diagnostics. +- Require artifact versioning and unsupported-evidence markers. +- Keep human reports as projections from structured artifacts. 
+ +## Non-Goals + +- Defining a UI, hosted artifact store, or managed catalog. +- Defining every external export mapping. +- Requiring plan execution before inspection. +- Making Markdown or console output the primary evidence format. + +## Guide-level explanation (how authors think about it) + +An author can inspect a lazy plan locally: + +```incan +from pub::inql.inspect import inspect_plan + +inspection = inspect_plan(summary) +inspection.output_schema() +inspection.lineage().field("total_amount") +``` + +The same inspection data can be written as artifacts for CI or downstream tools: + +```incan +inspection.write_artifacts("target/inql") +``` + +The exact helper names are illustrative; the contract is that structured inspection exists before execution. + +## Reference-level explanation (precise rules) + +InQL must expose a local inspection capability for plans that returns typed records, not only formatted strings. + +Inspection records must include semantic targets; output schema information; relation structure; lineage when available; metadata attachments when available; semantic profile records or assessments when available; ingress origin mappings, client session context, and frontend coverage when available; diagnostics; and evidence-version metadata. + +InQL must define deterministic serialized artifacts for at least: + +- plan graph +- lineage graph +- schema flow +- metadata attachments +- semantic profiles +- ingress mappings +- client session context +- diagnostics + +Artifacts must include schema version, InQL version, relevant rule versions, target identifiers, and unsupported-evidence markers. An empty lineage graph must be distinguishable from lineage that was not computed or is not supported. + +Human-readable reports may exist, but they must be generated from structured inspection records or artifacts. + +Sensitive attachments must be redacted or omitted according to the visibility rules from InQL RFC 029. 
+ +## Design details + +### Syntax + +This RFC introduces no language syntax. + +### Semantics + +Inspection is read-only. It must not execute a plan, bind physical sources, mutate Prism-authored meaning, or make policy decisions. + +### Interaction with other InQL surfaces + +Method-chain, query-block, and future authoring surfaces should be inspectable through the same API once they lower to Prism. + +### Compatibility / migration + +Existing code remains valid. New tooling should prefer structured inspection over parsing `repr`, `debug`, or backend plan strings. + +## Alternatives considered + +- **Only expose formatted explanations.** Rejected because tools need structured data. +- **Only emit files, no API.** Rejected because IDEs and tests need in-memory inspection. +- **Wait for a higher-level catalog.** Rejected because local InQL users need inspection without external services. + +## Drawbacks + +- Artifact schemas create compatibility obligations. +- Deterministic output may constrain internal representation changes. +- Multiple artifact families require clear documentation. + +## Layers affected + +- **InQL specification** — local inspection becomes part of the relational evidence contract. +- **InQL library package** — inspection APIs and artifact writers must expose structured records. +- **Execution / interchange** — no execution is required, but artifacts may reference Substrait lowering status. +- **Documentation** — docs must present artifacts as primary evidence and reports as derived views. + +## Unresolved questions + +- Which artifact serialization format should be mandatory first? +- Should artifact writing be part of the core package or a separate tooling module? +- How stable must artifact ordering be for snapshot tests and CI diffs? 
+ + diff --git a/docs/rfcs/032_execution_observations.md b/docs/rfcs/032_execution_observations.md new file mode 100644 index 0000000..722d347 --- /dev/null +++ b/docs/rfcs/032_execution_observations.md @@ -0,0 +1,109 @@ +# InQL RFC 032: Execution observations + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 004 (execution context) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 031 (local inspection APIs and artifacts) + - InQL RFC 040 (interoperability semantic profiles) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines execution observations for InQL sessions. Execution observations correlate runtime attempts with semantic plan targets and record backend, adapter, semantic profile context, status, timing, diagnostics, row counts, and optional trace identifiers without making runtime logs the source of relational semantics. + +## Motivation + +After a plan executes, users and tools need evidence about what was attempted and what happened. A result table alone cannot explain which plan version ran, which adapter executed it, which diagnostics occurred, or how runtime observations attach to semantic targets. InQL needs a lightweight execution observation model that is structural, redacted by default, and independent of any particular telemetry backend. + +## Goals + +- Define execution attempts as semantic targets. +- Correlate session execution with plan identity. +- Record backend and adapter information, semantic profile context when available, status, diagnostics, row counts, and timing. +- Allow optional trace/log/metric correlation without requiring a telemetry provider. +- Preserve redaction defaults for payload-heavy or sensitive data. + +## Non-Goals + +- Defining an OpenTelemetry provider, collector, exporter, or sampling policy. 
+- Defining pipeline orchestration or retry semantics. +- Recording row payloads, samples, or full backend logs by default. +- Letting backend execution redefine plan lineage or schema semantics. + +## Guide-level explanation (how authors think about it) + +An author can collect data and then inspect the observation: + +```incan +result = session.collect(summary) +observation = result.execution_observation() + +assert observation.plan_id == inspect_plan(summary).plan_id +assert observation.status == "success" +``` + +Execution evidence explains the run. It does not replace plan inspection. + +## Reference-level explanation (precise rules) + +An execution observation must include an execution attempt target, plan target, session or binding context reference, backend name, adapter version when available, start time, end time or duration, status, diagnostics, and optional row count, byte count, trace identifier, requested semantic profile, and observed semantic profile. + +Execution status must distinguish at least success, failure, cancelled, skipped, and unsupported. + +Diagnostics must be structured records. Sensitive values, row samples, query payloads, credentials, and source data must not be included by default. + +An execution observation may reference local inspection artifacts or semantic targets produced before execution. It must not mutate authored Prism targets or claim lineage that was not present in the plan evidence model. + +Telemetry integrations may emit equivalent spans, events, logs, or metrics, but the InQL observation model must remain usable when no telemetry backend is configured. + +## Design details + +### Syntax + +This RFC introduces no syntax. + +### Semantics + +Execution observations are runtime evidence. They describe an attempt to execute a semantic plan through a session and adapter. 
+ +### Interaction with other InQL surfaces + +Quality observations, adapter coverage records, semantic profile records, and export bridges may refer to execution observations. Pipeline layers may consume them, but orchestration behavior remains outside this RFC. + +### Compatibility / migration + +Existing session execution remains valid. Implementations may initially emit partial observations, but unsupported fields must be explicit rather than silently omitted when consumers request them. + +## Alternatives considered + +- **Use backend logs only.** Rejected because logs are not stable semantic evidence and may be sensitive. +- **Require OpenTelemetry for all observations.** Rejected because local InQL evidence should work without provider configuration. +- **Attach execution data directly to plan nodes.** Rejected because runtime attempts are lifecycle events, not authored plan structure. + +## Drawbacks + +- Observation records add runtime overhead. +- Redaction can make diagnostics less convenient if users expect full backend logs. +- Correlating observations with plan snapshots requires stable semantic identity. + +## Layers affected + +- **InQL specification** — execution observation fields and status values become normative. +- **InQL library package** — Session results must expose observations. +- **Execution / interchange** — adapters must report execution facts without redefining semantics. +- **Documentation** — docs must explain observation redaction and telemetry independence. + +## Unresolved questions + +- What minimum observation fields are required for every adapter? +- Should failed planning, binding, and lowering attempts share the same observation model as execution attempts? +- How should trace identifiers be represented when multiple telemetry systems are active? 
+ + diff --git a/docs/rfcs/033_adapter_requirements_coverage.md b/docs/rfcs/033_adapter_requirements_coverage.md new file mode 100644 index 0000000..e665922 --- /dev/null +++ b/docs/rfcs/033_adapter_requirements_coverage.md @@ -0,0 +1,128 @@ +# InQL RFC 033: Adapter requirements and coverage + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 002 (Apache Substrait integration) + - InQL RFC 004 (execution context) + - InQL RFC 024 (function extension policy) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 032 (execution observations) + - InQL RFC 040 (interoperability semantic profiles) + - InQL RFC 041 (Prism plan ingress and external client frontends) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines adapter requirements and coverage states for InQL. Requirements describe backend capabilities needed by a plan or evidence contract, while coverage states report whether a specific adapter can satisfy those requirements under the relevant binding and semantic profile. Unknown coverage is not enforcement. + +## Motivation + +Backend neutrality only works when backend limits are visible. A plan may require extension functions, precise decimal behavior, variant semantics, lineage preservation, audit emission, masking, aggregation thresholding, or other capabilities. If InQL hides adapter uncertainty, downstream systems may assume a guarantee that the selected backend cannot provide. + +## Goals + +- Define adapter requirements as semantic evidence targets. +- Define coverage states: covered, partially_covered, uncovered, and unknown. +- Require coverage records to name the adapter, semantic profile when relevant, and evidence. +- Keep backend inability distinct from unsupported InQL semantics. 
+- Make capability uncertainty explicit before execution when possible. + +## Non-Goals + +- Defining every possible backend capability. +- Defining physical execution strategies. +- Making any one adapter the semantic owner of InQL behavior. +- Defining organization-wide enforcement policy. + +## Guide-level explanation (how authors think about it) + +An inspection can reveal backend requirements: + +```incan +inspection = inspect_plan(summary) + +for requirement in inspection.adapter_requirements(): + print(requirement.capability, requirement.guarantee_level) +``` + +A session can then report whether the selected adapter covers them: + +```incan +coverage = session.check_coverage(summary) +``` + +If coverage is unknown for a requirement whose guarantee level is required, tools should not present that as enforced behavior. + +## Reference-level explanation (precise rules) + +An adapter requirement must include requirement identity, target, capability, guarantee level, reason references, and optional diagnostic text. + +Capability names must be stable public vocabulary when they appear in serialized artifacts. Initial capability families should include extension_function, variant_semantics, decimal_semantics, null_semantics, lineage_preservation, audit_emission, row_filter, column_mask, aggregate_threshold, region_binding, and ordered_execution where applicable. + +Guarantee level must distinguish required, preferred, and optional requirements. + +A coverage record must include requirement identity, adapter identity, adapter version when available, semantic profile identity when the evaluation depends on a profile, coverage state, evidence references, and diagnostic text. 
+ +Coverage state must distinguish: + +- covered: the adapter can satisfy the requirement under the current binding +- partially_covered: the adapter can satisfy part of the requirement or only under restrictions +- uncovered: the adapter cannot satisfy the requirement +- unknown: InQL cannot determine whether the adapter can satisfy the requirement + +Unknown coverage must not be treated as covered. If the coverage state for a requirement whose guarantee level is required is unknown or uncovered, execution must reject, route, rewrite, require approval, or report non-enforcing behavior according to the higher-level policy using the coverage record. + +Backend inability must be reported as adapter coverage or execution failure. It must not be encoded as a normal Substrait-level state for core InQL semantics. + +Ingress coverage belongs to plan ingress frontends. It must not be reported as backend adapter coverage unless the same feature also creates an execution requirement for the selected adapter. + +## Design details + +### Syntax + +This RFC introduces no syntax. + +### Semantics + +Adapter requirements are evidence about what a plan needs. Coverage records are evidence about what a selected adapter can provide. + +### Interaction with other InQL surfaces + +Function registry entries, semi-structured functions, extensions, quality assertions, governed attribute constraints, and semantic profile assessments may all create adapter requirements. Execution observations may reference coverage records. + +### Compatibility / migration + +Existing adapters may initially report unknown coverage for capabilities they do not declare. Consumers must distinguish unknown from covered. + +## Alternatives considered + +- **Fail only at backend runtime.** Rejected because users need pre-execution visibility when possible. +- **Treat unsupported backend features as unsupported InQL semantics.** Rejected because backend inability is not the same as invalid InQL. 
+- **Use boolean supports flags.** Rejected because partial and unknown coverage are important operational states. + +## Drawbacks + +- Capability vocabulary must be maintained. +- Adapters need more metadata. +- Early coverage results may be conservative. + +## Layers affected + +- **InQL specification** — adapter requirement and coverage vocabulary becomes normative. +- **InQL library package** — inspection and session APIs must expose requirements and coverage. +- **Execution / interchange** — adapters must report capability evidence honestly. +- **Documentation** — docs must explain that unknown coverage is not enforcement. + +## Unresolved questions + +- Which capability families are mandatory in the first implementation? +- Should coverage checks be available without binding physical sources? +- How should adapter-specific diagnostics be normalized? + + diff --git a/docs/rfcs/034_quality_assertions_observations.md b/docs/rfcs/034_quality_assertions_observations.md new file mode 100644 index 0000000..3dfbbaa --- /dev/null +++ b/docs/rfcs/034_quality_assertions_observations.md @@ -0,0 +1,117 @@ +# InQL RFC 034: Quality assertions and observations + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 004 (execution context) + - InQL RFC 012 (unified scalar expression surface) + - InQL RFC 016 (core aggregate functions) + - InQL RFC 017 (aggregate modifiers) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 032 (execution observations) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines InQL quality assertions and quality observations. Quality assertions are typed relational checks over datasets, fields, groups, or explicit multi-relation inputs. Quality observations are runtime results produced by executing those assertions. 
A quality assertion is not an ordinary filter unless the author explicitly asks to filter rows. + +## Motivation + +Data quality needs to participate in typed relational planning without collapsing into ad hoc post-run tests or silent filters. InQL can express many quality checks as relational work: row counts, null rates, accepted values, uniqueness, ranges, group thresholds, and aggregate conditions. Those checks should produce observations that sessions, CI, and pipeline layers can consume. + +## Goals + +- Define quality assertions as semantic targets. +- Define quality observations as execution evidence. +- Distinguish assertion declaration from runtime result. +- Support relation, field, group, and explicit cross-relation scopes. +- Preserve the rule that quality checks do not silently change relation cardinality. + +## Non-Goals + +- Defining pipeline orchestration, quarantine storage, retry behavior, or promotion gates. +- Defining a full Great Expectations-compatible suite model. +- Defining row-level model validation owned by the base language. +- Treating quality checks as filters by default. + +## Guide-level explanation (how authors think about it) + +Authors can declare checks and execute them through a session: + +```incan +checks = [ + row_count(min=1), + null_rate(col("customer_id"), max=0.0), + unique(col("order_id")), +] + +observations = session.observe_quality(orders, checks) +``` + +The observations report a passed, failed, errored, skipped, or unsupported status. The `orders` relation is not filtered by these checks unless the author separately applies a filter. + +## Reference-level explanation (precise rules) + +A quality assertion must include assertion identity, target, predicate or metric expression, severity, mode, scope, and evidence references. + +Assertion scope must distinguish relation, field, group, and explicit cross_relation scopes. Cross-relation assertions are valid only when every relation is an explicit input to the assertion plan. 
+ +Assertion mode must distinguish observe, require, and quarantine or equivalent policy-neutral states. The mode describes intended handling, but pipeline behavior belongs outside this RFC. + +A quality observation must include observation identity, assertion identity, execution attempt identity when applicable, status, metrics, diagnostics, and optional redacted sample references. + +Observation status must distinguish passed, failed, errored, skipped, and unsupported. + +Quality assertions may be planned as relational work. They must not change the cardinality or contents of the checked relation unless represented as an explicit transformation requested by the author. + +Quality expressions must use ordinary InQL scalar, aggregate, and grouping semantics. Invalid expression context must be diagnosed before execution where possible. + +## Design details + +### Syntax + +This RFC introduces no new block syntax. Helper APIs are sufficient for the first version. + +### Semantics + +Quality assertions produce observations. They may inform session failure, warnings, CI status, or pipeline gates, but those policies are outside the assertion semantics. + +### Interaction with other InQL surfaces + +Future `query {}` or pipeline syntax may lower to the same assertion model. The assertion model must not become method-chain specific. + +### Compatibility / migration + +Existing filters and projections are unaffected. Users must opt into quality assertions separately. + +## Alternatives considered + +- **Treat every quality check as a filter.** Rejected because observations and transformations are different semantics. +- **Leave quality entirely to external tools.** Rejected because typed relational checks need InQL schema and expression semantics. +- **Introduce syntax first.** Rejected because APIs and evidence contracts should settle before syntax. + +## Drawbacks + +- Quality APIs add another author-facing surface. 
+- Some checks may require backend support or additional execution cost. +- Separating observe/require/quarantine from pipeline behavior requires clear docs. + +## Layers affected + +- **InQL specification** — quality assertion and observation semantics become normative. +- **InQL library package** — public helper APIs and session observation APIs are affected. +- **Execution / interchange** — quality plans may lower to backend-executable relational work. +- **Documentation** — docs must distinguish checks, filters, and pipeline gates. + +## Unresolved questions + +- Which quality helpers belong in the first release? +- Should quality observations always require a Session, or can some be evaluated against in-memory data carriers directly? +- How should redacted sample references be represented? + + diff --git a/docs/rfcs/035_governed_attributes_policy_checkpoints.md b/docs/rfcs/035_governed_attributes_policy_checkpoints.md new file mode 100644 index 0000000..f9210ae --- /dev/null +++ b/docs/rfcs/035_governed_attributes_policy_checkpoints.md @@ -0,0 +1,119 @@ +# InQL RFC 035: Governed attributes and policy checkpoints + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 004 (execution context) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 033 (adapter requirements and coverage) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines how InQL carries governed attributes and records policy checkpoints as local relational evidence. Governed attributes are typed facts attached to semantic targets with provenance, confidence, authority, and lifetime. Policy checkpoints are decision records attached at authoring, planning, binding, or execution boundaries. 
InQL carries and propagates evidence; it does not define an organization-wide policy engine. + +## Motivation + +Relational plans often need to carry facts such as classification, origin, purpose, jurisdiction, derivation, masking status, or coverage state. Those facts may be supplied by model metadata, user declarations, imported artifacts, catalogs, policy engines, or prior plans. InQL should preserve and propagate them through relational semantics without pretending that inferred attributes are automatically authoritative policy truth. + +## Goals + +- Define governed attributes as typed evidence attached to semantic targets. +- Preserve source, confidence, authority, status, observed time, expiration, and evidence references. +- Define policy checkpoint records at authoring, planning, binding, and execution. +- Allow InQL to explain how attributes move through relational transformations. +- Keep policy authoring, approval, and global enforcement outside InQL. + +## Non-Goals + +- Defining a policy language. +- Defining a global invariant registry. +- Defining approvals, stewardship, ownership, or certification workflow. +- Deciding legal or compliance obligations. +- Making inferred attributes authoritative without review or explicit status. + +## Guide-level explanation (how authors think about it) + +An inspection may show that an output field carries derived attributes: + +```incan +lineage = inspect_lineage(report) +field = lineage.output_field("email_domain") + +for attribute in field.governed_attributes(): + print(attribute.key, attribute.status, attribute.confidence) +``` + +If a policy engine or session binding later allows, masks, warns, or rejects part of the plan, that result appears as a policy checkpoint record attached to the relevant target. 
+ +## Reference-level explanation (precise rules) + +A governed attribute must include attribute identity, target, key, typed value, scope, source, confidence, status, authority when available, observed time when available, expiration time when available, and evidence references. + +Attribute scope must distinguish at least field, relation, expression, plan, output, and execution_path scopes. + +Attribute source must distinguish at least model, user, lineage, catalog, policy, adapter, imported artifact, and inferred. + +Confidence must distinguish exact, high, medium, low, and unknown or equivalent ordered states. + +Status must distinguish asserted, inferred, accepted, rejected, overridden, stale, and pending_review or equivalent review states. + +InQL may propagate attributes through relational transformations when transformation semantics are known. It must preserve provenance and confidence. It must report conservative or unknown propagation when exact propagation is not available. + +A policy checkpoint record must include decision identity, target, checkpoint, action, policy reference, reason code, evidence references, visibility, and optional diagnostics. + +Checkpoint must distinguish authoring, planning, binding, and execution. Action must distinguish at least allow, deny, redact, mask, row_filter, warn, require_quality_check, require_approval, and observe. + +Policy checkpoint records are evidence of a decision or external result. They must not by themselves define the policy language that produced the decision. + +## Design details + +### Syntax + +This RFC introduces no policy syntax. + +### Semantics + +InQL owns attribute carriage, propagation evidence, and checkpoint records. It does not own organizational policy meaning. + +### Interaction with other InQL surfaces + +Lineage edges explain how attributes may propagate. Adapter requirements may be created when a policy checkpoint requires backend capabilities such as masking or row filtering. 
+ +### Compatibility / migration + +Existing plans without governed attributes remain valid. Consumers must treat absent attributes, unknown attributes, and rejected attributes as distinct states when those distinctions are available. + +## Alternatives considered + +- **Make InQL a policy engine.** Rejected because policy authoring and approval are outside typed relational semantics. +- **Use plain metadata tags only.** Rejected because provenance, confidence, authority, and status are required for trustworthy evidence. +- **Drop uncertain attributes.** Rejected because uncertainty is meaningful evidence. + +## Drawbacks + +- Attribute propagation can be complex and conservative. +- Policy checkpoint records can be mistaken for policy semantics unless docs are clear. +- More lifecycle states increase author and tool complexity. + +## Layers affected + +- **InQL specification** — governed attribute and checkpoint record semantics become normative. +- **InQL library package** — inspection APIs must expose attributes and decisions as typed records. +- **Execution / interchange** — Session and adapters may attach binding and execution checkpoint records. +- **Documentation** — docs must distinguish attribute evidence from policy authority. + +## Unresolved questions + +- Which governed attribute keys should InQL reserve for core use? +- Which propagation rules are required for the first release? +- Should policy checkpoints be serializable in portable plan artifacts by default, or only in local evidence artifacts? 
+ + diff --git a/docs/rfcs/036_governed_plan_bundle.md b/docs/rfcs/036_governed_plan_bundle.md new file mode 100644 index 0000000..c98cf73 --- /dev/null +++ b/docs/rfcs/036_governed_plan_bundle.md @@ -0,0 +1,128 @@ +# InQL RFC 036: Governed plan bundle + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 002 (Apache Substrait integration) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 033 (adapter requirements and coverage) + - InQL RFC 034 (quality assertions and observations) + - InQL RFC 035 (governed attributes and policy checkpoints) + - InQL RFC 040 (interoperability semantic profiles) + - InQL RFC 041 (Prism plan ingress and external client frontends) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines the governed plan bundle as the local InQL artifact that keeps relational computation and evidence together. A bundle contains a plan reference, schemas, lineage, governed attributes, policy checkpoints, quality assertions, semantic profiles, ingress evidence, client session context, adapter requirements, coverage records, evidence references, and version metadata for the InQL-owned parts of governed relational computation. + +## Motivation + +Individual evidence artifacts are useful, but many consumers need a coherent handoff unit. A plan without its evidence can be executed without understanding requirements. Evidence without the plan cannot explain what computation it describes. The governed plan bundle gives InQL a portable local package for the facts it owns while leaving hosted storage, global policy, approvals, and cross-system reasoning outside the contract. + +## Goals + +- Define a bundle shape for InQL-owned relational evidence. 
+- Keep plan, schema, lineage, attributes, policy checkpoints, quality assertions, semantic profiles, ingress evidence, client session context, adapter requirements, coverage, and versions together. +- Support local tooling and downstream generic consumers. +- Avoid proprietary hosted behavior in the InQL contract. + +## Non-Goals + +- Defining a managed control plane or global graph store. +- Defining organization-wide approval or promotion workflow. +- Replacing Substrait as the relational interchange plan. +- Requiring every bundle consumer to understand every optional evidence family. + +## Guide-level explanation (how authors think about it) + +An author or CI job can produce one bundle for a planned relation: + +```incan +bundle = governed_plan_bundle(summary) +bundle.write("target/inql/summary.bundle.json") +``` + +The bundle can be inspected locally or consumed by other tools. It does not require a hosted service. + +## Reference-level explanation (precise rules) + +A governed plan bundle must include: + +- bundle schema version +- InQL version and relevant rule versions +- plan target +- input schema references +- output schema reference +- lineage graph reference or embedded lineage graph +- metadata attachments +- governed attributes +- policy checkpoint records +- quality assertions and optional observations +- semantic profile records and profile assessments when available +- ingress origin mappings and frontend coverage when available +- client session context when it affected plan analysis +- adapter requirements +- coverage records when available +- evidence references +- export status when available + +The bundle must distinguish required, optional, unavailable, and unsupported evidence sections. A missing evidence section must not be treated as an empty evidence section. + +The bundle may include a Substrait plan or a reference to a Substrait artifact, but Substrait must not be the only source of InQL evidence in the bundle. 
+ +Sensitive or redacted evidence must follow attachment visibility rules. + +Bundle consumers must be able to ignore optional evidence families they do not understand while still detecting unsupported required evidence. + +## Design details + +### Syntax + +This RFC introduces no authoring syntax. + +### Semantics + +The bundle is an evidence package. It does not make policy decisions by itself. + +### Interaction with other InQL surfaces + +Inspection artifacts, execution observations, quality observations, ingress frontends, and export bridges may all read from or write to bundle-compatible records. + +### Compatibility / migration + +Bundles must be versioned from the start. Early bundles may contain fewer evidence families, but they must mark unsupported families explicitly. + +## Alternatives considered + +- **Only emit separate artifacts.** Rejected because consumers often need a coherent handoff unit. +- **Make the bundle a hosted-service protocol.** Rejected because local InQL evidence must remain open and service-independent. +- **Embed all evidence directly into Substrait.** Rejected because Substrait is not a complete InQL evidence model. + +## Drawbacks + +- Bundle versioning becomes a compatibility commitment. +- Bundles can become large for complex plans. +- Consumers need clear rules for partial evidence. + +## Layers affected + +- **InQL specification** — bundle contents and required distinctions become normative. +- **InQL library package** — APIs must produce bundle-compatible records. +- **Execution / interchange** — Substrait may be included or referenced, but not treated as the sole evidence store. +- **Documentation** — docs must define local bundle use without implying hosted-service requirements. + +## Unresolved questions + +- Should bundles embed artifacts or reference sibling artifact files by default? +- What is the first stable bundle serialization format? +- Which evidence families are required for a bundle to be considered complete? 
+ + diff --git a/docs/rfcs/037_plan_diff_blast_radius_inputs.md b/docs/rfcs/037_plan_diff_blast_radius_inputs.md new file mode 100644 index 0000000..6bf504a --- /dev/null +++ b/docs/rfcs/037_plan_diff_blast_radius_inputs.md @@ -0,0 +1,130 @@ +# InQL RFC 037: Plan diff and blast-radius inputs + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 031 (local inspection APIs and artifacts) + - InQL RFC 036 (governed plan bundle) + - InQL RFC 040 (interoperability semantic profiles) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines local InQL plan diffs and blast-radius input artifacts. A plan diff compares two InQL evidence artifacts and classifies changes in output schema, field identity, lineage, joins, filters, aggregates, windows, generators, quality assertions, semantic profiles, adapter requirements, and coverage. The result is a local input to downstream impact analysis, not an organization-wide blast-radius service. + +## Motivation + +Typed relational evidence should help users understand change before it reaches production. If a query changes, tools should know whether output fields changed, dependencies changed, adapter requirements changed, or quality checks changed. InQL can produce accurate local diff evidence because it owns the plan and lineage, but it should not claim to know every downstream consumer in every organization. + +## Goals + +- Define local plan diff inputs and outputs. +- Classify structural, schema, lineage, quality, semantic profile, and adapter-requirement changes. +- Treat unknown impact as an explicit result. 
+- Provide downstream systems with blast-radius inputs without defining a global blast-radius engine. + +## Non-Goals + +- Defining cross-repository dependency indexing. +- Defining organization-wide consumer discovery. +- Defining approvals, promotions, or change-management workflows. +- Deciding whether a change is acceptable for every downstream system. + +## Guide-level explanation (how authors think about it) + +A CI job can compare two plan artifacts: + +```incan +before = read_plan_artifact("main/summary.plan.json") +after = inspect_plan(summary) + +diff = diff_plans(before, after) +diff.changed_output_fields() +diff.changed_adapter_requirements() +``` + +The diff reports InQL-local facts. A higher-level system may combine those facts with dependency indexes and approvals. + +## Reference-level explanation (precise rules) + +A plan diff must compare two compatible InQL evidence artifacts or bundles. If artifacts are incompatible, stale, or missing required identity information, the diff must report unsupported or unknown impact. + +The diff must classify at least: + +- output field added +- output field removed +- output field renamed +- field type changed +- field nullability changed +- field lineage changed +- relation input changed +- filter predicate changed +- join structure changed +- aggregate measure changed +- window specification changed +- generator output changed +- semi-structured or format access confidence changed +- quality assertion changed +- semantic profile changed +- profile assessment changed +- adapter requirement changed +- coverage state changed + +Diff records must include affected semantic targets when available, change kind, severity or compatibility classification when known, evidence references, and confidence. + +Unknown impact must be explicit. If InQL cannot determine whether a change affects a target, it must not omit the target silently. + +Plan diffs may produce blast-radius input artifacts.
Those artifacts describe local affected targets and requirement changes. They do not claim to enumerate all downstream consumers outside InQL's local artifact set. + +## Design details + +### Syntax + +This RFC introduces no syntax. + +### Semantics + +Diffs operate over evidence artifacts, not raw source text. Text diffs may be useful but are not sufficient for semantic change classification. + +### Interaction with other InQL surfaces + +Plan diffs depend on stable semantic identity, lineage, inspection artifacts, semantic profiles, and governed plan bundles. They should not be implemented before those contracts exist. + +### Compatibility / migration + +Older artifacts may not contain enough identity or lineage for precise diffs. Diff output must make that limitation explicit. + +## Alternatives considered + +- **Use textual diffs only.** Rejected because text diffs cannot reliably classify relational meaning changes. +- **Make InQL own full blast radius.** Rejected because downstream consumers, deployments, dashboards, and global dependency indexes are outside InQL. +- **Ignore unknown impact.** Rejected because unknown impact is a real result. + +## Drawbacks + +- Precise diffs require stable artifacts across versions. +- Conservative diffs may produce noisy warnings. +- Compatibility classification can become complex and may need iteration. + +## Layers affected + +- **InQL specification** — change kinds and local diff semantics become normative. +- **InQL library package** — diff APIs must compare structured artifacts. +- **Execution / interchange** — adapter requirement changes must be included where execution behavior changes. +- **Documentation** — docs must distinguish local blast-radius inputs from global impact analysis. + +## Unresolved questions + +- Which change kinds should be compatibility-classified in the first release? +- Should diff severity be part of InQL or left entirely to downstream policy? 
+- How should diffs handle generated or unstable field names? + + diff --git a/docs/rfcs/038_evidence_export_bridges.md b/docs/rfcs/038_evidence_export_bridges.md new file mode 100644 index 0000000..a926ae8 --- /dev/null +++ b/docs/rfcs/038_evidence_export_bridges.md @@ -0,0 +1,110 @@ +# InQL RFC 038: Evidence export bridges + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 002 (Apache Substrait integration) + - InQL RFC 027 (relational evidence program) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 031 (local inspection APIs and artifacts) + - InQL RFC 032 (execution observations) + - InQL RFC 036 (governed plan bundle) + - InQL RFC 040 (interoperability semantic profiles) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines evidence export bridges from InQL's internal evidence model to external and adjacent formats. Export bridges map InQL plan, lineage, schema-flow, execution, quality, coverage, semantic profile, and bundle records into downstream views such as OpenLineage events, telemetry signals, semantic inspection fragments, and catalog/governance integration artifacts. Exports are projections from InQL evidence, not the internal source of truth. + +## Motivation + +InQL evidence should be useful outside InQL. CI systems, lineage tools, telemetry pipelines, catalogs, notebooks, and agents may all consume different formats. If each integration reconstructs evidence independently, semantics will drift. InQL should provide export bridges that preserve its local evidence model while acknowledging that external formats may be less expressive. + +## Goals + +- Define export bridges as downstream projections from InQL evidence. +- Preserve semantic target references and evidence versions where possible. 
+- Allow lossy external mappings only when loss is explicit. +- Keep provider configuration and hosted ingestion outside InQL core. +- Support local export without requiring a specific external service. + +## Non-Goals + +- Making any external format the internal InQL evidence model. +- Defining hosted ingestion, storage, dashboards, or managed governance behavior. +- Defining a telemetry provider, collector, exporter, or sampling policy. +- Guaranteeing that every external tool can represent every InQL evidence feature. + +## Guide-level explanation (how authors think about it) + +An author or CI job can export evidence from local artifacts: + +```incan +bundle = governed_plan_bundle(summary) +bundle.export_openlineage("target/inql/openlineage.json") +bundle.export_telemetry("target/inql/telemetry.json") +``` + +The names are illustrative. The key contract is that exports are generated from InQL evidence artifacts, not from backend logs or reconstructed SQL strings. + +## Reference-level explanation (precise rules) + +An export bridge must declare its source evidence schema versions, target format, target format version when available, mapping coverage, unsupported fields, redaction behavior, and diagnostics. + +Export bridges must preserve semantic target identifiers when the target format can carry them. When the target format cannot carry them directly, the bridge should preserve them in an extension, custom facet, attribute, or sidecar artifact when safe. + +Lossy mappings must be explicit. If an external lineage format cannot distinguish value, control, grouping, join, and sort lineage, the export must either preserve the distinction through an extension or report the loss. + +Sensitive attachments must follow visibility rules. Export bridges must not leak sensitive payloads merely because a target format lacks redaction semantics. + +Provider configuration, authentication, network transport, sampling, hosted ingestion, and storage are outside this RFC. 
+ +## Design details + +### Syntax + +This RFC introduces no authoring syntax. + +### Semantics + +Exports are projections. They must not become the authoritative source of InQL plan, lineage, quality, or execution semantics. + +### Interaction with other InQL surfaces + +Export bridges depend on inspection artifacts, execution observations, quality observations, adapter coverage, and governed plan bundles. They should map from those records rather than from backend-specific plans. + +### Compatibility / migration + +Export bridges must version their mappings. Adding a new internal evidence field should not silently change external semantics without a mapping version change or documented behavior. + +## Alternatives considered + +- **Adopt one external lineage model internally.** Rejected because InQL needs evidence that many external tools cannot represent directly. +- **Leave all exports to downstream systems.** Rejected because independent reconstruction causes drift. +- **Require hosted ingestion.** Rejected because local export must work in open InQL. + +## Drawbacks + +- Export bridges require maintenance as external formats evolve. +- Some mappings will be lossy or require extensions. +- Redaction rules can make exports harder to debug. + +## Layers affected + +- **InQL specification** — export bridge responsibilities and loss reporting become normative. +- **InQL library package** — export APIs may live in core or optional modules. +- **Execution / interchange** — exports may include Substrait references, telemetry-shaped observations, and lineage events. +- **Documentation** — docs must identify external exports as projections, not internal truth. + +## Unresolved questions + +- Which export bridge should be implemented first? +- Should export bridges live in the core package or optional integration packages? +- What sidecar format should preserve InQL-specific evidence when an external target is lossy? 
+ + diff --git a/docs/rfcs/040_interoperability_semantic_profiles.md b/docs/rfcs/040_interoperability_semantic_profiles.md new file mode 100644 index 0000000..95e50a5 --- /dev/null +++ b/docs/rfcs/040_interoperability_semantic_profiles.md @@ -0,0 +1,215 @@ +# InQL RFC 040: Interoperability semantic profiles + +- **Status:** Draft +- **Created:** 2026-05-30 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 000 (core language model and layer boundaries) + - InQL RFC 002 (Apache Substrait integration) + - InQL RFC 004 (execution context) + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 008 (optimizer boundary, statistics, cost-based optimization, and adaptive execution) + - InQL RFC 012 (unified scalar expression surface) + - InQL RFC 013 (function catalog program) + - InQL RFC 024 (function extension policy) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 031 (local inspection APIs and artifacts) + - InQL RFC 032 (execution observations) + - InQL RFC 033 (adapter requirements and coverage) + - InQL RFC 036 (governed plan bundle) + - InQL RFC 038 (evidence export bridges) + - InQL RFC 041 (Prism plan ingress and external client frontends) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines interoperability semantic profiles for InQL evidence. A profile describes the semantic environment a plan is being received from, compared with, targeted at, or observed under: an InQL baseline, client protocol, plan ingress frontend, execution engine, adapter binding, SQL dialect, interchange consumer, or conformance baseline. 
Profiles give ingress coverage records, adapter requirements, coverage records, execution observations, plan diffs, bundles, and exports a shared context without making any external system the owner of InQL relational meaning. + +## Motivation + +Interoperability requires more than lowering a plan and asking whether an adapter has a support flag. Different target environments can share the same relational vocabulary while differing on edge semantics: type coercion, decimal overflow, timestamp and timezone behavior, identifier resolution, null and NaN ordering, collation, case sensitivity, function definitions, aggregate edge cases, window defaults, nested data behavior, row ordering, and fallback execution. + +If InQL does not name the semantic profile used for an inspection or execution, those assumptions will be scattered across adapters, Substrait metadata, docs, and runtime diagnostics. That would make coverage hard to trust. A plan could appear portable while relying on target-specific behavior that was never recorded as evidence. + +Profiles provide the missing layer between InQL-authored semantics, plan ingress, and adapter coverage. Prism remains the source of authored and rewritten relational meaning. Profiles describe source and target environments well enough for InQL to produce ingress diagnostics, requirements, coverage records, and observations against them. + +## Goals + +- Define semantic profiles as versioned evidence records. +- Allow profiles for InQL baselines, client protocols, plan ingress frontends, execution engines, adapter bindings, SQL dialects, interchange consumers, and conformance baselines. +- Name the semantic dimensions that affect relational correctness and evidence interpretation. +- Let adapter requirements and coverage records state which profile they were evaluated against. +- Let execution observations report the profile requested before execution and the profile observed at runtime when available. 
+- Keep profiles local and open, without requiring a hosted registry or managed control plane. +- Keep external target profiles non-authoritative for InQL semantics. + +## Non-Goals + +- Defining a profile for one specific external engine. +- Making any external engine, SQL dialect, or interchange format the normative InQL semantic model. +- Defining SQL transpilation, physical planning, or backend execution strategies. +- Defining a full conformance test suite. +- Defining a global registry of every engine version or deployment configuration. +- Guaranteeing semantic equivalence merely because a profile name is present. + +## Guide-level explanation (how authors think about it) + +Most authors should encounter profiles through inspection, coverage, and execution evidence: + +```incan +from pub::inql.inspect import inspect_plan + +inspection = inspect_plan(summary) +profile = inspection.semantic_profile("portable_relational") + +requirements = inspection.adapter_requirements(profile) +coverage = session.check_coverage(summary, target_profile=profile) +``` + +The exact API names are illustrative. The important model is that the target profile is explicit. A coverage report should be able to say which semantic profile was used and which dimensions are covered, constrained, mismatched, or unknown. + +Execution can attach the same evidence context: + +```incan +result = session.collect(summary, target_profile=profile) +observation = result.execution_observation() + +assert observation.requested_profile == profile.id +``` + +If the runtime adapter reports a different engine version, configuration, or semantic mode than the requested profile expected, the observation should record that difference as structured evidence. + +## Reference-level explanation (precise rules) + +InQL must define an interoperability semantic profile record. 
A profile record must include: + +- profile identity +- profile schema version +- target class +- profile name or family +- source +- target version information when available +- target configuration fingerprint when available +- semantic dimensions +- evidence references +- confidence or completeness +- diagnostics + +Target class must distinguish at least: + +- inql_baseline +- client_protocol +- plan_ingress_frontend +- execution_engine +- adapter_binding +- sql_dialect +- interchange_consumer +- conformance_baseline + +Semantic dimensions must be represented as structured records rather than free-form prose. Initial dimensions should include, where applicable: + +- type system and implicit coercion +- numeric and decimal semantics +- temporal, timezone, and calendar semantics +- boolean, null, and NaN semantics +- string comparison, collation, and case sensitivity +- identifier resolution and catalog naming +- client session state and configuration semantics +- relation ordering and determinism +- aggregate and grouping edge semantics +- window frame and ordering semantics +- nested, variant, and semi-structured data semantics +- function and operator identity +- extension and fallback behavior +- plan-stage observability + +A semantic dimension record must include dimension identity, lifecycle, declared behavior when known, source, evidence references, confidence, and diagnostics. A dimension may be exact, constrained, unknown, or not_applicable. Unknown dimensions must not be treated as matching InQL semantics. + +InQL may define profile assessments that compare a plan or bundle with a profile. A profile assessment must include the plan target, profile identity, affected semantic targets, assessed dimensions, result state, evidence references, confidence, and diagnostics. 
+ +Profile assessment result state must distinguish at least: + +- matched: InQL can determine that the plan's required semantics match the profile for the assessed dimension +- constrained: the profile can satisfy the dimension only under recorded restrictions +- mismatched: the profile does not satisfy the plan's required semantics for the dimension +- unknown: InQL cannot determine whether the profile satisfies the dimension +- not_applicable: the dimension does not apply to the plan or target profile + +Adapter requirements and coverage records may cite profile records and profile assessments. If coverage depends on a profile, the coverage record must identify the profile. Coverage evaluated under one profile must not be reused under a different profile unless the evidence proves that the relevant semantic dimensions are equivalent. + +Execution observations may include a requested profile and an observed profile. The requested profile is the semantic profile used during pre-execution inspection or coverage checks. The observed profile records runtime facts reported by the adapter, such as engine version, adapter version, semantic mode, or relevant configuration. A mismatch between requested and observed profiles must be diagnostic evidence. It must not silently rewrite the plan's authored semantics. + +Profiles must not replace Prism semantic targets, lineage edges, adapter requirements, or execution observations. They provide context for evidence. They do not create fields, lineage, policy decisions, quality observations, or coverage states by themselves. + +Serialized artifacts that include profile records must distinguish missing profile evidence from an empty or fully matching profile assessment. + +## Design details + +### Syntax + +This RFC introduces no authoring syntax. + +### Semantics + +Semantic profiles are evidence contexts. They describe the target environment against which InQL evidence is checked, exported, or observed. 
They do not define InQL relational meaning. + +Profiles may be authored, built into InQL, imported from artifacts, produced by adapters, or observed during execution. The source and lifecycle must be recorded so tools can distinguish a trusted built-in profile from an adapter-reported runtime observation or an imported profile. + +### Interaction with other InQL surfaces + +Prism remains the source of authored and rewritten relational meaning. Profile assessments consume Prism targets, lineage, schema flow, function registry facts, ingress coverage records, and adapter requirements. They must not infer semantic structure from backend plan strings or external client protocol node identifiers. + +Plan ingress frontends may use profile evidence when decoding and analyzing external client plans. A Spark Connect frontend, for example, may use a client protocol profile to decide identifier resolution, function aliases, coercion behavior, and unsupported-feature diagnostics before Prism produces an analyzed plan. + +Substrait lowering may carry or reference profile evidence, but Substrait must not be the only profile evidence store. + +Function registry entries may contribute profile dimensions when function identity, determinism, null behavior, extension behavior, or backend availability affects compatibility. + +Adapter coverage records should cite the profile used for evaluation when the answer depends on target semantics. Execution observations should report runtime profile facts when adapters can provide them. + +Governed plan bundles may include profile records and profile assessments so downstream tools can understand which target environments were checked. + +Export bridges may project profile evidence into external formats. Lossy exports must report dimensions that could not be represented. + +### Compatibility / migration + +Existing plans and adapters remain valid without profile evidence. 
Tools that require profile evidence must report missing profiles as unsupported or unknown evidence rather than assuming portability. + +Profile schemas must be versioned from the start. Profile names that appear in serialized artifacts must be stable public vocabulary or explicitly marked as local/private. + +## Alternatives considered + +- **Use adapter support flags only.** Rejected because support depends on target semantics, engine version, configuration, and execution mode. +- **Use Substrait as the profile model.** Rejected because Substrait is an interchange boundary and does not capture every InQL evidence dimension. +- **Make one external engine profile normative.** Rejected because InQL needs to interoperate with multiple targets without importing one target's semantics as the language definition. +- **Rely only on conformance tests.** Rejected because tests are valuable evidence but do not replace structured profile records, coverage states, or diagnostics. +- **Leave profiles to downstream integrations.** Rejected because independent profile reconstruction would cause drift across adapters, CI, notebooks, agents, and governance exports. + +## Drawbacks + +- Profiles add another evidence concept that must stay distinct from requirements and coverage. +- Profile dimension vocabulary will require maintenance as InQL and target environments grow. +- Early profiles may contain many unknown dimensions, which can make reports feel conservative. +- Runtime-observed profiles can differ from requested profiles, requiring clear diagnostics. + +## Layers affected + +- **InQL specification** — semantic profile records, dimensions, and assessment states become part of the relational evidence vocabulary. +- **InQL library package** — inspection, coverage, bundle, and export APIs must be able to expose profile records when available. +- **Execution / interchange** — sessions and adapters may report requested and observed profile evidence without owning InQL semantics. 
+- **Documentation** — docs must explain profiles as evidence contexts, not as alternative semantic authorities. + +## Unresolved questions + +- Which semantic dimensions are mandatory in the first implementation? +- Should built-in InQL profiles live in core or in optional integration packages? +- How should profile records compare target configurations without leaking sensitive deployment details? +- Should conformance test results become profile evidence in this RFC or a later RFC? + + diff --git a/docs/rfcs/041_prism_plan_ingress_frontends.md b/docs/rfcs/041_prism_plan_ingress_frontends.md new file mode 100644 index 0000000..4310101 --- /dev/null +++ b/docs/rfcs/041_prism_plan_ingress_frontends.md @@ -0,0 +1,167 @@ +# InQL RFC 041: Prism plan ingress and external client frontends + +- **Status:** Draft +- **Created:** 2026-05-30 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 000 (core language model and layer boundaries) + - InQL RFC 004 (execution context) + - InQL RFC 007 (Prism logical planning and optimization engine) + - InQL RFC 012 (unified scalar expression surface) + - InQL RFC 013 (function catalog program) + - InQL RFC 027 (relational evidence program) + - InQL RFC 028 (semantic identity and target model) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 031 (local inspection APIs and artifacts) + - InQL RFC 033 (adapter requirements and coverage) + - InQL RFC 040 (interoperability semantic profiles) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines Prism plan ingress and external client frontends for InQL. A frontend receives an external authoring or client protocol such as Spark Connect, SQL, or another unresolved relational plan surface, decodes it into a Prism-owned unresolved ingress plan, and asks Prism to analyze that plan into ordinary InQL relational semantics. 
The frontend may preserve client-origin evidence, client-session evidence, protocol diagnostics, and ingress coverage records, but it must not make the external protocol, Spark, Substrait, DataFusion, or any backend adapter the semantic owner of the plan. + +## Motivation + +InQL should be able to interoperate with established client APIs without pretending that those APIs own InQL's semantics. Spark Connect is the clearest pressure: a PySpark client can submit plan-shaped calls over a protocol boundary, and those calls may depend on client session state such as configuration, current catalog, temporary views, or function aliases. InQL should not route those calls through Spark just to recover meaning later. Prism should receive an unresolved representation, resolve names and functions, apply InQL semantic rules under an explicit profile and session context, and then continue through the normal planning, evidence, Substrait, and execution paths. + +Without a first-class ingress contract, external API support will be squeezed into session adapters, backend adapters, Substrait metadata, or compatibility flags. That would hide the real boundary. Execution adapters run plans. Plan ingress frontends receive external plan requests and let Prism create InQL plans. + +## Goals + +- Define plan ingress frontends as distinct from execution session adapters. +- Define a Prism-owned unresolved ingress plan model for external client plans. +- Preserve client-origin evidence without treating external node identifiers as InQL semantic identities. +- Represent client session state that can affect Prism analysis. +- Allow Spark Connect-style clients to submit supported relation, expression, and command calls that Prism analyzes directly. +- Require structured unsupported-feature diagnostics and ingress coverage records. 
+- Connect ingress analysis to semantic profiles for client protocol behavior such as case sensitivity, function aliases, coercion, temporal behavior, and null ordering. +- Keep DataFusion and other execution backends behind the existing execution adapter boundary. + +## Non-Goals + +- Defining full Spark API parity. +- Defining every Spark Connect protobuf message, gRPC service method, or streaming transport detail. +- Making Spark, PySpark, Spark Connect, DataFusion, or Substrait the source of InQL relational meaning. +- Defining SQL transpilation as the internal planning model. +- Reimplementing every external client session lifecycle rule. +- Defining a hosted compatibility service or global conformance registry. +- Requiring every InQL deployment to expose an external client protocol. + +## Guide-level explanation (how authors think about it) + +An external client frontend lets an existing tool submit relational intent while InQL keeps Prism as the planner: + +```incan +from pub::inql.frontends.spark import spark_connect_frontend +from pub::inql.session import datafusion_session + +frontend = spark_connect_frontend(session_factory=datafusion_session) +frontend.serve("127.0.0.1:15002") +``` + +A PySpark client may send a supported Spark Connect plan to that endpoint. The frontend decodes the request into a Prism ingress plan, Prism analyzes it, and the selected InQL session executes the resulting plan through the normal adapter path. + +The important user model is not that InQL becomes Spark internally. The model is that InQL can speak a client protocol at the edge while keeping Prism responsible for names, functions, types, lineage, diagnostics, evidence, and execution requirements. + +## Reference-level explanation (precise rules) + +InQL must define a plan ingress frontend boundary. A frontend may parse, decode, authenticate, route, frame client requests, and maintain client session state. 
It must not resolve relational semantics by delegating to an external engine when Prism can represent the requested plan. + +An ingress frontend must produce a Prism-owned unresolved ingress plan or a structured unsupported diagnostic. An unresolved ingress plan must represent at least: + +- ingress request identity +- ingress frontend identity +- client session identity when the request is session-scoped +- client protocol and protocol version when available +- requested semantic profile when available +- unresolved relation nodes +- unresolved expression nodes +- unresolved command nodes when the protocol includes commands +- client-origin references +- diagnostics +- ingress coverage records + +Prism analysis of an ingress plan must produce ordinary Prism semantic targets, relation plans, expression plans, diagnostics, lineage inputs, adapter requirements, and inspection evidence. External client node identifiers may be preserved as origin references, but they must not replace Prism semantic target identities. + +Client session state that can affect analysis must be represented as client session targets, typed attachments, profile evidence, or analyzer binding references. This includes current catalog or namespace, temporary relation names, session variables, function registrations, case sensitivity, ANSI or compatibility modes, timezone settings, and other frontend-specific state that changes name resolution, type coercion, function lookup, or expression semantics. + +Analyzer bindings that depend on client session state must reference the relevant client session target or profile assessment. A later inspection should be able to explain that a relation, function, alias, identifier, or coercion resolved under a specific client session context without treating that client session as the source of relational meaning. 
+ +Ingress frontends must distinguish at least the following coverage states for protocol features: + +- supported: Prism can represent and analyze the feature under the selected profile +- partially_supported: Prism can represent the feature only under recorded restrictions +- unsupported: Prism cannot represent or analyze the feature +- unknown: InQL cannot determine support for the feature + +Unsupported and unknown ingress features must be reported before execution when they affect plan semantics. They must not be silently lowered to backend-specific behavior. + +Spark Connect compatibility must be modeled as a frontend protocol profile plus ingress coverage, not as a backend adapter capability alone. A Spark Connect frontend may accept supported relation and expression calls, reject unsupported calls, and report coverage for omitted protocol areas. It must not require Spark's engine to produce the semantic plan that Prism will execute. + +Semantic profiles may affect ingress analysis. Profile dimensions may control identifier resolution, case sensitivity, function aliases, type coercion, null and NaN behavior, timestamp semantics, decimal behavior, ANSI mode, and other compatibility-sensitive rules. Profile evidence must be explicit when the frontend uses those rules. + +Commands that do not describe relational computation, such as session configuration, catalog inspection, temporary view registration, cache control, or client lifecycle operations, must be represented as command ingress nodes, mapped to explicit InQL client session or execution session behavior, or rejected with structured diagnostics. Accepted commands that mutate client session state must produce client session evidence. They must not be disguised as ordinary relational plan nodes. + +Plan ingress evidence must be inspectable. 
Tools must be able to see which frontend produced a plan, which client session context affected analysis, which client protocol features were used, which features were unsupported or partially supported, which profile governed analysis, and how client-origin references map to Prism targets. + +## Design details + +### Syntax + +This RFC introduces no InQL authoring syntax. Frontends are package or service APIs around Prism and Session. + +### Semantics + +Plan ingress is a semantic boundary before Prism analysis. It is not an execution boundary. The frontend receives external input, Prism owns analysis, and Session plus execution adapters own execution. + +The unresolved ingress model should be general enough for Spark Connect, SQL parsers, notebook clients, and future external plan protocols, but each frontend must declare its own protocol coverage, session-state model, and profile assumptions. + +### Interaction with other InQL surfaces + +Method chains, `query {}` blocks, SQL frontends, and Spark Connect frontends may all produce Prism plans. Equivalent relational intent should converge on comparable Prism targets after analysis, subject to explicit profile differences. + +Prism lineage must preserve client-origin relationships where available. Those relationships explain where a Prism target came from, but they are not value lineage by themselves. + +Inspection APIs and artifacts must expose ingress diagnostics, origin mappings, selected profiles, relevant client session targets, and ingress coverage when a plan came through a frontend. + +Adapter requirements and coverage remain execution-facing evidence. Ingress coverage is frontend-facing evidence. A Spark Connect relation node being accepted by a frontend is not the same as DataFusion being able to execute the resulting plan. + +### Compatibility / migration + +Existing InQL plans and sessions remain valid without ingress evidence. Frontends are additive. 
Tools that require client-origin evidence must report missing ingress evidence as unsupported or unknown rather than inferring it from display names or backend plans. + +## Alternatives considered + +- **Use Spark as the planner.** Rejected because that would make Spark the semantic owner and reduce Prism to an execution bridge. +- **Translate Spark Connect directly to Substrait.** Rejected because Substrait is an interchange boundary, not the full InQL semantic analysis and evidence model. +- **Treat Spark Connect support as a backend adapter.** Rejected because receiving client plan calls and executing an analyzed plan are different boundaries. +- **Only support InQL-native authoring.** Rejected because external client protocols are valuable when they feed Prism honestly instead of bypassing it. +- **Accept unsupported calls and hope the backend handles them.** Rejected because unknown frontend semantics must be visible before execution. + +## Drawbacks + +- Frontends require protocol-specific maintenance. +- Spark compatibility pressure can pull non-portable semantics into the system unless profiles and coverage stay explicit. +- The unresolved ingress model adds another planning representation before Prism's analyzed plan. +- Early frontend support will be partial, which requires clear diagnostics and compatibility documentation. + +## Layers affected + +- **InQL specification** — plan ingress, frontend coverage, origin mapping, and Prism analysis boundaries become normative vocabulary. +- **InQL library package** — frontend APIs, unresolved ingress plan records, diagnostics, and inspection records must be exposed through public modules where implemented. +- **Execution / interchange** — Session and backend adapters execute Prism-owned plans and may report adapter coverage, but they do not own ingress semantics. 
+- **Documentation** — docs must distinguish external client protocol support from Spark engine compatibility, Substrait interchange, and DataFusion execution. + +## Unresolved questions + +- Which Spark Connect relation and expression nodes belong in the first supported ingress slice? +- Should ingress frontends live in the core package or optional integration packages? +- What is the minimum common unresolved ingress model shared by Spark Connect, SQL, and future client protocols? +- Which client session fields are required in the first Spark Connect-compatible profile without importing Spark's session model wholesale? + + diff --git a/docs/rfcs/README.md b/docs/rfcs/README.md index e11824e..d83bdfb 100644 --- a/docs/rfcs/README.md +++ b/docs/rfcs/README.md @@ -33,6 +33,20 @@ InQL uses its **own** RFC series (starting at 000), independent of the [Incan la | [024][rfc-024] | Implemented | Function extension policy | | | [025][rfc-025] | Implemented | Typed sketch logical values | | | [026][rfc-026] | Implemented | Semi-structured variant logical values | | +| [027][rfc-027] | Draft | Relational evidence program | | +| [028][rfc-028] | Draft | Semantic identity and target model | | +| [029][rfc-029] | Draft | Typed metadata attachments | | +| [030][rfc-030] | Draft | Prism lineage graph | | +| [031][rfc-031] | Draft | Local inspection APIs and artifacts | | +| [032][rfc-032] | Draft | Execution observations | | +| [033][rfc-033] | Draft | Adapter requirements and coverage | | +| [034][rfc-034] | Draft | Quality assertions and observations | | +| [035][rfc-035] | Draft | Governed attributes and policy checkpoints | | +| [036][rfc-036] | Draft | Governed plan bundle | | +| [037][rfc-037] | Draft | Plan diff and blast-radius inputs | | +| [038][rfc-038] | Draft | Evidence export bridges | | +| [040][rfc-040] | Draft | Interoperability semantic profiles | | +| [041][rfc-041] | Draft | Prism plan ingress and external client frontends | | @@ -71,4 +85,18 @@ New RFCs 
should follow [TEMPLATE.md] (aligned with Incan’s RFC structure, adap [rfc-024]: 024_function_extension_policy.md [rfc-025]: 025_typed_sketch_logical_values.md [rfc-026]: 026_semi_structured_variant_values.md +[rfc-027]: 027_relational_evidence_program.md +[rfc-028]: 028_semantic_identity_targets.md +[rfc-029]: 029_metadata_attachments.md +[rfc-030]: 030_prism_lineage_graph.md +[rfc-031]: 031_inspection_artifacts.md +[rfc-032]: 032_execution_observations.md +[rfc-033]: 033_adapter_requirements_coverage.md +[rfc-034]: 034_quality_assertions_observations.md +[rfc-035]: 035_governed_attributes_policy_checkpoints.md +[rfc-036]: 036_governed_plan_bundle.md +[rfc-037]: 037_plan_diff_blast_radius_inputs.md +[rfc-038]: 038_evidence_export_bridges.md +[rfc-040]: 040_interoperability_semantic_profiles.md +[rfc-041]: 041_prism_plan_ingress_frontends.md [incan-rfcs]: https://github.com/dannys-code-corner/incan/tree/main/workspaces/docs-site/docs/RFCs From 44b72bee329f8c761d26b58049f3e51a2c30b444 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Fri, 5 Jun 2026 17:05:33 +0200 Subject: [PATCH 2/6] docs - include RFC039 exploration API draft --- .../039_pandas_familiar_exploration_api.md | 232 ++++++++++++++++++ docs/rfcs/README.md | 2 + 2 files changed, 234 insertions(+) create mode 100644 docs/rfcs/039_pandas_familiar_exploration_api.md diff --git a/docs/rfcs/039_pandas_familiar_exploration_api.md b/docs/rfcs/039_pandas_familiar_exploration_api.md new file mode 100644 index 0000000..bff1cb4 --- /dev/null +++ b/docs/rfcs/039_pandas_familiar_exploration_api.md @@ -0,0 +1,232 @@ +# InQL RFC 039: Pandas-familiar exploration API + +- **Status:** Draft +- **Created:** 2026-05-30 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 000 (language specification, naming, schema shapes, and relational positions) + - InQL RFC 001 (dataset carriers and method-chain API surface) + - InQL RFC 003 (`query {}` blocks and relational authoring) + - InQL RFC 005 (pipe-forward 
relational syntax) + - InQL RFC 012 (unified scalar expression surface) +- **Issue:** — +- **RFC PR:** — +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines a pandas-familiar exploration API for InQL dataset carriers. The API provides dictionary-like column access through `data["column"]`, projection through `data[["a", "b"]]`, boolean filtering through `data[predicate]`, and a small set of familiar method aliases such as `where`, `assign`, `groupby`, `sort_values`, and `head`. These forms are ergonomic aliases over InQL's existing typed relational model; they must not introduce pandas row-indexing, mutable frame, eager execution, or index-alignment semantics. + +## Core model + +1. InQL dataset carriers may behave dictionary-like for columns. +2. InQL dataset carriers must not behave sequence-like for rows unless a future RFC defines row-position semantics explicitly. +3. Bracket column access returns a bound scalar column expression, not a materialized Series-like value. +4. Bracket projection and bracket filtering lower to the same relational operators as `DataSet[T]` method chains and `query {}` blocks. +5. Pandas-familiar method names are aliases over existing InQL relational operations, not alternate execution contracts. + +## Motivation + +InQL's cleaner relational APIs are good for production authoring, but data exploration has a different adoption problem. Authors coming from pandas need a familiar fallback surface for the workflows they reach for reflexively: selecting a column, filtering a frame, projecting a few columns, adding a derived column, grouping, sorting, and previewing rows. If InQL only exposes its cleanest API, it forces those users to learn InQL before they can inspect data, which is a poor fit for exploratory work. + +At the same time, copying pandas wholesale would be a design error. 
Pandas carries semantics that do not fit InQL's typed, planned, backend-neutral model: mutable frames, eager local execution, row-position indexing, index alignment, view-versus-copy behavior, dynamic dtype coercion, and a Series object model with local row values. Those features are familiar, but they would make InQL less coherent if imported as language semantics. + +This RFC takes the narrower path: provide familiar surface forms where they map cleanly to InQL's relational model, and reject the parts of pandas that would require a different data model. + +## Goals + +- Define bracket column access on `DataSet[T]` carriers through `data["column"]`. +- Define bracket projection through `data[["a", "b"]]`. +- Define bracket filtering through `data[predicate]` where `predicate` is a boolean scalar expression bound to the same dataset relation. +- Define pandas-familiar aliases for common method-chain operations where semantics already exist in InQL. +- Preserve InQL's typed schema flow, boundedness constraints, scalar expression model, Prism planning, Substrait lowering, and execution boundaries. +- Require clear diagnostics for row-indexing spellings that pandas users might try but InQL does not support. + +## Non-Goals + +- Achieving pandas API compatibility. +- Introducing row-position indexing through `data[0]`, `data[1:10]`, `.iloc`, or equivalent forms. +- Introducing pandas index labels, index alignment, multi-index behavior, or Series alignment semantics. +- Introducing mutable assignment such as `data["x"] = expr` or `inplace=true`. +- Defining pandas view-versus-copy behavior. +- Making bracket access materialize local row values. +- Defining all exploratory helpers such as `describe`, `sample`, rich display, plotting, or notebook integration. +- Replacing the cleaner InQL `DataSet[T]` method surface or `query {}` blocks. 
+ +## Guide-level explanation (how authors think about it) + +Authors can use bracket syntax when they want a familiar DataFrame-like way to refer to columns: + +```incan +high_value = orders[orders["amount"] > 100] +``` + +`orders["amount"]` is not a pandas Series. It is a column expression bound to `orders`, equivalent in meaning to a relation-scoped column reference. The comparison produces a boolean scalar expression, and `orders[...]` with that boolean expression filters rows. + +Authors can project columns with a list of names: + +```incan +order_amounts = orders[["order_id", "customer_id", "amount"]] +``` + +This is a projection over the same relation. The output preserves column order from the list and follows the same schema rules as the corresponding InQL projection operation. + +Authors can use familiar method aliases for common exploration flows: + +```incan +enriched = ( + orders + .where(orders["amount"] > 100) + .assign("gross_amount", orders["amount"] * 1.21) +) + +summary = ( + enriched + .groupby(["region"]) + .agg([sum(enriched["gross_amount"]) as total_gross, count() as order_count]) + .sort_values("total_gross", ascending=false) + .head(20) +) +``` + +Those names are familiar, but the semantics are still InQL. `where` is `filter`, `assign` is `with_column` (derived-column projection), `groupby` is `group_by`, `sort_values` is `order_by`, and `head` is `limit`. They do not execute eagerly unless the surrounding execution context materializes the result. + +Row indexing is intentionally rejected: + +```incan +first = orders[0] # compile-time error +preview = orders[1:10] # compile-time error +row = orders.iloc[0] # not part of this RFC +``` + +Authors should use `head(n)` or `limit(n)` for preview-shaped relational limiting, and window functions for ordered row-number analytics. 
+ +## Reference-level explanation (precise rules) + +### Applicability + +The pandas-familiar exploration API applies to values whose type conforms to `DataSet[T]` as defined by InQL RFC 001. The API must preserve the concrete carrier kind where the equivalent relational operation would preserve it. For example, filtering a `LazyFrame[T]` produces a `LazyFrame[T]`; filtering a `DataStream[T]` must follow the same boundedness and capability constraints as `filter(...)` on `DataStream[T]`. + +### Bracket column access + +For a dataset value `data: DataSet[T]`, `data["name"]` denotes a scalar column expression bound to `data` and the field named `name`. + +The key expression in the strongly typed form must be a string literal or another compile-time-known string that can be checked against the schema. If `T` is a closed local model, the named field must exist or compilation must fail. If `T` is open-ended or dynamic, field lookup must follow the schema-shape rules from InQL RFC 000, including warnings for undeclared open-ended fields where that RFC requires them. + +`data["name"]` must not materialize a local column, must not produce a Series-like object, and must not imply that `data` is locally available. It is a typed relational scalar expression that may be used in relational expression positions and in ordinary Incan expression positions that consume InQL scalar expressions. + +Bound column expressions must preserve relation provenance. A predicate built from `orders["amount"]` may filter `orders`, but must not be accepted as a filter predicate for an unrelated dataset unless an explicit relational operation such as a join establishes the appropriate relation context. + +### Bracket projection + +For a dataset value `data: DataSet[T]`, `data[["a", "b"]]` denotes an ordered projection containing the named columns in the list. 
+ +The projection list in the strongly typed form must be a list literal or another compile-time-known list of string column names. Each projected name must be checked using the same schema-shape rules as bracket column access. The output schema must contain the projected columns in the order supplied by the list. + +Duplicate projected names must be rejected unless a future RFC defines duplicate-column schema semantics. Pandas permits duplicate column labels, but InQL's typed relation model requires deterministic field names. + +Bracket projection must lower to the same relational projection semantics as the corresponding `DataSet[T]` projection surface or `query { SELECT ... }` form. It must not materialize data. + +### Bracket filtering + +For a dataset value `data: DataSet[T]`, `data[predicate]` denotes row filtering when `predicate` is a scalar expression whose result type is `bool` and whose relation provenance is compatible with `data`. + +Bracket filtering must lower to the same operation as `data.filter(predicate)`. Null handling, boundedness checks, backend capability checks, and diagnostics must be identical to the equivalent `filter(...)` operation. + +`data[predicate]` must not accept materialized boolean arrays, local lists of booleans, row masks, or pandas-like index-aligned masks unless a future RFC defines those concepts explicitly. + +### Rejected bracket forms + +The following forms must produce compile-time diagnostics: + +- `data[0]` +- `data[1:10]` +- `data[-1]` +- `data[mask]` where `mask` is a materialized local boolean list or array rather than an InQL scalar expression +- any bracket form whose key cannot be resolved as a string column key, compile-time-known list of string column keys, or boolean scalar expression + +Diagnostics should explain that InQL supports dictionary-like column access and relational filtering, not pandas row indexing. 
+ +### Familiar method aliases + +The initial pandas-familiar method alias set is: + +| Alias | Canonical InQL operation | Required semantics | +| ----- | ------------------------ | ------------------ | +| `where(predicate)` | `filter(predicate)` | Filter rows using a boolean scalar expression. | +| `assign(name, expr)` | `with_column(name, expr)` | Add or replace one derived column. | +| `groupby(columns)` | `group_by(columns)` | Group using column names or scalar grouping expressions accepted by the canonical operation. | +| `sort_values(by, ascending=true)` | `order_by(...)` | Sort by one or more named columns or ordering expressions. | +| `head(n)` | `limit(n)` | Apply a relational row limit with the same carrier and boundedness rules as `limit`. | + +These aliases must not alter the plan produced by the canonical operation except for source-location metadata used in diagnostics. If a canonical operation is unavailable for a carrier because of boundedness rules, the alias must be unavailable for the same reason. + +### Method alias arguments + +String column names accepted by familiar aliases must resolve using the same schema-shape rules as bracket column access. Scalar expression arguments must use the unified scalar expression model from InQL RFC 012. Aggregate arguments used after `groupby(...)` must use the aggregate-measure rules defined by the relevant aggregate RFCs. + +`sort_values("amount")` must be equivalent to ordering by the `amount` column in ascending order. `sort_values("amount", ascending=false)` must be equivalent to ordering by the same column in descending order. Multi-column sort arguments may be supported through a compile-time-known list of names or ordering expressions, provided they lower to the same ordering model as `order_by(...)`. + +### Interaction with `query {}` and pipe-forward + +This RFC does not add new `query {}` clause syntax. 
Bracket access and familiar method aliases must remain semantically equivalent to `query {}` blocks that express the same relational operations. + +If pipe-forward from InQL RFC 005 is implemented, it must not define different pandas-familiar semantics. A pipe-forward stage that corresponds to `where`, projection, grouping, sorting, or limiting must lower to the same relational model as bracket and method-chain forms. + +## Design details + +### Syntax + +This RFC requires the Incan language and InQL vocabulary integration to recognize bracket access on dataset carriers for the forms specified above. The bracket syntax is intentionally overloaded only by key shape: string key for column expression, list of string keys for projection, and boolean scalar expression for filtering. + +The RFC does not require `.loc`, `.iloc`, attribute-style column access, or assignment syntax. + +### Semantics + +The semantic distinction is column dictionary access versus row sequence access. `DataSet[T]` carriers may be indexed by column name because the schema is part of the relational type. They may not be indexed by row position because row order is not an inherent property of an unordered relation and because InQL execution may be lazy, distributed, streamed, or backend-planned. + +`head(n)` is a relational limit, not proof of stable row order. Authors who need deterministic preview order should combine `sort_values(...)` or `order_by(...)` with `head(...)`. + +### Interaction with Incan `model` types and schema shapes + +Closed model schemas provide the strongest checks for bracket column access and projection. Open-ended and dynamic schemas follow the existing InQL schema-shape contract: declared fields remain checked, while undeclared fields may be allowed with warnings or dynamic typing where InQL RFC 000 permits that behavior. + +### Compatibility / migration + +This RFC is additive. 
Existing `DataSet[T]` method chains and `query {}` blocks remain valid and remain the canonical semantic reference. + +The main compatibility risk is expectation compatibility, not source compatibility. Authors who expect full pandas behavior must receive clear diagnostics and documentation explaining which familiar forms InQL supports and which ones are deliberately absent. + +## Alternatives considered + +- **Expose only the clean InQL API.** Rejected because exploration workflows need familiarity, and forcing pandas users through only the cleanest InQL surface raises the cost of first use. +- **Implement broad pandas compatibility.** Rejected because pandas row indexes, eager Series values, mutable assignment, and index alignment conflict with InQL's typed relational planning model. +- **Support `.loc` and `.iloc` initially.** Rejected because both imply an index or positional row model that this RFC intentionally excludes. +- **Use attribute column access such as `data.amount`.** Rejected for the initial surface because it collides with ordinary methods and carrier properties more readily than string-key access. +- **Make `data["name"]` return a Series-like value.** Rejected because it would imply local materialization and row-level value access instead of a backend-neutral scalar expression. + +## Drawbacks + +- Familiar names can create false expectations about pandas parity unless diagnostics and docs are explicit. +- Bracket syntax adds an overloaded authoring surface that tooling must explain carefully. +- Method aliases duplicate parts of the canonical API, increasing documentation and completion surface area. +- Rejecting row indexing is correct for InQL's model but will still surprise some pandas users. +- Compile-time-known projection lists may feel less flexible than pandas during ad hoc exploration, especially for dynamic column selection. 
+ +## Layers affected + +- **InQL specification** — RFCs 000, 001, 003, 005, and 012 must stay coherent with bracket access, relation provenance, scalar expression typing, and alias semantics. +- **InQL library package** — public `.incn` APIs must expose the familiar aliases and tests must cover equivalence to canonical operations where those aliases are library-level methods. +- **Incan compiler** — parser, typechecker, lowering, diagnostics, formatter, and LSP may need dataset-aware bracket handling and source spans for useful errors. +- **Execution / interchange** — Prism, Substrait lowering, sessions, and backend adapters must receive the same logical operators as canonical InQL operations; this RFC must not introduce a separate execution path. +- **Documentation** — reference and explanation docs should present the pandas-familiar API as an exploration facade and document the rejected pandas semantics directly. + +## Unresolved questions + +- Should `assign(...)` require exactly one `(name, expr)` pair initially, or should it also support a list of assignments once projection assignment syntax is settled? +- Should `groupby(...)` accept only strings in the familiar surface, or should it accept every scalar grouping expression accepted by `group_by(...)` from day one? +- Should dynamic runtime `list[str]` projection be permitted for dynamic carriers, and if so what output schema should the typechecker assign? +- Should `merge(...)` be included in the initial alias set, or deferred until join output typing and pandas-style join argument spelling are specified more completely? 
+ + diff --git a/docs/rfcs/README.md b/docs/rfcs/README.md index d83bdfb..7a65567 100644 --- a/docs/rfcs/README.md +++ b/docs/rfcs/README.md @@ -45,6 +45,7 @@ InQL uses its **own** RFC series (starting at 000), independent of the [Incan la | [036][rfc-036] | Draft | Governed plan bundle | | | [037][rfc-037] | Draft | Plan diff and blast-radius inputs | | | [038][rfc-038] | Draft | Evidence export bridges | | +| [039][rfc-039] | Draft | Pandas-familiar exploration API | | | [040][rfc-040] | Draft | Interoperability semantic profiles | | | [041][rfc-041] | Draft | Prism plan ingress and external client frontends | | @@ -97,6 +98,7 @@ New RFCs should follow [TEMPLATE.md] (aligned with Incan’s RFC structure, adap [rfc-036]: 036_governed_plan_bundle.md [rfc-037]: 037_plan_diff_blast_radius_inputs.md [rfc-038]: 038_evidence_export_bridges.md +[rfc-039]: 039_pandas_familiar_exploration_api.md [rfc-040]: 040_interoperability_semantic_profiles.md [rfc-041]: 041_prism_plan_ingress_frontends.md [incan-rfcs]: https://github.com/dannys-code-corner/incan/tree/main/workspaces/docs-site/docs/RFCs From d4afefefe3434d5f1479dd834470465f57f25ae3 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Fri, 5 Jun 2026 17:22:45 +0200 Subject: [PATCH 3/6] docs - link relational evidence RFC trackers --- docs/rfcs/027_relational_evidence_program.md | 4 ++-- docs/rfcs/028_semantic_identity_targets.md | 4 ++-- docs/rfcs/029_metadata_attachments.md | 4 ++-- docs/rfcs/030_prism_lineage_graph.md | 4 ++-- docs/rfcs/031_inspection_artifacts.md | 4 ++-- docs/rfcs/032_execution_observations.md | 4 ++-- docs/rfcs/033_adapter_requirements_coverage.md | 4 ++-- docs/rfcs/034_quality_assertions_observations.md | 4 ++-- docs/rfcs/035_governed_attributes_policy_checkpoints.md | 4 ++-- docs/rfcs/036_governed_plan_bundle.md | 4 ++-- docs/rfcs/037_plan_diff_blast_radius_inputs.md | 4 ++-- docs/rfcs/038_evidence_export_bridges.md | 4 ++-- docs/rfcs/039_pandas_familiar_exploration_api.md | 4 ++-- 
docs/rfcs/040_interoperability_semantic_profiles.md | 4 ++-- docs/rfcs/041_prism_plan_ingress_frontends.md | 4 ++-- 15 files changed, 30 insertions(+), 30 deletions(-) diff --git a/docs/rfcs/027_relational_evidence_program.md b/docs/rfcs/027_relational_evidence_program.md index d5a04e2..f85a974 100644 --- a/docs/rfcs/027_relational_evidence_program.md +++ b/docs/rfcs/027_relational_evidence_program.md @@ -23,8 +23,8 @@ - InQL RFC 038 (evidence export bridges) - InQL RFC 040 (interoperability semantic profiles) - InQL RFC 041 (Prism plan ingress and external client frontends) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #61](https://github.com/dannys-code-corner/InQL/issues/61) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/028_semantic_identity_targets.md b/docs/rfcs/028_semantic_identity_targets.md index 88d21db..b52b900 100644 --- a/docs/rfcs/028_semantic_identity_targets.md +++ b/docs/rfcs/028_semantic_identity_targets.md @@ -9,8 +9,8 @@ - InQL RFC 007 (Prism logical planning and optimization engine) - InQL RFC 027 (relational evidence program) - InQL RFC 041 (Prism plan ingress and external client frontends) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #62](https://github.com/dannys-code-corner/InQL/issues/62) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/029_metadata_attachments.md b/docs/rfcs/029_metadata_attachments.md index 7009740..e49c6cc 100644 --- a/docs/rfcs/029_metadata_attachments.md +++ b/docs/rfcs/029_metadata_attachments.md @@ -7,8 +7,8 @@ - InQL RFC 007 (Prism logical planning and optimization engine) - InQL RFC 027 (relational evidence program) - InQL RFC 028 (semantic identity and target model) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL 
#63](https://github.com/dannys-code-corner/InQL/issues/63) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/030_prism_lineage_graph.md b/docs/rfcs/030_prism_lineage_graph.md index 50509ee..00d41df 100644 --- a/docs/rfcs/030_prism_lineage_graph.md +++ b/docs/rfcs/030_prism_lineage_graph.md @@ -14,8 +14,8 @@ - InQL RFC 027 (relational evidence program) - InQL RFC 028 (semantic identity and target model) - InQL RFC 041 (Prism plan ingress and external client frontends) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #64](https://github.com/dannys-code-corner/InQL/issues/64) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/031_inspection_artifacts.md b/docs/rfcs/031_inspection_artifacts.md index 96d791b..79ef1a9 100644 --- a/docs/rfcs/031_inspection_artifacts.md +++ b/docs/rfcs/031_inspection_artifacts.md @@ -11,8 +11,8 @@ - InQL RFC 030 (Prism lineage graph) - InQL RFC 040 (interoperability semantic profiles) - InQL RFC 041 (Prism plan ingress and external client frontends) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #65](https://github.com/dannys-code-corner/InQL/issues/65) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/032_execution_observations.md b/docs/rfcs/032_execution_observations.md index 722d347..555d907 100644 --- a/docs/rfcs/032_execution_observations.md +++ b/docs/rfcs/032_execution_observations.md @@ -9,8 +9,8 @@ - InQL RFC 028 (semantic identity and target model) - InQL RFC 031 (local inspection APIs and artifacts) - InQL RFC 040 (interoperability semantic profiles) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #66](https://github.com/dannys-code-corner/InQL/issues/66) +- **RFC PR:** [InQL 
#60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/033_adapter_requirements_coverage.md b/docs/rfcs/033_adapter_requirements_coverage.md index e665922..813c51f 100644 --- a/docs/rfcs/033_adapter_requirements_coverage.md +++ b/docs/rfcs/033_adapter_requirements_coverage.md @@ -12,8 +12,8 @@ - InQL RFC 032 (execution observations) - InQL RFC 040 (interoperability semantic profiles) - InQL RFC 041 (Prism plan ingress and external client frontends) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #67](https://github.com/dannys-code-corner/InQL/issues/67) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/034_quality_assertions_observations.md b/docs/rfcs/034_quality_assertions_observations.md index 3dfbbaa..47d1c3d 100644 --- a/docs/rfcs/034_quality_assertions_observations.md +++ b/docs/rfcs/034_quality_assertions_observations.md @@ -11,8 +11,8 @@ - InQL RFC 027 (relational evidence program) - InQL RFC 028 (semantic identity and target model) - InQL RFC 032 (execution observations) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #68](https://github.com/dannys-code-corner/InQL/issues/68) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/035_governed_attributes_policy_checkpoints.md b/docs/rfcs/035_governed_attributes_policy_checkpoints.md index f9210ae..1e51b7f 100644 --- a/docs/rfcs/035_governed_attributes_policy_checkpoints.md +++ b/docs/rfcs/035_governed_attributes_policy_checkpoints.md @@ -10,8 +10,8 @@ - InQL RFC 029 (typed metadata attachments) - InQL RFC 030 (Prism lineage graph) - InQL RFC 033 (adapter requirements and coverage) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #69](https://github.com/dannys-code-corner/InQL/issues/69) 
+- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/036_governed_plan_bundle.md b/docs/rfcs/036_governed_plan_bundle.md index c98cf73..9b4128d 100644 --- a/docs/rfcs/036_governed_plan_bundle.md +++ b/docs/rfcs/036_governed_plan_bundle.md @@ -14,8 +14,8 @@ - InQL RFC 035 (governed attributes and policy checkpoints) - InQL RFC 040 (interoperability semantic profiles) - InQL RFC 041 (Prism plan ingress and external client frontends) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #70](https://github.com/dannys-code-corner/InQL/issues/70) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/037_plan_diff_blast_radius_inputs.md b/docs/rfcs/037_plan_diff_blast_radius_inputs.md index 6bf504a..d596d50 100644 --- a/docs/rfcs/037_plan_diff_blast_radius_inputs.md +++ b/docs/rfcs/037_plan_diff_blast_radius_inputs.md @@ -11,8 +11,8 @@ - InQL RFC 031 (local inspection APIs and artifacts) - InQL RFC 036 (governed plan bundle) - InQL RFC 040 (interoperability semantic profiles) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #71](https://github.com/dannys-code-corner/InQL/issues/71) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/038_evidence_export_bridges.md b/docs/rfcs/038_evidence_export_bridges.md index a926ae8..a322815 100644 --- a/docs/rfcs/038_evidence_export_bridges.md +++ b/docs/rfcs/038_evidence_export_bridges.md @@ -12,8 +12,8 @@ - InQL RFC 032 (execution observations) - InQL RFC 036 (governed plan bundle) - InQL RFC 040 (interoperability semantic profiles) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #72](https://github.com/dannys-code-corner/InQL/issues/72) +- **RFC PR:** [InQL 
#60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/039_pandas_familiar_exploration_api.md b/docs/rfcs/039_pandas_familiar_exploration_api.md index bff1cb4..59c6d7f 100644 --- a/docs/rfcs/039_pandas_familiar_exploration_api.md +++ b/docs/rfcs/039_pandas_familiar_exploration_api.md @@ -9,8 +9,8 @@ - InQL RFC 003 (`query {}` blocks and relational authoring) - InQL RFC 005 (pipe-forward relational syntax) - InQL RFC 012 (unified scalar expression surface) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #73](https://github.com/dannys-code-corner/InQL/issues/73) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/040_interoperability_semantic_profiles.md b/docs/rfcs/040_interoperability_semantic_profiles.md index 95e50a5..38a4cd6 100644 --- a/docs/rfcs/040_interoperability_semantic_profiles.md +++ b/docs/rfcs/040_interoperability_semantic_profiles.md @@ -22,8 +22,8 @@ - InQL RFC 036 (governed plan bundle) - InQL RFC 038 (evidence export bridges) - InQL RFC 041 (Prism plan ingress and external client frontends) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL #74](https://github.com/dannys-code-corner/InQL/issues/74) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — diff --git a/docs/rfcs/041_prism_plan_ingress_frontends.md b/docs/rfcs/041_prism_plan_ingress_frontends.md index 4310101..47290b1 100644 --- a/docs/rfcs/041_prism_plan_ingress_frontends.md +++ b/docs/rfcs/041_prism_plan_ingress_frontends.md @@ -16,8 +16,8 @@ - InQL RFC 031 (local inspection APIs and artifacts) - InQL RFC 033 (adapter requirements and coverage) - InQL RFC 040 (interoperability semantic profiles) -- **Issue:** — -- **RFC PR:** — +- **Issue:** [InQL 
#75](https://github.com/dannys-code-corner/InQL/issues/75) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) - **Written against:** Incan v0.3-era InQL - **Shipped in:** — From d12c0eaa89d3057da6ed4259100088da6be7c2ad Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Fri, 5 Jun 2026 17:48:19 +0200 Subject: [PATCH 4/6] docs - broaden evidence exchange RFC scope --- docs/rfcs/027_relational_evidence_program.md | 28 +++- docs/rfcs/032_execution_observations.md | 2 +- docs/rfcs/036_governed_plan_bundle.md | 2 +- docs/rfcs/038_evidence_exchange_bridges.md | 133 ++++++++++++++++++ docs/rfcs/038_evidence_export_bridges.md | 110 --------------- .../040_interoperability_semantic_profiles.md | 16 ++- docs/rfcs/README.md | 4 +- 7 files changed, 170 insertions(+), 125 deletions(-) create mode 100644 docs/rfcs/038_evidence_exchange_bridges.md delete mode 100644 docs/rfcs/038_evidence_export_bridges.md diff --git a/docs/rfcs/027_relational_evidence_program.md b/docs/rfcs/027_relational_evidence_program.md index f85a974..ae624d6 100644 --- a/docs/rfcs/027_relational_evidence_program.md +++ b/docs/rfcs/027_relational_evidence_program.md @@ -20,7 +20,7 @@ - InQL RFC 035 (governed attributes and policy checkpoints) - InQL RFC 036 (governed plan bundle) - InQL RFC 037 (plan diff and blast-radius inputs) - - InQL RFC 038 (evidence export bridges) + - InQL RFC 038 (evidence exchange bridges) - InQL RFC 040 (interoperability semantic profiles) - InQL RFC 041 (Prism plan ingress and external client frontends) - **Issue:** [InQL #61](https://github.com/dannys-code-corner/InQL/issues/61) @@ -30,7 +30,7 @@ ## Summary -This RFC is the umbrella tracking RFC for InQL's relational evidence program. 
The program defines the local, open semantic evidence contracts that make typed relational computation inspectable before execution and reviewable after execution: stable semantic targets, metadata attachments, Prism lineage, inspection artifacts, execution observations, adapter coverage, quality observations, governed attributes, plan bundles, plan diffs, export bridges, interoperability semantic profiles, and Prism plan ingress. This RFC is complete only when the child RFCs are implemented, rejected, or explicitly superseded by design decision. +This RFC is the umbrella tracking RFC for InQL's relational evidence program. The program defines the local, open semantic evidence contracts that make typed relational computation inspectable before execution and reviewable after execution: stable semantic targets, metadata attachments, Prism lineage, inspection artifacts, execution observations, adapter coverage, quality observations, governed attributes, plan bundles, plan diffs, evidence exchange bridges, interoperability semantic profiles, and Prism plan ingress. This RFC is complete only when the child RFCs are implemented, rejected, or explicitly superseded by design decision. ## Core model @@ -51,7 +51,7 @@ Without this program, lineage, governance, quality, observability, and change-im ## Goals - Establish relational evidence as one coordinated InQL program. -- Define the child RFC set required for semantic identity, lineage, inspection, observations, coverage, quality, governed attributes, plan bundles, plan diffs, exports, and interoperability profiles. +- Define the child RFC set required for semantic identity, lineage, inspection, observations, coverage, quality, governed attributes, plan bundles, plan diffs, evidence exchange, and interoperability profiles. - Keep the program open, local, and backend-neutral. - Make Prism-authored relational meaning the source of local lineage and schema-flow evidence. 
- Define target-environment profile evidence without making any external engine, dialect, or interchange format the semantic owner. @@ -65,7 +65,7 @@ Without this program, lineage, governance, quality, observability, and change-im - Defining pipeline orchestration, scheduling, retries, checkpointing, or cross-step lifecycle state. - Making Substrait extension metadata the authoritative evidence store. - Making a specific backend adapter the semantic owner of lineage, quality, policy, or coverage. -- Defining every external export mapping directly in this umbrella RFC. +- Defining every external artifact exchange mapping directly in this umbrella RFC. ## Guide-level explanation (how authors think about it) @@ -87,6 +87,22 @@ total = lineage.field("total_amount") The exact API is defined in the child RFCs. The important user model is stable: InQL can explain typed relational computation locally, before a backend runs it and without requiring an external governance service. +The same evidence model should also support migration and modernization workbenches. A tool can ingest source-system metadata, target-environment profiles, transformation project artifacts, catalog metadata, and orchestration metadata; attach them to InQL semantic targets; assess compatibility gaps; and export reviewable suggestions back into the transformation stack: + +```incan +brief = migration_evidence_brief( + source_profile="legacy_sql", + target_profile="cloud_analytics", + transformation_project="analytics_project", +) + +inspection = inspect_migration(brief) +risk = inspection.profile_gaps() +suggestions = inspection.export_transformation_suggestions() +``` + +The names are illustrative. What matters is the ownership boundary, not the exact migration stack: InQL owns semantic targets, profile assessments, lineage, and evidence; external projects, catalogs, and orchestrators remain consumers or evidence sources.
+ ## Reference-level explanation (precise rules) The relational evidence program must consist of the following child RFCs unless this RFC is amended or superseded: @@ -101,13 +117,13 @@ The relational evidence program must consist of the following child RFCs unless - InQL RFC 035 (governed attributes and policy checkpoints) - InQL RFC 036 (governed plan bundle) - InQL RFC 037 (plan diff and blast-radius inputs) -- InQL RFC 038 (evidence export bridges) +- InQL RFC 038 (evidence exchange bridges) - InQL RFC 040 (interoperability semantic profiles) - InQL RFC 041 (Prism plan ingress and external client frontends) This umbrella RFC must not be marked Implemented while any required child RFC remains Draft, Planned, In Progress, Blocked, or otherwise unresolved. A child RFC may be removed from the required completion set only by a design decision recorded in this RFC or by a superseding RFC. -Child RFCs must preserve the layer boundary established by this RFC. They may define local InQL evidence contracts and generic export shapes. They must not define proprietary product behavior, hosted storage behavior, managed approval semantics, or organization-wide policy lifecycle rules. +Child RFCs must preserve the layer boundary established by this RFC. They may define local InQL evidence contracts and generic exchange shapes. They must not define proprietary product behavior, hosted storage behavior, managed approval semantics, or organization-wide policy lifecycle rules. Relational evidence must derive from InQL semantic sources where possible. Prism-authored and Prism-rewritten plans are the authoritative source for local relational lineage. Session and backend adapter observations may report execution facts, diagnostics, and capability coverage, but they must not decide that an authored lineage edge exists or does not exist. 
diff --git a/docs/rfcs/032_execution_observations.md b/docs/rfcs/032_execution_observations.md index 555d907..bd1f3a6 100644 --- a/docs/rfcs/032_execution_observations.md +++ b/docs/rfcs/032_execution_observations.md @@ -75,7 +75,7 @@ Execution observations are runtime evidence. They describe an attempt to execute ### Interaction with other InQL surfaces -Quality observations, adapter coverage records, semantic profile records, and export bridges may refer to execution observations. Pipeline layers may consume them, but orchestration behavior remains outside this RFC. +Quality observations, adapter coverage records, semantic profile records, and evidence exchange bridges may refer to execution observations. Pipeline layers may consume them, but orchestration behavior remains outside this RFC. ### Compatibility / migration diff --git a/docs/rfcs/036_governed_plan_bundle.md b/docs/rfcs/036_governed_plan_bundle.md index 9b4128d..f257261 100644 --- a/docs/rfcs/036_governed_plan_bundle.md +++ b/docs/rfcs/036_governed_plan_bundle.md @@ -94,7 +94,7 @@ The bundle is an evidence package. It does not make policy decisions by itself. ### Interaction with other InQL surfaces -Inspection artifacts, execution observations, quality observations, ingress frontends, and export bridges may all read from or write to bundle-compatible records. +Inspection artifacts, execution observations, quality observations, ingress frontends, and evidence exchange bridges may all read from or write to bundle-compatible records. 
### Compatibility / migration diff --git a/docs/rfcs/038_evidence_exchange_bridges.md b/docs/rfcs/038_evidence_exchange_bridges.md new file mode 100644 index 0000000..d1662d4 --- /dev/null +++ b/docs/rfcs/038_evidence_exchange_bridges.md @@ -0,0 +1,133 @@ +# InQL RFC 038: Evidence exchange bridges + +- **Status:** Draft +- **Created:** 2026-05-29 +- **Author(s):** Danny Meijer (@dannymeijer) +- **Related:** + - InQL RFC 002 (Apache Substrait integration) + - InQL RFC 027 (relational evidence program) + - InQL RFC 029 (typed metadata attachments) + - InQL RFC 030 (Prism lineage graph) + - InQL RFC 031 (local inspection APIs and artifacts) + - InQL RFC 032 (execution observations) + - InQL RFC 036 (governed plan bundle) + - InQL RFC 040 (interoperability semantic profiles) +- **Issue:** [InQL #72](https://github.com/dannys-code-corner/InQL/issues/72) +- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) +- **Written against:** Incan v0.3-era InQL +- **Shipped in:** — + +## Summary + +This RFC defines evidence exchange bridges between InQL's internal evidence model and external or adjacent formats. Exchange bridges map InQL plan, lineage, schema-flow, execution, quality, coverage, semantic profile, and bundle records into downstream views such as OpenLineage events, telemetry signals, semantic inspection fragments, transformation-project artifacts, and catalog/governance integration artifacts. They may also ingest external evidence artifacts such as transformation manifests, source catalogs, schema catalogs, run results, and orchestration metadata. Inbound artifacts and outbound projections are evidence exchange records, not the internal source of truth. + +## Motivation + +InQL evidence should be useful outside InQL, and external project artifacts should be usable as evidence inputs when they are explicit about their source and scope. 
CI systems, lineage tools, telemetry pipelines, catalogs, notebooks, transformation frameworks, orchestrators, and agents may all consume or produce different formats. If each integration reconstructs evidence independently, semantics will drift. InQL should provide exchange bridges that preserve its local evidence model while acknowledging that external formats may be less expressive or may represent facts at a different semantic layer. + +## Goals + +- Define exchange bridges as inbound and outbound mappings around InQL evidence. +- Preserve semantic target references and evidence versions where possible. +- Allow lossy external mappings only when loss is explicit. +- Allow external artifacts to seed metadata, lineage hints, quality observations, run observations, and target mappings without becoming authoritative InQL semantics. +- Support transformation-framework artifacts such as manifests, catalogs, run results, source definitions, model metadata, tests, tags, and documentation scaffolds. +- Keep provider configuration and hosted ingestion outside InQL core. +- Support local exchange without requiring a specific external service. + +## Non-Goals + +- Making any external format the internal InQL evidence model. +- Defining hosted ingestion, storage, dashboards, or managed governance behavior. +- Defining a telemetry provider, collector, exporter, or sampling policy. +- Guaranteeing that every external tool can represent every InQL evidence feature. +- Guaranteeing that imported transformation, catalog, or orchestration artifacts are complete or semantically authoritative. +- Defining a full migration product, transformation runtime, or orchestration engine. 
+ +## Guide-level explanation (how authors think about it) + +An author or CI job can exchange evidence with local artifacts: + +```incan +bundle = governed_plan_bundle(summary) +bundle.export_openlineage("target/inql/openlineage.json") +bundle.export_telemetry("target/inql/telemetry.json") +``` + +The names are illustrative. The key contract is that outbound exports are generated from InQL evidence artifacts, not from backend logs or reconstructed SQL strings. + +For transformation-project workflows, an exchange bridge can also ingest project artifacts and emit reviewable suggestions: + +```incan +project = transformation_project_artifacts("analytics_project/") +bundle = governed_plan_bundle(summary, evidence=[project]) + +sources = bundle.export_transformation_sources() +tests = bundle.export_transformation_quality_suggestions() +``` + +The bridge may read common artifacts such as manifests, catalogs, run results, source definitions, tests, tags, metadata, and documentation fragments. It may emit suggested source declarations, model metadata, test definitions, tags, exposures, or documentation scaffolds. Those suggestions remain projections from InQL evidence and imported artifact evidence; they do not make the transformation framework the semantic owner of the plan. + +## Reference-level explanation (precise rules) + +An exchange bridge must declare its direction, source evidence schema versions, target format, target format version when available, mapping coverage, unsupported fields, redaction behavior, and diagnostics. + +Outbound exchange bridges must preserve semantic target identifiers when the target format can carry them. When the target format cannot carry them directly, the bridge should preserve them in an extension, custom facet, attribute, or sidecar artifact when safe. + +Inbound exchange bridges must preserve external artifact identity, source location, artifact version, and confidence. 
Imported records may attach metadata, origin hints, observed run facts, quality observations, or candidate mappings to InQL semantic targets. They must not create InQL lineage, policy decisions, quality pass/fail states, or adapter coverage unless the corresponding InQL evidence contract can represent and validate that evidence. + +Lossy mappings must be explicit. If an external lineage format cannot distinguish value, control, grouping, join, and sort lineage, the bridge must either preserve the distinction through an extension or report the loss. If an imported artifact collapses source relation, model, test, and run-result semantics into one node vocabulary, the bridge must report that limitation instead of pretending the artifact has InQL target precision. + +Sensitive attachments must follow visibility rules. Exchange bridges must not leak sensitive payloads merely because a target format lacks redaction semantics. + +Provider configuration, authentication, network transport, sampling, hosted ingestion, and storage are outside this RFC. + +## Design details + +### Syntax + +This RFC introduces no authoring syntax. + +### Semantics + +Outbound exports are projections. Inbound artifacts are evidence inputs. Neither direction may become the authoritative source of InQL plan, lineage, quality, or execution semantics. + +### Interaction with other InQL surfaces + +Exchange bridges depend on inspection artifacts, execution observations, quality observations, adapter coverage, interoperability profiles, and governed plan bundles. They should map from or into those records rather than from backend-specific plans. + +Transformation-framework bridges are a first-class example. A bridge may ingest manifest, catalog, run-result, source, model, test, tag, exposure, metadata, and documentation artifacts. It may export suggested source definitions, model metadata, quality tests, documentation scaffolds, exposures, tags, or run validation summaries. 
The bridge must keep imported project semantics distinct from Prism-authored semantics and must identify any profile assumptions used to compare source and target environments. + +### Compatibility / migration + +Exchange bridges must version their mappings. Adding a new internal evidence field should not silently change external semantics without a mapping version change or documented behavior. Imported artifact schemas must be versioned or fingerprinted where possible so stale or incompatible artifacts can be diagnosed. + +## Alternatives considered + +- **Adopt one external lineage model internally.** Rejected because InQL needs evidence that many external tools cannot represent directly. +- **Leave all exchange to downstream systems.** Rejected because independent reconstruction causes drift. +- **Require hosted ingestion.** Rejected because local exchange must work in open InQL. +- **Treat transformation project artifacts as authoritative semantics.** Rejected because those artifacts are valuable evidence, but they are not Prism's analyzed relational model. + +## Drawbacks + +- Exchange bridges require maintenance as external formats evolve. +- Some mappings will be lossy or require extensions. +- Redaction rules can make exports harder to debug. +- Inbound artifact support can be mistaken for semantic endorsement unless confidence, source, and target mapping diagnostics are explicit. + +## Layers affected + +- **InQL specification** — exchange bridge responsibilities and loss reporting become normative. +- **InQL library package** — exchange APIs may live in core or optional modules. +- **Execution / interchange** — exchanges may include Substrait references, telemetry-shaped observations, lineage events, transformation artifacts, and run-result evidence. +- **Documentation** — docs must identify external exchanges as evidence inputs or projections, not internal truth. + +## Unresolved questions + +- Which exchange bridge should be implemented first?
+- Should exchange bridges live in the core package or optional integration packages? +- What sidecar format should preserve InQL-specific evidence when an external target is lossy? +- Which transformation-project artifacts should be supported in the first bridge slice? + + diff --git a/docs/rfcs/038_evidence_export_bridges.md b/docs/rfcs/038_evidence_export_bridges.md deleted file mode 100644 index a322815..0000000 --- a/docs/rfcs/038_evidence_export_bridges.md +++ /dev/null @@ -1,110 +0,0 @@ -# InQL RFC 038: Evidence export bridges - -- **Status:** Draft -- **Created:** 2026-05-29 -- **Author(s):** Danny Meijer (@dannymeijer) -- **Related:** - - InQL RFC 002 (Apache Substrait integration) - - InQL RFC 027 (relational evidence program) - - InQL RFC 029 (typed metadata attachments) - - InQL RFC 030 (Prism lineage graph) - - InQL RFC 031 (local inspection APIs and artifacts) - - InQL RFC 032 (execution observations) - - InQL RFC 036 (governed plan bundle) - - InQL RFC 040 (interoperability semantic profiles) -- **Issue:** [InQL #72](https://github.com/dannys-code-corner/InQL/issues/72) -- **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) -- **Written against:** Incan v0.3-era InQL -- **Shipped in:** — - -## Summary - -This RFC defines evidence export bridges from InQL's internal evidence model to external and adjacent formats. Export bridges map InQL plan, lineage, schema-flow, execution, quality, coverage, semantic profile, and bundle records into downstream views such as OpenLineage events, telemetry signals, semantic inspection fragments, and catalog/governance integration artifacts. Exports are projections from InQL evidence, not the internal source of truth. - -## Motivation - -InQL evidence should be useful outside InQL. CI systems, lineage tools, telemetry pipelines, catalogs, notebooks, and agents may all consume different formats. If each integration reconstructs evidence independently, semantics will drift. 
InQL should provide export bridges that preserve its local evidence model while acknowledging that external formats may be less expressive. - -## Goals - -- Define export bridges as downstream projections from InQL evidence. -- Preserve semantic target references and evidence versions where possible. -- Allow lossy external mappings only when loss is explicit. -- Keep provider configuration and hosted ingestion outside InQL core. -- Support local export without requiring a specific external service. - -## Non-Goals - -- Making any external format the internal InQL evidence model. -- Defining hosted ingestion, storage, dashboards, or managed governance behavior. -- Defining a telemetry provider, collector, exporter, or sampling policy. -- Guaranteeing that every external tool can represent every InQL evidence feature. - -## Guide-level explanation (how authors think about it) - -An author or CI job can export evidence from local artifacts: - -```incan -bundle = governed_plan_bundle(summary) -bundle.export_openlineage("target/inql/openlineage.json") -bundle.export_telemetry("target/inql/telemetry.json") -``` - -The names are illustrative. The key contract is that exports are generated from InQL evidence artifacts, not from backend logs or reconstructed SQL strings. - -## Reference-level explanation (precise rules) - -An export bridge must declare its source evidence schema versions, target format, target format version when available, mapping coverage, unsupported fields, redaction behavior, and diagnostics. - -Export bridges must preserve semantic target identifiers when the target format can carry them. When the target format cannot carry them directly, the bridge should preserve them in an extension, custom facet, attribute, or sidecar artifact when safe. - -Lossy mappings must be explicit. 
If an external lineage format cannot distinguish value, control, grouping, join, and sort lineage, the export must either preserve the distinction through an extension or report the loss. - -Sensitive attachments must follow visibility rules. Export bridges must not leak sensitive payloads merely because a target format lacks redaction semantics. - -Provider configuration, authentication, network transport, sampling, hosted ingestion, and storage are outside this RFC. - -## Design details - -### Syntax - -This RFC introduces no authoring syntax. - -### Semantics - -Exports are projections. They must not become the authoritative source of InQL plan, lineage, quality, or execution semantics. - -### Interaction with other InQL surfaces - -Export bridges depend on inspection artifacts, execution observations, quality observations, adapter coverage, and governed plan bundles. They should map from those records rather than from backend-specific plans. - -### Compatibility / migration - -Export bridges must version their mappings. Adding a new internal evidence field should not silently change external semantics without a mapping version change or documented behavior. - -## Alternatives considered - -- **Adopt one external lineage model internally.** Rejected because InQL needs evidence that many external tools cannot represent directly. -- **Leave all exports to downstream systems.** Rejected because independent reconstruction causes drift. -- **Require hosted ingestion.** Rejected because local export must work in open InQL. - -## Drawbacks - -- Export bridges require maintenance as external formats evolve. -- Some mappings will be lossy or require extensions. -- Redaction rules can make exports harder to debug. - -## Layers affected - -- **InQL specification** — export bridge responsibilities and loss reporting become normative. -- **InQL library package** — export APIs may live in core or optional modules. 
-- **Execution / interchange** — exports may include Substrait references, telemetry-shaped observations, and lineage events. -- **Documentation** — docs must identify external exports as projections, not internal truth. - -## Unresolved questions - -- Which export bridge should be implemented first? -- Should export bridges live in the core package or optional integration packages? -- What sidecar format should preserve InQL-specific evidence when an external target is lossy? - - diff --git a/docs/rfcs/040_interoperability_semantic_profiles.md b/docs/rfcs/040_interoperability_semantic_profiles.md index 38a4cd6..fdc1b9e 100644 --- a/docs/rfcs/040_interoperability_semantic_profiles.md +++ b/docs/rfcs/040_interoperability_semantic_profiles.md @@ -20,7 +20,7 @@ - InQL RFC 032 (execution observations) - InQL RFC 033 (adapter requirements and coverage) - InQL RFC 036 (governed plan bundle) - - InQL RFC 038 (evidence export bridges) + - InQL RFC 038 (evidence exchange bridges) - InQL RFC 041 (Prism plan ingress and external client frontends) - **Issue:** [InQL #74](https://github.com/dannys-code-corner/InQL/issues/74) - **RFC PR:** [InQL #60](https://github.com/dannys-code-corner/InQL/pull/60) @@ -29,7 +29,7 @@ ## Summary -This RFC defines interoperability semantic profiles for InQL evidence. A profile describes the semantic environment a plan is being received from, compared with, targeted at, or observed under: an InQL baseline, client protocol, plan ingress frontend, execution engine, adapter binding, SQL dialect, interchange consumer, or conformance baseline. Profiles give ingress coverage records, adapter requirements, coverage records, execution observations, plan diffs, bundles, and exports a shared context without making any external system the owner of InQL relational meaning. +This RFC defines interoperability semantic profiles for InQL evidence. 
A profile describes the semantic environment a plan is being received from, compared with, targeted at, or observed under: an InQL baseline, client protocol, plan ingress frontend, execution engine, adapter binding, SQL dialect, catalog/schema system, transformation project, interchange consumer, or conformance baseline. Profiles give ingress coverage records, adapter requirements, coverage records, execution observations, plan diffs, bundles, and exchanges a shared context without making any external system the owner of InQL relational meaning. ## Motivation @@ -42,7 +42,7 @@ Profiles provide the missing layer between InQL-authored semantics, plan ingress ## Goals - Define semantic profiles as versioned evidence records. -- Allow profiles for InQL baselines, client protocols, plan ingress frontends, execution engines, adapter bindings, SQL dialects, interchange consumers, and conformance baselines. +- Allow profiles for InQL baselines, client protocols, plan ingress frontends, execution engines, adapter bindings, SQL dialects, catalog/schema systems, transformation projects, interchange consumers, and conformance baselines. - Name the semantic dimensions that affect relational correctness and evidence interpretation. - Let adapter requirements and coverage records state which profile they were evaluated against. - Let execution observations report the profile requested before execution and the profile observed at runtime when available. @@ -54,6 +54,7 @@ Profiles provide the missing layer between InQL-authored semantics, plan ingress - Defining a profile for one specific external engine. - Making any external engine, SQL dialect, or interchange format the normative InQL semantic model. - Defining SQL transpilation, physical planning, or backend execution strategies. +- Defining transformation-project semantics as InQL semantics. - Defining a full conformance test suite. - Defining a global registry of every engine version or deployment configuration. 
- Guaranteeing semantic equivalence merely because a profile name is present. @@ -109,6 +110,8 @@ Target class must distinguish at least: - execution_engine - adapter_binding - sql_dialect +- catalog_schema_system +- transformation_project - interchange_consumer - conformance_baseline @@ -120,6 +123,8 @@ Semantic dimensions must be represented as structured records rather than free-f - boolean, null, and NaN semantics - string comparison, collation, and case sensitivity - identifier resolution and catalog naming +- schema catalog, partition, and external table metadata semantics +- transformation project selection, materialization, test, and metadata semantics - client session state and configuration semantics - relation ordering and determinism - aggregate and grouping edge semantics @@ -175,7 +180,7 @@ Adapter coverage records should cite the profile used for evaluation when the an Governed plan bundles may include profile records and profile assessments so downstream tools can understand which target environments were checked. -Export bridges may project profile evidence into external formats. Lossy exports must report dimensions that could not be represented. +Evidence exchange bridges may project profile evidence into external formats or ingest external project artifacts with profile context. Lossy exports and lossy imports must report dimensions that could not be represented. ### Compatibility / migration @@ -189,7 +194,7 @@ Profile schemas must be versioned from the start. Profile names that appear in s - **Use Substrait as the profile model.** Rejected because Substrait is an interchange boundary and does not capture every InQL evidence dimension. - **Make one external engine profile normative.** Rejected because InQL needs to interoperate with multiple targets without importing one target's semantics as the language definition. 
- **Rely only on conformance tests.** Rejected because tests are valuable evidence but do not replace structured profile records, coverage states, or diagnostics. -- **Leave profiles to downstream integrations.** Rejected because independent profile reconstruction would cause drift across adapters, CI, notebooks, agents, and governance exports. +- **Leave profiles to downstream integrations.** Rejected because independent profile reconstruction would cause drift across adapters, CI, notebooks, agents, transformation projects, and governance exchanges. ## Drawbacks @@ -211,5 +216,6 @@ Profile schemas must be versioned from the start. Profile names that appear in s - Should built-in InQL profiles live in core or in optional integration packages? - How should profile records compare target configurations without leaking sensitive deployment details? - Should conformance test results become profile evidence in this RFC or a later RFC? +- Which transformation-project profile dimensions are needed before exchange bridges can safely emit test and metadata suggestions? 
diff --git a/docs/rfcs/README.md b/docs/rfcs/README.md index 7a65567..8aada9b 100644 --- a/docs/rfcs/README.md +++ b/docs/rfcs/README.md @@ -44,7 +44,7 @@ InQL uses its **own** RFC series (starting at 000), independent of the [Incan la | [035][rfc-035] | Draft | Governed attributes and policy checkpoints | | | [036][rfc-036] | Draft | Governed plan bundle | | | [037][rfc-037] | Draft | Plan diff and blast-radius inputs | | -| [038][rfc-038] | Draft | Evidence export bridges | | +| [038][rfc-038] | Draft | Evidence exchange bridges | | | [039][rfc-039] | Draft | Pandas-familiar exploration API | | | [040][rfc-040] | Draft | Interoperability semantic profiles | | | [041][rfc-041] | Draft | Prism plan ingress and external client frontends | | @@ -97,7 +97,7 @@ New RFCs should follow [TEMPLATE.md] (aligned with Incan’s RFC structure, adap [rfc-035]: 035_governed_attributes_policy_checkpoints.md [rfc-036]: 036_governed_plan_bundle.md [rfc-037]: 037_plan_diff_blast_radius_inputs.md -[rfc-038]: 038_evidence_export_bridges.md +[rfc-038]: 038_evidence_exchange_bridges.md [rfc-039]: 039_pandas_familiar_exploration_api.md [rfc-040]: 040_interoperability_semantic_profiles.md [rfc-041]: 041_prism_plan_ingress_frontends.md From 8465b4c03897664529869709ea2ed7f7109b5653 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Fri, 5 Jun 2026 17:51:36 +0200 Subject: [PATCH 5/6] docs - add ecosystem examples to evidence RFCs --- docs/rfcs/027_relational_evidence_program.md | 2 +- docs/rfcs/038_evidence_exchange_bridges.md | 10 +++++----- docs/rfcs/040_interoperability_semantic_profiles.md | 4 ++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/rfcs/027_relational_evidence_program.md b/docs/rfcs/027_relational_evidence_program.md index ae624d6..9290b85 100644 --- a/docs/rfcs/027_relational_evidence_program.md +++ b/docs/rfcs/027_relational_evidence_program.md @@ -87,7 +87,7 @@ total = lineage.field("total_amount") The exact API is defined in the child RFCs. 
The important user model is stable: InQL can explain typed relational computation locally, before a backend runs it and without requiring an external governance service. -The same evidence model should also support migration and modernization workbenches. A tool can ingest source-system metadata, target-environment profiles, transformation project artifacts, catalog metadata, and orchestration metadata; attach them to InQL semantic targets; assess compatibility gaps; and export reviewable suggestions back into the transformation stack: +The same evidence model should also support migration and modernization workbenches. A tool can ingest source-system metadata, target-environment profiles, transformation project artifacts, catalog metadata, and orchestration metadata; attach them to InQL semantic targets; assess compatibility gaps; and export reviewable suggestions back into the transformation stack. Representative ecosystems include legacy and operational SQL systems such as Oracle, PostgreSQL, SQL Server, and MySQL; cloud and lakehouse targets such as Athena, Presto, Trino, Spark, Snowflake, BigQuery, Redshift, and Databricks; catalogs such as Glue Data Catalog and Hive Metastore; transformation projects such as dbt; and orchestrators such as Airflow, MWAA, Dagster, and Prefect: ```incan brief = migration_evidence_brief( diff --git a/docs/rfcs/038_evidence_exchange_bridges.md b/docs/rfcs/038_evidence_exchange_bridges.md index d1662d4..8e340d8 100644 --- a/docs/rfcs/038_evidence_exchange_bridges.md +++ b/docs/rfcs/038_evidence_exchange_bridges.md @@ -19,11 +19,11 @@ ## Summary -This RFC defines evidence exchange bridges between InQL's internal evidence model and external or adjacent formats. 
Exchange bridges map InQL plan, lineage, schema-flow, execution, quality, coverage, semantic profile, and bundle records into downstream views such as OpenLineage events, telemetry signals, semantic inspection fragments, transformation-project artifacts, and catalog/governance integration artifacts. They may also ingest external evidence artifacts such as transformation manifests, source catalogs, schema catalogs, run results, and orchestration metadata. Inbound artifacts and outbound projections are evidence exchange records, not the internal source of truth. +This RFC defines evidence exchange bridges between InQL's internal evidence model and external or adjacent formats. Exchange bridges map InQL plan, lineage, schema-flow, execution, quality, coverage, semantic profile, and bundle records into downstream views such as OpenLineage events, telemetry signals, semantic inspection fragments, transformation-project artifacts, and catalog/governance integration artifacts. They may also ingest external evidence artifacts such as transformation manifests, source catalogs, schema catalogs, run results, and orchestration metadata. Representative artifact families include dbt manifests and run results, Glue Data Catalog or Hive Metastore snapshots, Airflow or MWAA DAG metadata, Dagster assets, Prefect deployment metadata, OpenLineage events, DataHub or OpenMetadata catalog records, and Great Expectations-style quality results. Inbound artifacts and outbound projections are evidence exchange records, not the internal source of truth. ## Motivation -InQL evidence should be useful outside InQL, and external project artifacts should be usable as evidence inputs when they are explicit about their source and scope. CI systems, lineage tools, telemetry pipelines, catalogs, notebooks, transformation frameworks, orchestrators, and agents may all consume or produce different formats. If each integration reconstructs evidence independently, semantics will drift. 
InQL should provide exchange bridges that preserve its local evidence model while acknowledging that external formats may be less expressive or may represent facts at a different semantic layer. +InQL evidence should be useful outside InQL, and external project artifacts should be usable as evidence inputs when they are explicit about their source and scope. CI systems, lineage tools, telemetry pipelines, catalogs, notebooks, transformation frameworks, orchestrators, and agents may all consume or produce different formats. Systems such as dbt, Airflow, MWAA, Dagster, Prefect, Glue Data Catalog, Hive Metastore, DataHub, OpenMetadata, OpenLineage, and Great Expectations are useful ecosystem examples, but none of them should become InQL's internal evidence model. If each integration reconstructs evidence independently, semantics will drift. InQL should provide exchange bridges that preserve its local evidence model while acknowledging that external formats may be less expressive or may represent facts at a different semantic layer. ## Goals @@ -31,7 +31,7 @@ InQL evidence should be useful outside InQL, and external project artifacts shou - Preserve semantic target references and evidence versions where possible. - Allow lossy external mappings only when loss is explicit. - Allow external artifacts to seed metadata, lineage hints, quality observations, run observations, and target mappings without becoming authoritative InQL semantics. -- Support transformation-framework artifacts such as manifests, catalogs, run results, source definitions, model metadata, tests, tags, and documentation scaffolds. +- Support transformation-framework artifacts such as manifests, catalogs, run results, source definitions, model metadata, tests, tags, and documentation scaffolds, including dbt-shaped artifacts where a bridge supports that profile. - Keep provider configuration and hosted ingestion outside InQL core. - Support local exchange without requiring a specific external service. 
@@ -66,7 +66,7 @@ sources = bundle.export_transformation_sources() tests = bundle.export_transformation_quality_suggestions() ``` -The bridge may read common artifacts such as manifests, catalogs, run results, source definitions, tests, tags, metadata, and documentation fragments. It may emit suggested source declarations, model metadata, test definitions, tags, exposures, or documentation scaffolds. Those suggestions remain projections from InQL evidence and imported artifact evidence; they do not make the transformation framework the semantic owner of the plan. +The bridge may read common artifacts such as manifests, catalogs, run results, source definitions, tests, tags, metadata, and documentation fragments. In a dbt-shaped bridge, for example, those inputs may include `manifest.json`, `catalog.json`, `run_results.json`, source YAML, model YAML, tags, exposures, tests, and documentation blocks. It may emit suggested source declarations, model metadata, test definitions, tags, exposures, or documentation scaffolds. Those suggestions remain projections from InQL evidence and imported artifact evidence; they do not make the transformation framework the semantic owner of the plan. ## Reference-level explanation (precise rules) @@ -96,7 +96,7 @@ Outbound exports are projections. Inbound artifacts are evidence inputs. Neither Exchange bridges depend on inspection artifacts, execution observations, quality observations, adapter coverage, interoperability profiles, and governed plan bundles. They should map from or into those records rather than from backend-specific plans. -Transformation-framework bridges are a first-class example. A bridge may ingest manifest, catalog, run-result, source, model, test, tag, exposure, metadata, and documentation artifacts. It may export suggested source definitions, model metadata, quality tests, documentation scaffolds, exposures, tags, or run validation summaries. 
The bridge must keep imported project semantics distinct from Prism-authored semantics and must identify any profile assumptions used to compare source and target environments. +Transformation-framework bridges are a first-class example. A bridge may ingest manifest, catalog, run-result, source, model, test, tag, exposure, metadata, and documentation artifacts from systems such as dbt, Airflow, MWAA, Dagster, or Prefect when the bridge profile supports them. It may export suggested source definitions, model metadata, quality tests, documentation scaffolds, exposures, tags, or run validation summaries. The bridge must keep imported project semantics distinct from Prism-authored semantics and must identify any profile assumptions used to compare source and target environments. ### Compatibility / migration diff --git a/docs/rfcs/040_interoperability_semantic_profiles.md b/docs/rfcs/040_interoperability_semantic_profiles.md index fdc1b9e..fc52939 100644 --- a/docs/rfcs/040_interoperability_semantic_profiles.md +++ b/docs/rfcs/040_interoperability_semantic_profiles.md @@ -39,6 +39,8 @@ If InQL does not name the semantic profile used for an inspection or execution, Profiles provide the missing layer between InQL-authored semantics, plan ingress, and adapter coverage. Prism remains the source of authored and rewritten relational meaning. Profiles describe source and target environments well enough for InQL to produce ingress diagnostics, requirements, coverage records, and observations against them. +Profiles are intentionally ecosystem-neutral, but concrete profiles may describe systems and formats such as Oracle, PostgreSQL, SQL Server, MySQL, Athena, Presto, Trino, Spark, Snowflake, BigQuery, Redshift, Databricks, Glue Data Catalog, Hive Metastore, dbt, Airflow, MWAA, Dagster, Prefect, OpenLineage, DataHub, OpenMetadata, or Great Expectations. Listing a system as a possible profile target does not make that system normative for InQL semantics. 
+ ## Goals - Define semantic profiles as versioned evidence records. @@ -115,6 +117,8 @@ Target class must distinguish at least: - interchange_consumer - conformance_baseline +Concrete profile families may be narrower than target class names. For example, a `sql_dialect` target class may include Oracle, PostgreSQL, SQL Server, or MySQL profiles; an `execution_engine` target class may include Athena, Presto, Trino, Spark, Snowflake, BigQuery, Redshift, or Databricks profiles; a `catalog_schema_system` target class may include Glue Data Catalog or Hive Metastore profiles; and a `transformation_project` target class may include dbt-shaped project profiles. + Semantic dimensions must be represented as structured records rather than free-form prose. Initial dimensions should include, where applicable: - type system and implicit coercion From 0747d61aad135994a4ca6e0590d2499a16fbcb74 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Sat, 6 Jun 2026 22:36:47 +0200 Subject: [PATCH 6/6] ci - expect released incan v0.3 --- .github/workflows/ci.yml | 2 +- incan.lock | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 96a6b7e..a814b3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ concurrency: env: CARGO_TERM_COLOR: always INCAN_REF: release/v0.3 - EXPECTED_INCAN_VERSION: 0.3.0-rc50 + EXPECTED_INCAN_VERSION: 0.3.0 RUST_BACKTRACE: 1 INCAN_NO_BANNER: 1 INCAN_GENERATED_CARGO_TARGET_DIR: ${{ github.workspace }}/.incan-generated-cargo-target diff --git a/incan.lock b/incan.lock index 276911a..804ae80 100644 --- a/incan.lock +++ b/incan.lock @@ -3,7 +3,7 @@ [incan] format = 1 -incan-version = "0.3.0-rc47" +incan-version = "0.3.0" deps-fingerprint = "sha256:9bc7b859d7eb9b1d1cd385b6ccd26e5c9abe32c73f814e1a34345aea30a21b96" cargo-features = [] cargo-no-default-features = false @@ -395,9 +395,9 @@ dependencies = [ [[package]] name = "bitflags" 
-version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" [[package]] name = "blake2" @@ -499,9 +499,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" -version = "0.4.44" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", "num-traits", @@ -1824,14 +1824,14 @@ dependencies = [ [[package]] name = "incan_core" -version = "0.3.0-rc47" +version = "0.3.0" dependencies = [ "serde", ] [[package]] name = "incan_derive" -version = "0.3.0-rc47" +version = "0.3.0" dependencies = [ "proc-macro2", "quote", @@ -1840,7 +1840,7 @@ dependencies = [ [[package]] name = "incan_stdlib" -version = "0.3.0-rc47" +version = "0.3.0" dependencies = [ "incan_core", "incan_derive", @@ -1864,7 +1864,7 @@ dependencies = [ [[package]] name = "inql" -version = "0.3.0-rc47" +version = "0.3.0" dependencies = [ "blake2", "blake3", @@ -2067,9 +2067,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "lz4_flex" @@ -3545,9 +3545,9 @@ checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] name = "yoke" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +checksum = 
"709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" dependencies = [ "stable_deref_trait", "yoke-derive",