Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add additional database indexes for performance #1309

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
Add missing indexes

Revision ID: 80c155808b3f
Revises: bec3296d7537
Create Date: 2025-02-12 03:41:46.603006+00:00
"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "80c155808b3f"  # this migration's unique revision ID (see docstring above)
down_revision = "bec3296d7537"  # parent revision this migration applies on top of
branch_labels = None  # not the head of a named branch
depends_on = None  # no cross-branch dependencies


def upgrade():
    """
    Create supplementary non-unique indexes on frequently-joined foreign-key
    columns, plus a partial index on ``node.deactivated_at`` (PostgreSQL only)
    for the common ``deactivated_at IS NULL`` filter.

    Uses ``batch_alter_table`` so the migration also works on SQLite.
    """
    # (table name, [(index name, indexed column, extra create_index kwargs)])
    # Order matches the original hand-written migration.
    index_specs = [
        ("columnattribute", [("idx_columnattribute_column_id", "column_id", {})]),
        ("cube", [("idx_cube_cube_id", "cube_id", {})]),
        (
            "dimensionlink",
            [
                ("idx_dimensionlink_dimension_id", "dimension_id", {}),
                ("idx_dimensionlink_node_revision_id", "node_revision_id", {}),
            ],
        ),
        (
            "metric_required_dimensions",
            [("idx_metric_required_dimensions_metric_id", "metric_id", {})],
        ),
        (
            "node",
            [
                (
                    "idx_node_deactivated_at_null",
                    "deactivated_at",
                    # Partial index: only rows still active (deactivated_at IS NULL).
                    # The postgresql_where clause is ignored on other backends.
                    {"postgresql_where": sa.text("deactivated_at IS NULL")},
                ),
            ],
        ),
        (
            "nodeavailabilitystate",
            [("idx_nodeavailabilitystate_node_id", "node_id", {})],
        ),
        ("nodecolumns", [("idx_nodecolumns_node_id", "node_id", {})]),
        (
            "noderelationship",
            [
                ("idx_noderelationship_child_id", "child_id", {}),
                ("idx_noderelationship_parent_id", "parent_id", {}),
            ],
        ),
        (
            "partition",
            # btree is the PostgreSQL default; kept explicit to match the models.
            [("idx_partition_column_id", "column_id", {"postgresql_using": "btree"})],
        ),
        (
            "tagnoderelationship",
            [
                ("idx_tagnoderelationship_node_id", "node_id", {}),
                ("idx_tagnoderelationship_tag_id", "tag_id", {}),
            ],
        ),
    ]

    for table_name, indexes in index_specs:
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            for index_name, column_name, extra_kwargs in indexes:
                batch_op.create_index(
                    index_name,
                    [column_name],
                    unique=False,
                    **extra_kwargs,
                )


def downgrade():
    """
    Drop the supplementary indexes created by :func:`upgrade`, in reverse
    table order so the downgrade mirrors the upgrade exactly.
    """
    # (table name, [(index name, extra drop_index kwargs)])
    # Order matches the original hand-written migration (reverse of upgrade).
    drop_specs = [
        (
            "tagnoderelationship",
            [
                ("idx_tagnoderelationship_tag_id", {}),
                ("idx_tagnoderelationship_node_id", {}),
            ],
        ),
        (
            "partition",
            [("idx_partition_column_id", {"postgresql_using": "btree"})],
        ),
        (
            "noderelationship",
            [
                ("idx_noderelationship_parent_id", {}),
                ("idx_noderelationship_child_id", {}),
            ],
        ),
        ("nodecolumns", [("idx_nodecolumns_node_id", {})]),
        ("nodeavailabilitystate", [("idx_nodeavailabilitystate_node_id", {})]),
        (
            "node",
            [
                (
                    "idx_node_deactivated_at_null",
                    {"postgresql_where": sa.text("deactivated_at IS NULL")},
                ),
            ],
        ),
        (
            "metric_required_dimensions",
            [("idx_metric_required_dimensions_metric_id", {})],
        ),
        (
            "dimensionlink",
            [
                ("idx_dimensionlink_node_revision_id", {}),
                ("idx_dimensionlink_dimension_id", {}),
            ],
        ),
        ("cube", [("idx_cube_cube_id", {})]),
        ("columnattribute", [("idx_columnattribute_column_id", {})]),
    ]

    for table_name, indexes in drop_specs:
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            for index_name, extra_kwargs in indexes:
                batch_op.drop_index(index_name, **extra_kwargs)
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ async def find_nodes_by(
limit,
before,
after,
*options,
options=options,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import TYPE_CHECKING, List, Optional

import sqlalchemy as sa
from sqlalchemy import UniqueConstraint, select
from sqlalchemy import Index, UniqueConstraint, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Mapped, mapped_column, relationship

Expand Down Expand Up @@ -91,7 +91,10 @@ class ColumnAttribute(
"""

__tablename__ = "columnattribute"
__table_args__ = (UniqueConstraint("attribute_type_id", "column_id"),)
__table_args__ = (
UniqueConstraint("attribute_type_id", "column_id"),
Index("idx_columnattribute_column_id", "column_id"),
)

id: Mapped[int] = mapped_column(
sa.BigInteger().with_variant(sa.Integer, "sqlite"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Dict, List, Optional

import sqlalchemy as sa
from sqlalchemy import JSON, DateTime, ForeignKey
from sqlalchemy import JSON, DateTime, ForeignKey, Index
from sqlalchemy.orm import Mapped, mapped_column

from datajunction_server.database.base import Base
Expand Down Expand Up @@ -84,6 +84,7 @@ class NodeAvailabilityState(Base): # pylint: disable=too-few-public-methods
"""

__tablename__ = "nodeavailabilitystate"
__table_args__ = (Index("idx_nodeavailabilitystate_node_id", "node_id"),)

availability_id: Mapped[int] = mapped_column(
ForeignKey(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import cached_property
from typing import TYPE_CHECKING, Dict, List, Optional, Set

from sqlalchemy import JSON, BigInteger, Enum, ForeignKey, Integer
from sqlalchemy import JSON, BigInteger, Enum, ForeignKey, Index, Integer
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import Mapped, mapped_column, relationship

Expand All @@ -23,6 +23,10 @@ class DimensionLink(Base): # pylint: disable=too-few-public-methods
"""

__tablename__ = "dimensionlink"
__table_args__ = (
Index("idx_dimensionlink_node_revision_id", "node_revision_id"),
Index("idx_dimensionlink_dimension_id", "dimension_id"),
)

id: Mapped[int] = mapped_column(
BigInteger().with_variant(Integer, "sqlite"),
Expand Down
61 changes: 39 additions & 22 deletions datajunction-server/datajunction_server/database/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class NodeRelationship(Base): # pylint: disable=too-few-public-methods
"""

__tablename__ = "noderelationship"
__table_args__ = (
Index("idx_noderelationship_parent_id", "parent_id"),
Index("idx_noderelationship_child_id", "child_id"),
)

parent_id: Mapped[int] = mapped_column(
ForeignKey("node.id", name="fk_noderelationship_parent_id_node"),
Expand All @@ -90,6 +94,7 @@ class CubeRelationship(Base): # pylint: disable=too-few-public-methods
"""

__tablename__ = "cube"
__table_args__ = (Index("idx_cube_cube_id", "cube_id"),)

cube_id: Mapped[int] = mapped_column(
ForeignKey("noderevision.id", name="fk_cube_cube_id_noderevision"),
Expand All @@ -109,6 +114,7 @@ class BoundDimensionsRelationship(Base): # pylint: disable=too-few-public-metho
"""

__tablename__ = "metric_required_dimensions"
__table_args__ = (Index("idx_metric_required_dimensions_metric_id", "metric_id"),)

metric_id: Mapped[int] = mapped_column(
ForeignKey(
Expand Down Expand Up @@ -174,16 +180,6 @@ class Node(Base): # pylint: disable=too-few-public-methods
"""

__tablename__ = "node"
__table_args__ = (
UniqueConstraint("name", "namespace", name="unique_node_namespace_name"),
Index("cursor_index", "created_at", "id", postgresql_using="btree"),
Index(
"namespace_index",
"namespace",
postgresql_using="btree",
postgresql_ops={"identifier": "varchar_pattern_ops"},
),
)

id: Mapped[int] = mapped_column(
sa.BigInteger().with_variant(sa.Integer, "sqlite"),
Expand Down Expand Up @@ -259,6 +255,23 @@ class Node(Base): # pylint: disable=too-few-public-methods
foreign_keys="History.entity_name",
)

__table_args__ = (
UniqueConstraint("name", "namespace", name="unique_node_namespace_name"),
Index("cursor_index", "created_at", "id", postgresql_using="btree"),
Index(
"namespace_index",
"namespace",
postgresql_using="btree",
postgresql_ops={"identifier": "varchar_pattern_ops"},
),
# Handles frequent filtering on deactivated_at is NULL
Index(
"idx_node_deactivated_at_null",
"deactivated_at",
postgresql_where=(deactivated_at.is_(None)),
),
)

def __hash__(self) -> int:
return hash(self.id)

Expand Down Expand Up @@ -405,19 +418,20 @@ async def find( # pylint: disable=keyword-arg-before-vararg
return result.unique().scalars().all()

@classmethod
async def find_by( # pylint: disable=keyword-arg-before-vararg,too-many-locals
async def find_by(
cls,
session: AsyncSession,
names: Optional[List[str]] = None,
fragment: Optional[str] = None,
node_types: Optional[List[NodeType]] = None,
tags: Optional[List[str]] = None,
edited_by: Optional[str] = None,
namespace: Optional[str] = None,
limit: Optional[int] = 100,
before: Optional[str] = None,
after: Optional[str] = None,
*options: ExecutableOption, # pylint: disable=keyword-arg-before-vararg
names: list[str] | None = None,
fragment: str | None = None,
node_types: list[NodeType] | None = None,
tags: list[str] | None = None,
edited_by: str | None = None,
namespace: str | None = None,
limit: int | None = 100,
before: str | None = None,
after: str | None = None,
include_deactivated: bool = False,
options: list[ExecutableOption] = None,
) -> List["Node"]:
"""
Finds a list of nodes by prefix
Expand Down Expand Up @@ -454,6 +468,8 @@ async def find_by( # pylint: disable=keyword-arg-before-vararg,too-many-locals
)
if node_types:
statement = statement.where(Node.type.in_(node_types))
if not include_deactivated:
statement = statement.where(is_(Node.deactivated_at, None))
if edited_by:
edited_node_subquery = (
select(History.entity_name)
Expand Down Expand Up @@ -483,7 +499,7 @@ async def find_by( # pylint: disable=keyword-arg-before-vararg,too-many-locals

limit = limit if limit and limit > 0 else 100
statement = statement.limit(limit)
result = await session.execute(statement.options(*options))
result = await session.execute(statement.options(*(options or [])))
nodes = result.unique().scalars().all()

# Reverse for backward pagination
Expand Down Expand Up @@ -986,6 +1002,7 @@ class NodeColumns(Base): # pylint: disable=too-few-public-methods
"""

__tablename__ = "nodecolumns"
__table_args__ = (Index("idx_nodecolumns_node_id", "node_id"),)

node_id: Mapped[int] = mapped_column( # pylint: disable=unsubscriptable-object
ForeignKey("noderevision.id", name="fk_nodecolumns_node_id_noderevision"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from typing import Optional

from sqlalchemy import BigInteger, Enum, ForeignKey, Integer
from sqlalchemy import BigInteger, Enum, ForeignKey, Index, Integer
from sqlalchemy.orm import Mapped, mapped_column, relationship

from datajunction_server.database.base import Base
Expand All @@ -29,6 +29,9 @@ class Partition(Base): # type: ignore # pylint: disable=too-few-public-methods
"""

__tablename__ = "partition"
__table_args__ = (
Index("idx_partition_column_id", "column_id", postgresql_using="btree"),
)

id: Mapped[int] = mapped_column(
BigInteger().with_variant(Integer, "sqlite"),
Expand Down
15 changes: 14 additions & 1 deletion datajunction-server/datajunction_server/database/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,16 @@

from typing import TYPE_CHECKING, Any, Dict, List, Optional

from sqlalchemy import JSON, BigInteger, Column, ForeignKey, Integer, String, select
from sqlalchemy import (
JSON,
BigInteger,
Column,
ForeignKey,
Index,
Integer,
String,
select,
)
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Mapped, joinedload, mapped_column, relationship
from sqlalchemy.sql.base import ExecutableOption
Expand Down Expand Up @@ -108,6 +117,10 @@ class TagNodeRelationship(Base): # pylint: disable=too-few-public-methods
"""

__tablename__ = "tagnoderelationship"
__table_args__ = (
Index("idx_tagnoderelationship_tag_id", "tag_id"),
Index("idx_tagnoderelationship_node_id", "node_id"),
)

tag_id: Mapped[int] = mapped_column(
ForeignKey("tag.id", name="fk_tagnoderelationship_tag_id_tag"),
Expand Down
Loading