Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance by splitting query details off into seperate table #481

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions project/tests/factories.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
# -*- coding: utf-8 -*-
import factory
import factory.fuzzy

from silk.models import Request, Response, SQLQuery

from silk.models import Request, Response, SQLQuery, SQLQueryDetails

HTTP_METHODS = ['GET', 'POST', 'PUT', 'PATCH', 'HEAD', 'OPTIONS']
STATUS_CODES = [200, 201, 300, 301, 302, 401, 403, 404]


class SQLQueryFactory(factory.django.DjangoModelFactory):

class SQLQueryDetailsFactory(factory.django.DjangoModelFactory):
query = factory.Sequence(lambda num: u'SELECT foo FROM bar WHERE foo=%s' % num)
traceback = factory.Sequence(lambda num: u'Traceback #%s' % num)

class Meta:
model = SQLQueryDetails

class SQLQueryFactory(factory.django.DjangoModelFactory):
details = factory.SubFactory(SQLQueryDetailsFactory)

class Meta:
model = SQLQuery

Expand Down
15 changes: 15 additions & 0 deletions project/tests/test_collector.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.test import TestCase
from silk.collector import DataCollector
from silk.models import SQLQuery


class TestCollector(TestCase):
Expand All @@ -18,3 +19,17 @@ def test_clear(self):
self.test_query_registration()
DataCollector().clear()
self.assertFalse(DataCollector().queries)

def test_finalise(self):
"""
Just call the example app to try a full registration/finalise run
"""
self.client.get("/example_app/")
assert SQLQuery.objects.count() == 1
query = SQLQuery.objects.get()
assert (
query.details.query
== 'SELECT "example_app_blind"."id", "example_app_blind"."photo", "example_app_blind"."name", "example_app_blind"."child_safe" FROM "example_app_blind"'
)
assert query.details.traceback != ""
assert query.details.analysis == "2 0 0 SCAN example_app_blind"
7 changes: 6 additions & 1 deletion project/tests/test_lib/mock_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from django.utils import timezone

from silk import models
from silk.models import SQLQuery, Profile
from silk.models import Profile, SQLQuery, SQLQueryDetails


class MockSuite(object):
Expand Down Expand Up @@ -93,7 +93,12 @@ def mock_sql_queries(self, request=None, profile=None, n=1, as_dict=False):
if as_dict:
queries.append(d)
else:
details_kwargs = {
key: d.pop(key)
for key in ['query', 'traceback']
}
query = SQLQuery.objects.create(**d)
SQLQueryDetails.objects.create(query_obj=query, **details_kwargs)
queries.append(query)
if profile:
if as_dict:
Expand Down
32 changes: 16 additions & 16 deletions project/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def test_query_manager_instance(self):

def test_traceback_ln_only(self):

self.obj.traceback = """Traceback (most recent call last):
self.obj.details.traceback = """Traceback (most recent call last):
File "/home/user/some_script.py", line 10, in some_func
pass
File "/usr/lib/python2.7/bdb.py", line 20, in trace_dispatch
Expand All @@ -308,7 +308,7 @@ def test_traceback_ln_only(self):

def test_formatted_query_if_no_query(self):

self.obj.query = ""
self.obj.details.query = ""
self.obj.formatted_query

def test_formatted_query_if_has_a_query(self):
Expand All @@ -320,7 +320,7 @@ def test_formatted_query_if_has_a_query(self):
ON Book.isbn = Book_author.isbn
GROUP BY Book.title;"""

self.obj.query = query
self.obj.details.query = query
self.obj.formatted_query

def test_num_joins_if_no_joins_in_query(self):
Expand All @@ -330,7 +330,7 @@ def test_num_joins_if_no_joins_in_query(self):
FROM Book
GROUP BY Book.title;"""

self.obj.query = query
self.obj.details.query = query

self.assertEqual(self.obj.num_joins, 0)

Expand All @@ -344,14 +344,14 @@ def test_num_joins_if_joins_in_query(self):
JOIN option_address_type oat ON o.id = oat.option_id
WHERE a.country_id = 1 AND at.id <> oat.type_id;"""

self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.num_joins, 4)

def test_num_joins_if_no_joins_in_query_but_this_word_searched(self):

query = """SELECT Book.title FROM Book WHERE Book.title=`Join the dark side, Luke!`;"""

self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.num_joins, 0)

def test_num_joins_if_multiple_statement_in_query(self):
Expand All @@ -361,52 +361,52 @@ def test_num_joins_if_multiple_statement_in_query(self):
INNER JOIN joined ON Book.joiner = joined.joiner
WHERE Book.joiner='Join i am'"""

self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.num_joins, 2)

def test_tables_involved_if_no_query(self):

self.obj.query = ''
self.obj.details.query = ''

self.assertEqual(self.obj.tables_involved, [])

def test_tables_involved_if_query_has_only_a_from_token(self):

query = """SELECT * FROM Book;"""
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['Book;'])

def test_tables_involved_if_query_has_a_join_token(self):

query = """SELECT p.id FROM Person p JOIN Address a ON p.Id = a.Person_ID;"""
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['Person', 'Address'])

def test_tables_involved_if_query_has_an_as_token(self):

query = 'SELECT Book.title AS Title FROM Book GROUP BY Book.title;'
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['Title', 'Book'])

# FIXME bug, not a feature
def test_tables_involved_check_with_fake_a_from_token(self):

query = """SELECT * FROM Book WHERE Book.title=`EVIL FROM WITHIN`;"""
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['Book', 'WITHIN`;'])

# FIXME bug, not a feature
def test_tables_involved_check_with_fake_a_join_token(self):

query = """SELECT * FROM Book WHERE Book.title=`Luke, join the dark side!`;"""
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['Book', 'the'])

# FIXME bug, not a feature
def test_tables_involved_check_with_fake_an_as_token(self):

query = """SELECT * FROM Book WHERE Book.title=`AS SOON AS POSIABLE`;"""
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['Book', 'POSIABLE`;'])

def test_tables_involved_if_query_has_subquery(self):
Expand All @@ -418,14 +418,14 @@ def test_tables_involved_if_query_has_subquery(self):
WHERE RealTableY.Col11>14
) AS B INNER JOIN A
ON A.ForeignKeyY=B.ID;'''
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['ID', 'RealTableZ', 'RealTableY', 'B', 'A'])

# FIXME bug, not a feature
def test_tables_involved_if_query_has_django_aliase_on_column_names(self):

query = 'SELECT foo AS bar FROM some_table;'
self.obj.query = query
self.obj.details.query = query
self.assertEqual(self.obj.tables_involved, ['bar', 'some_table;'])

def test_save_if_no_end_and_start_time(self):
Expand Down
15 changes: 13 additions & 2 deletions silk/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,25 @@ def finalise(self):
self.request.save()

sql_queries = []
sql_details = []
for identifier, query in self.queries.items():
query['identifier'] = identifier
details_kwargs = {
key: query.pop(key, "")
for key in ['query', 'traceback', 'analysis']
}
sql_query = models.SQLQuery(**query)
sql_detail = models.SQLQueryDetails(query_obj=sql_query, **details_kwargs)
sql_queries += [sql_query]
sql_details += [sql_detail]

models.SQLQuery.objects.bulk_create(sql_queries)
sql_queries = models.SQLQuery.objects.filter(request=self.request)
for sql_query in sql_queries.all():
sql_queries = list(models.SQLQuery.objects.filter(request=self.request))
query_by_identifer = {item.identifier: item for item in sql_queries}
for sql_detail in sql_details:
sql_detail.query_obj = query_by_identifer[sql_detail.query_obj.identifier]
models.SQLQueryDetails.objects.bulk_create(sql_details)
for sql_query in sql_queries:
query = self.queries.get(sql_query.identifier)
if query:
query['model'] = sql_query
Expand Down
41 changes: 41 additions & 0 deletions silk/migrations/0009_sqlquerydetails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Generated by Django 2.2.24 on 2021-06-24 09:54

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("silk", "0008_sqlquery_analysis"),
]

operations = [
migrations.CreateModel(
name="SQLQueryDetails",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("query", models.TextField()),
("traceback", models.TextField()),
("analysis", models.TextField()),
(
"query_obj",
models.OneToOneField(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="details",
to="silk.SQLQuery",
),
),
],
),
]
38 changes: 38 additions & 0 deletions silk/migrations/0010_fill_query_details.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 2.2.24 on 2021-06-24 09:54

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this migration handwritten? or auto generated

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code inside the migration was handwritten, the file was autogenerated using ./manage.py makemigrations silk --empty --name fill_query_details

from django.db import migrations, reset_queries

BATCH_SIZE = 100


def fill_query_details(apps, schema_editor):
SQLQueryDetails = apps.get_model("silk", "SQLQueryDetails")
SQLQuery = apps.get_model("silk", "SQLQuery")
queries = SQLQuery.objects.all().order_by("pk")
buffer = []
for query in queries.iterator(BATCH_SIZE):
buffer.append(
SQLQueryDetails(
query_obj=query,
query=query.query,
traceback=query.traceback,
analysis=query.analysis or "",
)
)
if len(buffer) == BATCH_SIZE:
SQLQueryDetails.objects.bulk_create(buffer, BATCH_SIZE)
reset_queries() # query log leads to massive memory issue
buffer = []


def backwards(apps, schema_editor):
pass


class Migration(migrations.Migration):

dependencies = [
("silk", "0009_sqlquerydetails"),
]

operations = [migrations.RunPython(fill_query_details, backwards)]
25 changes: 25 additions & 0 deletions silk/migrations/0011_auto_20210624_1450.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 2.2.24 on 2021-06-24 12:50

from django.db import migrations


class Migration(migrations.Migration):

dependencies = [
("silk", "0010_fill_query_details"),
]

operations = [
migrations.RemoveField(
model_name="sqlquery",
name="analysis",
),
migrations.RemoveField(
model_name="sqlquery",
name="query",
),
migrations.RemoveField(
model_name="sqlquery",
name="traceback",
),
]
18 changes: 11 additions & 7 deletions silk/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,6 @@ def bulk_create(self, *args, **kwargs):


class SQLQuery(models.Model):
query = TextField()
start_time = DateTimeField(null=True, blank=True, default=timezone.now)
end_time = DateTimeField(null=True, blank=True)
time_taken = FloatField(blank=True, null=True)
Expand All @@ -240,22 +239,20 @@ class SQLQuery(models.Model):
Request, related_name='queries', null=True,
blank=True, db_index=True, on_delete=models.CASCADE,
)
traceback = TextField()
analysis = TextField(null=True, blank=True)
objects = SQLQueryManager()

# TODO docstring
@property
def traceback_ln_only(self):
return '\n'.join(self.traceback.split('\n')[::2])
return '\n'.join(self.details.traceback.split('\n')[::2])

@property
def formatted_query(self):
return sqlparse.format(self.query, reindent=True, keyword_case='upper')
return sqlparse.format(self.details.query, reindent=True, keyword_case='upper')

@property
def num_joins(self):
parsed_query = sqlparse.parse(self.query)
parsed_query = sqlparse.parse(self.details.query)
count = 0
for statement in parsed_query:
count += sum(map(lambda t: t.match(sqlparse.tokens.Keyword, r'\.*join\.*', regex=True), statement.flatten()))
Expand All @@ -269,7 +266,7 @@ def tables_involved(self):
TODO: Can probably parse the SQL using sqlparse etc and pull out table
info that way?
"""
components = [x.strip() for x in self.query.split()]
components = [x.strip() for x in self.details.query.split()]
tables = []

for idx, component in enumerate(components):
Expand Down Expand Up @@ -309,6 +306,13 @@ def delete(self, *args, **kwargs):
super(SQLQuery, self).delete(*args, **kwargs)


class SQLQueryDetails(models.Model):
query_obj = OneToOneField(SQLQuery, blank=True, null=True, on_delete=models.CASCADE, related_name='details')
query = TextField()
traceback = TextField()
analysis = TextField()


class BaseProfile(models.Model):
name = CharField(max_length=300, blank=True, default='')
start_time = DateTimeField(default=timezone.now)
Expand Down
Loading