Skip to content

Commit 1ef4470

Browse files
Feature/622 improve runs view perf (#624)
* wip
* Clean up old code
* Write more tests
* Fix typo
* Add 0.5.5 sql script to db changelog
* Remove sorting and filtering from jobCount column
* PR fixes + fix tests
1 parent 9058af2 commit 1ef4470

File tree

12 files changed

+388
-140
lines changed

12 files changed

+388
-140
lines changed

src/main/resources/db_scripts/db_script_latest.sql

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -184,28 +184,7 @@ alter table "workflow"
184184
references "scheduler_instance"("id")
185185
on update NO ACTION on delete NO ACTION;
186186

187-
create view "dag_run_view" AS
188-
select
189-
dag_instance.id as "id",
190-
workflow.name as "workflow_name",
191-
workflow.project as "project_name",
192-
COALESCE(jobInstanceCount.count, 0) as "job_count",
193-
dag_instance.started as "started",
194-
dag_instance.finished as "finished",
195-
dag_instance.status as "status",
196-
dag_instance.triggered_by as "triggered_by",
197-
workflow.id as "workflow_id"
198-
from dag_instance
199-
left join (
200-
select job_instance.dag_instance_id, count(1) as "count"
201-
from job_instance
202-
group by dag_instance_id
203-
) as jobInstanceCount
204-
on jobInstanceCount.dag_instance_id = dag_instance.id
205-
left join workflow
206-
on workflow.id = dag_instance.workflow_id;
207-
208187
CREATE INDEX job_instance_dag_instance_idx ON job_instance (dag_instance_id);
209188
CREATE INDEX dag_instance_workflow_id_idx ON dag_instance (workflow_id);
189+
CREATE INDEX dag_instance_started_idx ON dag_instance (started);
210190
CREATE INDEX workflow_scheduler_inst_id_idx ON workflow (scheduler_instance_id);
211-

src/main/resources/db_scripts/liquibase/db.changelog.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,7 @@ databaseChangeLog:
7676
file: v0.5.3.remove-form-config.yml
7777
- include:
7878
relativeToChangelogFile: true
79-
file: v0.5.3.add-job-template-history.yml
79+
file: v0.5.3.add-job-template-history.yml
80+
- include:
81+
relativeToChangelogFile: true
82+
file: v0.5.5.optimize-dag-run-query.yml
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
2+
* Copyright 2018 ABSA Group Limited
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
CREATE INDEX dag_instance_started_idx ON dag_instance (started);
17+
DROP VIEW dag_run_view;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#
2+
# Copyright 2018 ABSA Group Limited
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
databaseChangeLog:
17+
- changeSet:
18+
id: v0.5.5.optimize-dag-run-query
19+
logicalFilePath: v0.5.5.optimize-dag-run-query
20+
21+
context: default
22+
changes:
23+
- sqlFile:
24+
relativeToChangelogFile: true
25+
path: v0.5.5.optimize-dag-run-query.sql

src/main/scala/za/co/absa/hyperdrive/trigger/models/tables/DagRunTable.scala

Lines changed: 0 additions & 57 deletions
This file was deleted.

src/main/scala/za/co/absa/hyperdrive/trigger/persistance/DagRunRepository.scala

Lines changed: 210 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,13 @@
1616
package za.co.absa.hyperdrive.trigger.persistance
1717

1818
import org.springframework.stereotype
19+
import slick.ast.BaseTypedType
20+
import slick.jdbc.SetParameter.{SetBoolean, SetInt, SetLong, SetString, SetTimestamp, SetUnit}
21+
import slick.jdbc.{GetResult, PositionedParameters, SQLActionBuilder, SetParameter}
1922
import za.co.absa.hyperdrive.trigger.models.dagRuns.DagRun
20-
import za.co.absa.hyperdrive.trigger.models.search.{TableSearchRequest, TableSearchResponse}
23+
import za.co.absa.hyperdrive.trigger.models.search.{BooleanFilterAttributes, ContainsFilterAttributes, DateTimeRangeFilterAttributes, EqualsMultipleFilterAttributes, IntRangeFilterAttributes, LongFilterAttributes, SortAttributes, TableSearchRequest, TableSearchResponse}
2124

25+
import java.sql.Timestamp
2226
import javax.inject.Inject
2327
import scala.concurrent.{ExecutionContext, Future}
2428

@@ -28,7 +32,211 @@ trait DagRunRepository extends Repository {
2832

2933
@stereotype.Repository
3034
class DagRunRepositoryImpl @Inject()(val dbProvider: DatabaseProvider) extends DagRunRepository {
35+
import api._
36+
private val fieldMapping = Map(
37+
"workflowId" -> "workflow.id",
38+
"workflowName" -> "workflow.name",
39+
"projectName" -> "workflow.project",
40+
"started" -> "dag_instance.started",
41+
"finished" -> "dag_instance.finished",
42+
"status" -> "dag_instance.status",
43+
"triggeredBy" -> "dag_instance.triggered_by",
44+
"id" -> "dag_instance.id"
45+
)
46+
47+
private def orderByMapping(index: Int) = if (index == -1) "DESC" else "ASC"
48+
3149
override def searchDagRuns(searchRequest: TableSearchRequest)(implicit ec: ExecutionContext): Future[TableSearchResponse[DagRun]] = {
32-
db.run(dagRunTable.search(searchRequest))
50+
51+
val dagIdsQueryMain =
52+
sql"""
53+
FROM dag_instance
54+
JOIN workflow on dag_instance.workflow_id = workflow.id
55+
WHERE 1=1
56+
"""
57+
val queryFilters = generateQueryFilters(searchRequest)
58+
val dagIdsQueryFilters = queryFilters._1
59+
val setParameters = queryFilters._2
60+
61+
val queryOrderBy = searchRequest.sort match {
62+
case Some(SortAttributes(by, order)) =>
63+
sql"""
64+
ORDER BY #${fieldMapping(by)} #${orderByMapping(order)}, dag_instance.id DESC
65+
"""
66+
case None =>
67+
sql"""
68+
ORDER BY dag_instance.id DESC
69+
"""
70+
}
71+
val dagIdsQueryLimitOffset =
72+
sql"""
73+
LIMIT #${searchRequest.size} OFFSET #${searchRequest.from}
74+
"""
75+
val dagIdsQueryOpeningPart =
76+
sql"""
77+
WITH dag_ids AS (
78+
SELECT dag_instance.id
79+
"""
80+
val dagIdsQueryClosingPart =
81+
sql"""
82+
)
83+
"""
84+
val countQuerySelect =
85+
sql"""
86+
SELECT COUNT(1)
87+
"""
88+
val dagRunQueryMain =
89+
sql"""
90+
select workflow.id,
91+
workflow.name,
92+
workflow.project,
93+
COALESCE(jobInstanceCount.count, 0) AS "job_count",
94+
dag_instance.started,
95+
dag_instance.finished,
96+
dag_instance.status,
97+
dag_instance.triggered_by,
98+
dag_instance.id
99+
from dag_instance
100+
left join (
101+
select job_instance.dag_instance_id, count(1) as "count"
102+
from job_instance
103+
join dag_ids on dag_ids.id = job_instance.dag_instance_id
104+
group by dag_instance_id
105+
) as jobInstanceCount
106+
on jobInstanceCount.dag_instance_id = dag_instance.id
107+
left join workflow
108+
on workflow.id = dag_instance.workflow_id
109+
join dag_ids on dag_ids.id = dag_instance.id
110+
"""
111+
112+
113+
val countQuery = SQLActionBuilder(
114+
concatQueryParts(countQuerySelect, dagIdsQueryMain, dagIdsQueryFilters),
115+
setParameters)
116+
val countQueryAction = countQuery.as[Int].head
117+
118+
implicit val getDagRunResult: GetResult[DagRun] = GetResult(r => DagRun(
119+
r.nextLong(), r.nextString(), r.nextString(), r.nextInt(),
120+
r.nextTimestamp().toLocalDateTime, r.nextTimestampOption().map(_.toLocalDateTime),
121+
r.nextString(), r.nextString(), r.nextLong()))
122+
val dagRunsQuery = SQLActionBuilder(
123+
concatQueryParts(dagIdsQueryOpeningPart, dagIdsQueryMain, dagIdsQueryFilters, queryOrderBy,
124+
dagIdsQueryLimitOffset, dagIdsQueryClosingPart, dagRunQueryMain, queryOrderBy), setParameters)
125+
val dagRunsQueryAction = dagRunsQuery.as[DagRun]
126+
127+
db.run(
128+
for {
129+
l <- countQueryAction
130+
r <- dagRunsQueryAction
131+
} yield {
132+
TableSearchResponse[DagRun](items = r, total = l)
133+
}
134+
)
135+
}
136+
137+
private def concatQueryParts(sqlActionBuilders: SQLActionBuilder*): Seq[Any] = {
138+
sqlActionBuilders.map(_.queryParts).reduceOption(_ ++ _).getOrElse(Seq())
139+
}
140+
141+
private def generateQueryFilters(request: TableSearchRequest) = {
142+
val filters =
143+
applyContainsFilter(request.getContainsFilterAttributes) ++
144+
applyIntRangeFilter(request.getIntRangeFilterAttributes) ++
145+
applyDateTimeRangeFilter(request.getDateTimeRangeFilterAttributes) ++
146+
applyEqualsMultipleFilter(request.getEqualsMultipleFilterAttributes) ++
147+
applyLongFilter(request.getLongFilterAttributes) ++
148+
applyBooleanFilter(request.getBooleanFilterAttributes)
149+
val queryPart = concatQueryParts(filters.map(_._1):_*)
150+
val setParameterFns = filters.flatMap(_._2)
151+
val setParameter = SetParameter((_: Unit, pp: PositionedParameters) => {
152+
setParameterFns.foreach(fn => fn(pp))
153+
})
154+
(SQLActionBuilder(queryPart, SetUnit), setParameter)
155+
}
156+
157+
private def applyContainsFilter(attributes: Seq[ContainsFilterAttributes]) = {
158+
attributes
159+
.filter(attribute => fieldMapping.contains(attribute.field))
160+
.map(attribute => (
161+
sql"""AND #${fieldMapping(attribute.field)} LIKE '%' || ? || '%'""",
162+
Seq((pp: PositionedParameters) => SetString(attribute.value, pp))
163+
))
164+
}
165+
166+
private def applyIntRangeFilter(attributes: Seq[IntRangeFilterAttributes]) = {
167+
val setIntParameter = (v: Int, pp:PositionedParameters) => SetInt(v, pp)
168+
attributes
169+
.filter(attribute => fieldMapping.contains(attribute.field))
170+
.map(attribute => applyRangeFilter(attribute.field, attribute.start, attribute.end, setIntParameter))
171+
}
172+
173+
private def applyDateTimeRangeFilter(attributes: Seq[DateTimeRangeFilterAttributes]) = {
174+
val setTimestampParameter = (v: Timestamp, pp:PositionedParameters) => SetTimestamp(v, pp)
175+
attributes
176+
.filter(attribute => fieldMapping.contains(attribute.field))
177+
.map(attribute => applyRangeFilter(attribute.field, attribute.start.map(Timestamp.valueOf), attribute.end.map(Timestamp.valueOf), setTimestampParameter))
178+
}
179+
180+
private def applyEqualsMultipleFilter(attributes: Seq[EqualsMultipleFilterAttributes]) = {
181+
attributes
182+
.filter(attribute => fieldMapping.contains(attribute.field))
183+
.map { attribute =>
184+
val placeholders = List.fill(attribute.values.size)("?").mkString("(", ",", ")")
185+
(
186+
sql"""AND #${fieldMapping(attribute.field)} IN #${placeholders}""",
187+
attribute.values.map(value => (pp: PositionedParameters) => SetString(value, pp))
188+
)
189+
}
190+
}
191+
192+
private def applyLongFilter(attributes: Seq[LongFilterAttributes]) = {
193+
attributes
194+
.filter(attribute => fieldMapping.contains(attribute.field))
195+
.map(attribute => (
196+
sql"""AND #${fieldMapping(attribute.field)} = ?""",
197+
Seq((pp: PositionedParameters) => SetLong(attribute.value, pp))
198+
))
199+
}
200+
201+
private def applyBooleanFilter(attributes: Seq[BooleanFilterAttributes]) = {
202+
attributes
203+
.filter(attribute => fieldMapping.contains(attribute.field))
204+
.filter(attribute => attribute.value.isTrue != attribute.value.isFalse)
205+
.map(attribute => (
206+
sql"""AND #${fieldMapping(attribute.field)} = ?""",
207+
Seq((pp: PositionedParameters) => SetBoolean(attribute.value.isTrue, pp))
208+
))
209+
}
210+
211+
private def applyRangeFilter[B: BaseTypedType](field: String, start: Option[B], end: Option[B], setParameterFn: (B, PositionedParameters) => Unit) = {
212+
if (start.isDefined && end.isDefined) {
213+
(
214+
sql"""AND #${fieldMapping(field)} >= ?
215+
AND #${fieldMapping(field)} <= ?""",
216+
Seq(
217+
(pp: PositionedParameters) => setParameterFn(start.get, pp),
218+
(pp: PositionedParameters) => setParameterFn(end.get, pp)
219+
)
220+
)
221+
} else if (start.isDefined) {
222+
(
223+
sql"""AND #${fieldMapping(field)} >= ?""",
224+
Seq(
225+
(pp: PositionedParameters) => setParameterFn(start.get, pp)
226+
)
227+
)
228+
} else if (end.isDefined) {
229+
(
230+
sql"""AND #${fieldMapping(field)} <= ?""",
231+
Seq(
232+
(pp: PositionedParameters) => setParameterFn(end.get, pp)
233+
)
234+
)
235+
} else {
236+
(
237+
sql"""AND 1=1""",
238+
Seq()
239+
)
240+
}
33241
}
34242
}

src/main/scala/za/co/absa/hyperdrive/trigger/persistance/Repository.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ trait Repository
2525
with JobInstanceTable
2626
with SensorTable
2727
with WorkflowTable
28-
with DagRunTable
2928
with WorkflowHistoryTable
3029
with JobTemplateTable
3130
with JobTemplateHistoryTable

src/test/scala/za/co/absa/hyperdrive/trigger/ApplicationStartPostgresTest.scala

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,6 @@ class ApplicationStartPostgresTest extends FlatSpec with Matchers with SpringInt
107107
workflows.size shouldBe 1
108108
workflows.head.name shouldBe workflowJoined.name
109109

110-
import api._
111-
run(sqlu"drop view dag_run_view")
112110
schemaDrop()
113111
}
114112
}

0 commit comments

Comments (0)