Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

APP-4737 Enhance MDIR with data domains and products #1239

Merged
merged 23 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8cb0b01
First Iteration of Report using Data Domains and Products
pavanmanishd Jan 21, 2025
82bf772
Added UI support and write to file
pavanmanishd Jan 21, 2025
41c63b2
Merge branch 'main' into APP-4737
pavanmanishd Jan 21, 2025
f78b9a0
Domain Creation and Asset Linking to Data Product
pavanmanishd Jan 21, 2025
9b38c14
Export to excel sheet and remove previous tests
pavanmanishd Jan 21, 2025
e2b9739
Merge branch 'main' into APP-4737
pavanmanishd Jan 21, 2025
4ad3eb0
Remove Domains, SubDomains and Products after tests
pavanmanishd Jan 22, 2025
cbdccb5
Remove unnecessary ui inputs
pavanmanishd Jan 22, 2025
e5d539b
Remove glossary from config
pavanmanishd Jan 22, 2025
cb08cc1
ImportReportTest using Domains and Products
pavanmanishd Jan 22, 2025
156b6e5
Remove unused code
pavanmanishd Jan 22, 2025
035ebca
Formatting Changes
pavanmanishd Jan 22, 2025
aaccdf0
Initial Conversion CSV Test
pavanmanishd Jan 22, 2025
a34b50e
ImpactReportCSVTest for Domains and Products
pavanmanishd Jan 22, 2025
32c02e9
Merge branch 'main' into APP-4737
pavanmanishd Jan 22, 2025
620013a
Unique Domain for each test
pavanmanishd Jan 23, 2025
59e8987
Check subdomains and Removed unique for subdomains
pavanmanishd Jan 27, 2025
a08d221
Check all data products
pavanmanishd Jan 27, 2025
0c2c746
Optimized the retreival with queries and reformatted the code
pavanmanishd Jan 28, 2025
e13159d
Added Queries instead of loops for effiecient reterival
pavanmanishd Jan 29, 2025
044ec89
Merge branch 'main' into APP-4737
cmgrote Jan 29, 2025
4423264
Removed unused code, optimized some queries
cmgrote Jan 29, 2025
d1b67ed
Wait until all the datadomains are resolved
pavanmanishd Jan 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,56 @@ abstract class PackageTest(
}
}

/**
* Remove the provided domain and all its subdomains and products, if they exist
*
* @param domainName of the domain
*/
fun removeDomainAndProduct(domainName: String) {
val domains =
DataDomain
.select(client)
.where(DataDomain.NAME.eq(domainName))
.stream()
.toList()
try {
if (domains.isNotEmpty()) {
// find all the products under the domain
val productGuids =
DataProduct
.select(client)
.where(DataProduct.PARENT_DOMAIN_QUALIFIED_NAME.eq(domains[0].qualifiedName))
.stream()
.map { it.guid }
.toList()
if (productGuids.isNotEmpty()) {
client.assets.delete(productGuids, AtlanDeleteType.HARD)
}

// find all subdomains under the domain
val subDomainNames =
DataDomain
.select(client)
.where(DataDomain.PARENT_DOMAIN_QUALIFIED_NAME.eq(domains[0].qualifiedName))
.stream()
.map { it.name }
.toList()
if (subDomainNames.isNotEmpty()) {
subDomainNames.forEach {
removeDomainAndProduct(it)
}
}

// delete the domain
domains.forEach { domain ->
client.assets.delete(domain.guid, AtlanDeleteType.HARD)
}
}
} catch (e: Exception) {
logger.error(e) { "Unable to remove domain: $domainName" }
}
}

/**
* Set up and run a custom package, using the provided configuration.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import javax.annotation.processing.Generated
@Generated("com.atlan.pkg.CustomPackage")
@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY)
data class MetadataImpactReportCfg(
@JsonProperty("include_glossary") val includeGlossary: String = "TRUE",
@JsonProperty("glossary_name") val glossaryName: String = "Metadata metrics",
@JsonProperty("include_data_products") val includeDataProducts: String = "TRUE",
@JsonProperty("data_domain") val dataDomain: String = "Metadata metrics",
@JsonProperty("include_details") val includeDetails: Boolean = false,
@JsonProperty("file_format") val fileFormat: String = "XLSX",
@JsonProperty("delivery_type") val deliveryType: String = "DIRECT",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@ package com.atlan.pkg.mdir
import MetadataImpactReportCfg
import com.atlan.AtlanClient
import com.atlan.exception.NotFoundException
import com.atlan.model.assets.Glossary
import com.atlan.model.assets.GlossaryCategory
import com.atlan.model.assets.GlossaryTerm
import com.atlan.model.enums.AssetCreationHandling
import com.atlan.model.assets.Asset
import com.atlan.model.assets.DataDomain
import com.atlan.model.assets.DataProduct
import com.atlan.model.enums.AtlanAnnouncementType
import com.atlan.model.enums.AtlanIcon
import com.atlan.model.enums.CertificateStatus
import com.atlan.pkg.PackageContext
import com.atlan.pkg.Utils
Expand Down Expand Up @@ -83,7 +81,7 @@ object Reporter {
const val CAT_SAVINGS = "Cost savings"
const val CAT_ADOPTION = "Adoption metrics"

val CATEGORIES =
val SUBDOMAINS =
mapOf(
CAT_HEADLINES to "**Metrics that break down Atlan-managed assets as overall numbers.** These are mostly useful to contextualize the overall asset footprint of your data ecosystem.",
CAT_SAVINGS to "**Metrics that can be used to discover potential cost savings.** These are areas you may want to investigate for cost savings, though there are caveats with each one that are worth reviewing to understand potential limitations.",
Expand Down Expand Up @@ -133,14 +131,15 @@ object Reporter {
Paths.get(filePath).toFile().createNewFile()
}

val glossary =
if (ctx.config.includeGlossary == "TRUE") {
createGlossaryIdempotent(ctx.client, ctx.config.glossaryName)
val domain =
if (ctx.config.includeDataProducts == "TRUE") {
createDomainIdempotent(ctx.client, ctx.config.dataDomain)
} else {
null
}
val categoryNameToGuid = createCategoriesIdempotent(ctx.client, glossary)
val fileOutputs = runReports(ctx, outputDirectory, batchSize, glossary, categoryNameToGuid)

val subdomainNameToQualifiedName = createSubDomainsIdempotent(ctx.client, domain)
val fileOutputs = runReports(ctx, outputDirectory, batchSize, subdomainNameToQualifiedName)

when (ctx.config.deliveryType) {
"EMAIL" -> {
Expand Down Expand Up @@ -176,46 +175,69 @@ object Reporter {
}
}

private fun createGlossaryIdempotent(
private fun createDomainIdempotent(
client: AtlanClient,
glossaryName: String,
): Glossary =
domainName: String,
): Asset =
try {
Glossary.findByName(client, glossaryName)
val domain =
DataDomain
.select(client)
.where(DataDomain.NAME.eq(domainName))
.stream()
.toList()
if (domain.isEmpty()) {
val create = DataDomain.creator(domainName).build()
val response = create.save(client)
response.getResult(create)
} else {
domain.first()
}
} catch (e: NotFoundException) {
val create =
Glossary
.creator(glossaryName)
.assetIcon(AtlanIcon.PROJECTOR_SCREEN_CHART)
.build()
val create = DataDomain.creator(domainName).build()
val response = create.save(client)
response.getResult(create)
}

private fun createCategoriesIdempotent(
private fun createSubDomainsIdempotent(
client: AtlanClient,
glossary: Glossary?,
domain: Asset?,
): Map<String, String> {
if (glossary == null) return emptyMap()
if (domain == null) return emptyMap()
val nameToResolved = mutableMapOf<String, String>()
val placeholderToName = mutableMapOf<String, String>()
AssetBatch(client, 20).use { batch ->
CATEGORIES.forEach { (name, description) ->
SUBDOMAINS.forEach { (name, description) ->
val builder =
try {
val found = GlossaryCategory.findByNameFast(client, name, glossary.qualifiedName)[0]
found.trimToRequired().guid(found.guid)
val found =
DataDomain
.select(client)
.where(DataDomain.PARENT_DOMAIN_QUALIFIED_NAME.eq(domain.qualifiedName))
.where(DataDomain.NAME.eq(name))
.stream()
.toList()
.firstOrNull()
if (found != null) {
found.trimToRequired().guid(found.guid)
} else {
DataDomain.creator(name, domain.qualifiedName)
}
} catch (e: NotFoundException) {
GlossaryCategory.creator(name, glossary)
DataDomain.creator(name, domain.qualifiedName)
}
val category = builder.description(description).build()
placeholderToName[category.guid] = name
batch.add(category)
val subdomain = builder.description(description).build()
println(name)
batch.add(subdomain)
}
batch.flush()
placeholderToName.forEach { (guid, name) ->
val resolved = batch.resolvedGuids.getOrDefault(guid, guid)
nameToResolved[name] = resolved
while (nameToResolved.size < SUBDOMAINS.size) {
DataDomain
.select(client)
.where(DataDomain.GUID.`in`(batch.resolvedGuids.values.toList()))
.stream()
.forEach {
nameToResolved[it.name] = it.qualifiedName
}
}
}
return nameToResolved
Expand All @@ -225,8 +247,7 @@ object Reporter {
ctx: PackageContext<MetadataImpactReportCfg>,
outputDirectory: String,
batchSize: Int = 300,
glossary: Glossary? = null,
categoryNameToGuid: Map<String, String>? = null,
subdomainNameToQualifiedName: Map<String, String>,
): List<String> {
if (ctx.config.fileFormat == "XLSX") {
val outputFile = "$outputDirectory${File.separator}mdir.xlsx"
Expand All @@ -244,7 +265,7 @@ object Reporter {
)
reports.forEach { repClass ->
val metric = Metric.get(repClass, ctx.client, batchSize, logger)
outputReport(ctx, metric, overview, xlsx.createSheet(metric.getShortName()), batchSize, glossary, categoryNameToGuid)
outputReportDomain(ctx, metric, overview, xlsx.createSheet(metric.getShortName()), subdomainNameToQualifiedName)
}
}
return listOf(outputFile)
Expand All @@ -266,7 +287,7 @@ object Reporter {
val metric = Metric.get(repClass, ctx.client, batchSize, logger)
val metricFile = "$outputDirectory${File.separator}${CSV_FILES[metric.getShortName()]}"
CSVWriter(metricFile).use { details ->
outputReport(ctx, metric, overview, details, batchSize, glossary, categoryNameToGuid)
outputReportDomain(ctx, metric, overview, details, subdomainNameToQualifiedName)
}
outputFiles.add(metricFile)
}
Expand All @@ -275,38 +296,49 @@ object Reporter {
}
}

private fun outputReport(
private fun outputReportDomain(
ctx: PackageContext<MetadataImpactReportCfg>,
metric: Metric,
overview: TabularWriter,
details: TabularWriter,
batchSize: Int,
glossary: Glossary? = null,
categoryNameToGuid: Map<String, String>? = null,
subdomainNameToQualifiedName: Map<String, String>,
) {
logger.info { "Quantifying metric: ${metric.name} ..." }
val quantified = metric.quantify()
val term =
if (ctx.config.includeGlossary == "TRUE") {
writeMetricToGlossary(ctx.client, metric, quantified, glossary!!, categoryNameToGuid!!)
} else {
null
}
writeMetricToFile(ctx.client, metric, quantified, overview, details, ctx.config.includeDetails, term, batchSize)
if (ctx.config.includeDataProducts == "TRUE") {
writeMetricToDomain(ctx.client, metric, quantified, subdomainNameToQualifiedName)
}
writeMetricToFile(metric, quantified, overview, details, ctx.config.includeDetails)
}

private fun writeMetricToGlossary(
private fun writeMetricToDomain(
client: AtlanClient,
metric: Metric,
quantified: Double,
glossary: Glossary,
categoryNameToGuid: Map<String, String>,
): GlossaryTerm {
subdomainNameToQualifiedName: Map<String, String>,
): Asset {
val builder =
try {
GlossaryTerm.findByNameFast(client, metric.name, glossary.qualifiedName).trimToRequired()
val foundProducts = DataProduct.findByName(client, metric.name)
println(subdomainNameToQualifiedName[metric.category])
val matchingProduct =
foundProducts.firstOrNull { product ->
product.qualifiedName.substringBefore("/product/") == subdomainNameToQualifiedName[metric.category]
}

matchingProduct?.trimToRequired() ?: DataProduct.creator(
client,
metric.name,
subdomainNameToQualifiedName[metric.category],
metric.query().build(),
)
} catch (e: NotFoundException) {
GlossaryTerm.creator(metric.name, glossary)
DataProduct.creator(
client,
metric.name,
subdomainNameToQualifiedName[metric.category],
metric.query().build(),
)
}
val prettyQuantity = NumberFormat.getNumberInstance(Locale.US).format(quantified)
if (metric.caveats.isNotBlank()) {
Expand All @@ -324,26 +356,23 @@ object Reporter {
.announcementTitle("Note")
.announcementMessage(metric.notes)
}
val term =

val product =
builder
.displayName(metric.displayName)
.description(metric.description)
.certificateStatusMessage(prettyQuantity)
.category(GlossaryCategory.refByGuid(categoryNameToGuid[metric.category]))
.build()
val response = term.save(client)
return response.getResult(term) ?: term.trimToRequired().guid(response.getAssignedGuid(term)).build()
val response = product.save(client)
return response.getResult(product) ?: product.trimToRequired().guid(response.getAssignedGuid(product)).build()
}

private fun writeMetricToFile(
client: AtlanClient,
metric: Metric,
quantified: Double,
overview: TabularWriter,
details: TabularWriter,
includeDetails: Boolean,
term: GlossaryTerm?,
batchSize: Int,
) {
overview.writeRecord(
listOf(
Expand All @@ -355,26 +384,7 @@ object Reporter {
),
)
if (includeDetails) {
val batch =
if (term != null) {
AssetBatch(
client,
batchSize,
false,
AssetBatch.CustomMetadataHandling.IGNORE,
true,
false,
false,
false,
AssetCreationHandling.FULL,
false,
)
} else {
null
}
metric.outputDetailedRecords(details, term, batch)
batch?.flush()
batch?.close()
metric.outputDetailedRecords(details)
}
}
}
Loading