Skip to content

Commit a2b8e8e

Browse files
committed
Use the DeepSizeOf trait to get an accurate account of Graph memory usage
Signed-off-by: Hiram Chirino <[email protected]>
1 parent 5319ce7 commit a2b8e8e

File tree

11 files changed

+69
-58
lines changed

11 files changed

+69
-58
lines changed

Cargo.lock

+23
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ criterion = "0.5.1"
6161
csaf = { version = "0.5.0", default-features = false }
6262
csaf-walker = { version = "0.10.0", default-features = false }
6363
cve = "0.3.1"
64+
deepsize = "0.2.0"
6465
env_logger = "0.11.0"
6566
futures = "0.3.30"
6667
futures-util = "0.3"
@@ -204,3 +205,6 @@ osv = { git = "https://github.com/ctron/osv", rev = "b53f1590bbbdc663e3efe405f1f
204205
# to pickup fix: https://github.com/Abraxas-365/langchain-rust/pull/246
205206
# and fix: https://github.com/Abraxas-365/langchain-rust/pull/250
206207
langchain-rust = { git = "https://github.com/chirino/langchain-rust", branch = "main" }
208+
209+
# to pickup feat: https://github.com/Aeledfyr/deepsize/pull/41
210+
deepsize = { git = "https://github.com/chirino/deepsize", branch = "main" }

common/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ bytesize = { workspace = true, features = ["serde"] }
1515
chrono = { workspace = true }
1616
clap = { workspace = true, features = ["derive", "env"] }
1717
cpe = { workspace = true }
18+
deepsize = { workspace = true }
1819
hex = { workspace = true }
1920
human-date-parser = { workspace = true }
2021
itertools = { workspace = true }

common/src/cpe.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use cpe::{
22
cpe::Cpe as _,
33
uri::{OwnedUri, Uri},
44
};
5+
use deepsize::DeepSizeOf;
56
use serde::{
67
de::{Error, Visitor},
78
Deserialize, Deserializer, Serialize, Serializer,
@@ -17,7 +18,7 @@ use utoipa::{
1718
};
1819
use uuid::Uuid;
1920

20-
#[derive(Clone, Hash, Eq, PartialEq)]
21+
#[derive(Clone, Hash, Eq, PartialEq, DeepSizeOf)]
2122
pub struct Cpe {
2223
uri: OwnedUri,
2324
}

common/src/purl.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use deepsize::DeepSizeOf;
12
use packageurl::PackageUrl;
23
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
34
use serde::{
@@ -25,7 +26,7 @@ pub enum PurlErr {
2526
Package(#[from] packageurl::Error),
2627
}
2728

28-
#[derive(Clone, PartialEq, Eq, Hash)]
29+
#[derive(Clone, PartialEq, Eq, Hash, DeepSizeOf)]
2930
pub struct Purl {
3031
pub ty: String,
3132
pub namespace: Option<String>,

entity/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ trustify-cvss = { workspace = true }
1111

1212
async-graphql = { workspace = true, features = ["uuid", "time"] }
1313
cpe = { workspace = true }
14+
deepsize = { workspace = true }
1415
schemars = { workspace = true }
1516
sea-orm = { workspace = true, features = [
1617
"sqlx-postgres",

entity/src/relationship.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use deepsize::DeepSizeOf;
12
use sea_orm::{DeriveActiveEnum, EnumIter};
23
use std::fmt;
34

@@ -17,6 +18,7 @@ use std::fmt;
1718
)]
1819
#[sea_orm(rs_type = "i32", db_type = "Integer")]
1920
#[serde(rename_all = "snake_case")]
21+
#[derive(DeepSizeOf)]
2022
// When adding a new variant, also add this to the "relationship" table.
2123
pub enum Relationship {
2224
#[sea_orm(num_value = 0)]

modules/analysis/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ actix-http = { workspace = true }
1414
actix-web = { workspace = true }
1515
anyhow = { workspace = true }
1616
cpe = { workspace = true }
17+
deepsize = { workspace = true, features = ["cpe", "petgraph"] }
1718
log = { workspace = true }
1819
moka = { workspace = true, features = ["sync"] }
1920
parking_lot = { workspace = true }

modules/analysis/src/model.rs

+16-36
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::{
55
ops::{Deref, DerefMut},
66
};
77

8+
use deepsize::DeepSizeOf;
89
use moka::sync::Cache;
910
use std::sync::Arc;
1011
use trustify_common::{cpe::Cpe, purl::Purl};
@@ -25,7 +26,7 @@ impl fmt::Display for AnalysisStatus {
2526
}
2627
}
2728

28-
#[derive(Debug, Clone, PartialEq, Eq, ToSchema, serde::Serialize)]
29+
#[derive(Debug, Clone, PartialEq, Eq, ToSchema, serde::Serialize, DeepSizeOf)]
2930
pub struct PackageNode {
3031
pub sbom_id: String,
3132
pub node_id: String,
@@ -37,33 +38,6 @@ pub struct PackageNode {
3738
pub document_id: String,
3839
pub product_name: String,
3940
pub product_version: String,
40-
pub approximate_memory_size: u32,
41-
}
42-
43-
impl PackageNode {
44-
pub(crate) fn set_approximate_memory_size(&self) -> PackageNode {
45-
// Is there a better way to do this?
46-
let size = size_of::<PackageNode>()
47-
+ self.sbom_id.len()
48-
+ self.node_id.len()
49-
+ self.purl.iter().fold(0, |acc, purl|
50-
// use the json string length as an approximation of the memory size
51-
acc + serde_json::to_string(purl).unwrap_or_else(|_| "".to_string()).len())
52-
+ self.cpe.iter().fold(0, |acc, cpe|
53-
// use the json string length as an approximation of the memory size
54-
acc + serde_json::to_string(cpe).unwrap_or_else(|_| "".to_string()).len())
55-
+ self.name.len()
56-
+ self.version.len()
57-
+ self.published.len()
58-
+ self.document_id.len()
59-
+ self.product_name.len()
60-
+ self.product_version.len();
61-
62-
PackageNode {
63-
approximate_memory_size: size.try_into().unwrap_or(u32::MAX),
64-
..self.clone()
65-
}
66-
}
6741
}
6842

6943
impl fmt::Display for PackageNode {
@@ -188,20 +162,26 @@ pub struct GraphMap {
188162
}
189163

190164
#[allow(clippy::ptr_arg)] // &String is required by Cache::builder().weigher() method
191-
fn weigher(key: &String, value: &Arc<PackageGraph>) -> u32 {
192-
let mut result = key.len();
193-
for n in value.raw_nodes() {
194-
result += n.weight.approximate_memory_size as usize;
195-
}
196-
result += size_of_val(value.raw_edges());
197-
result.try_into().unwrap_or(u32::MAX)
165+
fn size_of_graph_entry(key: &String, value: &Arc<PackageGraph>) -> u32 {
166+
(
167+
key.deep_size_of()
168+
+ value.as_ref().deep_size_of()
169+
// Also add in some entry overhead of the cache entry
170+
+ 20
171+
// todo: find a better estimate for the moka ValueEntry
172+
)
173+
.try_into()
174+
.unwrap_or(u32::MAX)
198175
}
199176

200177
impl GraphMap {
201178
// Create a new instance of GraphMap
202179
pub fn new(cap: u64) -> Self {
203180
GraphMap {
204-
map: Cache::builder().weigher(weigher).max_capacity(cap).build(),
181+
map: Cache::builder()
182+
.weigher(size_of_graph_entry)
183+
.max_capacity(cap)
184+
.build(),
205185
}
206186
}
207187

modules/analysis/src/service/load.rs

+12-16
Original file line numberDiff line numberDiff line change
@@ -254,22 +254,18 @@ impl AnalysisService {
254254

255255
match nodes.entry(package.node_id.clone()) {
256256
Entry::Vacant(entry) => {
257-
let index = g.add_node(
258-
PackageNode {
259-
sbom_id: distinct_sbom_id.to_string(),
260-
node_id: package.node_id,
261-
purl: to_purls(package.purls),
262-
cpe: to_cpes(package.cpes),
263-
name: package.node_name,
264-
version: package.node_version.clone().unwrap_or_default(),
265-
published: package.published.clone(),
266-
document_id: package.document_id.clone().unwrap_or_default(),
267-
product_name: package.product_name.clone().unwrap_or_default(),
268-
product_version: package.product_version.clone().unwrap_or_default(),
269-
approximate_memory_size: 0,
270-
}
271-
.set_approximate_memory_size(),
272-
);
257+
let index = g.add_node(PackageNode {
258+
sbom_id: distinct_sbom_id.to_string(),
259+
node_id: package.node_id,
260+
purl: to_purls(package.purls),
261+
cpe: to_cpes(package.cpes),
262+
name: package.node_name,
263+
version: package.node_version.clone().unwrap_or_default(),
264+
published: package.published.clone(),
265+
document_id: package.document_id.clone().unwrap_or_default(),
266+
product_name: package.product_name.clone().unwrap_or_default(),
267+
product_version: package.product_version.clone().unwrap_or_default(),
268+
});
273269

274270
log::debug!("Inserting - id: {}, index: {index:?}", entry.key());
275271

modules/analysis/src/service/test.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -261,18 +261,19 @@ async fn test_cache_size_used(ctx: &TrustifyContext) -> Result<(), anyhow::Error
261261
let all_graphs = service.load_all_graphs(&ctx.db).await?;
262262
assert_eq!(all_graphs.len(), 1);
263263

264-
// Does 3.4 KB sound right?
264+
let kb = 1024;
265265
let small_sbom_size = service.cache_size_used();
266-
assert_eq!(small_sbom_size, 3505u64);
266+
assert!(small_sbom_size > 6 * kb);
267+
assert!(small_sbom_size < 7 * kb);
267268

268269
ctx.ingest_documents(["spdx/quarkus-bom-3.2.11.Final-redhat-00001.json"])
269270
.await?;
270271
let all_graphs = service.load_all_graphs(&ctx.db).await?;
271272
assert_eq!(all_graphs.len(), 2);
272273

273-
// Does 676.7 KB sound right?
274274
let big_sbom_size = service.cache_size_used() - small_sbom_size;
275-
assert_eq!(big_sbom_size, 693006u64);
275+
assert!(big_sbom_size > 950 * kb);
276+
assert!(big_sbom_size < 960 * kb);
276277

277278
// Now let's try it with a small cache that can at least fit the small bom
278279
let service = AnalysisService::new_sized(small_sbom_size * 2);

0 commit comments

Comments
 (0)