Skip to content

Add named "additional data" support #121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 72 additions & 37 deletions analyzeme/src/event.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
use crate::timestamp::Timestamp;
use memchr::memchr;
use std::borrow::Cow;
use std::time::Duration;

#[derive(Clone, Eq, PartialEq, Hash, Debug)]
pub struct Argument<'a> {
pub name: Option<Cow<'a, str>>,
pub value: Cow<'a, str>,
}

impl<'a> Argument<'a> {
pub fn new(value: &'a str) -> Self {
Self { name: None, value: Cow::from(value) }
}

pub fn new_named(name: &'a str, value: &'a str) -> Self {
Self { name: Some(Cow::from(name)), value: Cow::from(value) }
}
}

#[derive(Clone, Eq, PartialEq, Hash, Debug)]
pub struct Event<'a> {
pub event_kind: Cow<'a, str>,
pub label: Cow<'a, str>,
pub additional_data: Vec<Cow<'a, str>>,
pub additional_data: Vec<Argument<'a>>,
pub timestamp: Timestamp,
pub thread_id: u32,
}
Expand Down Expand Up @@ -38,7 +53,7 @@ impl<'a> Event<'a> {
}
}

pub(crate) fn parse_event_id(event_id: Cow<'a, str>) -> (Cow<'a, str>, Vec<Cow<'a, str>>) {
pub(crate) fn parse_event_id(event_id: Cow<'a, str>) -> (Cow<'a, str>, Vec<Argument<'a>>) {
let event_id = match event_id {
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
Expand Down Expand Up @@ -75,52 +90,58 @@ struct Parser<'a> {
pos: usize,
}

const SEPARATOR_BYTE: u8 = measureme::event_id::SEPARATOR_BYTE.as_bytes()[0];
const ARGUMENT_VALUE_TAG_BYTE: u8 = measureme::event_id::ARGUMENT_VALUE_TAG_BYTE.as_bytes()[0];
const ARGUMENT_NAME_TAG_BYTE: u8 = measureme::event_id::ARGUMENT_NAME_TAG_BYTE.as_bytes()[0];

impl<'a> Parser<'a> {
fn new(full_text: Cow<'a, [u8]>) -> Parser<'a> {
Parser { full_text, pos: 0 }
}

fn peek(&self) -> u8 {
self.full_text[self.pos]
}

fn parse_label(&mut self) -> Result<Cow<'a, str>, String> {
assert!(self.pos == 0);
self.parse_separator_terminated_text()
let text = self.parse_text()?;
if text.is_empty() {
return self.err("<label> is empty");
} else {
Ok(text)
}
}

fn parse_separator_terminated_text(&mut self) -> Result<Cow<'a, str>, String> {
fn parse_text(&mut self) -> Result<Cow<'a, str>, String> {
let start = self.pos;

let end = memchr(SEPARATOR_BYTE, &self.full_text[start..])
.map(|pos| pos + start)
.unwrap_or(self.full_text.len());

if start == end {
return self.err("Zero-length <text>");
}

self.pos = end;

if self.full_text[start..end].iter().any(u8::is_ascii_control) {
return self.err("Found ASCII control character in <text>");
}

Ok(self.substring(start, end))
self.pos += self.full_text[start..]
.iter()
.take_while(|c| !u8::is_ascii_control(c))
.count();
Ok(self.substring(start, self.pos))
}

fn parse_arg(&mut self) -> Result<Cow<'a, str>, String> {
if self.peek() != SEPARATOR_BYTE {
return self.err(&format!(
"Expected '\\x{:x}' char at start of <argument>",
SEPARATOR_BYTE
));
fn parse_arg(&mut self) -> Result<Argument<'a>, String> {
let name = if let Some(&byte) = self.full_text.get(self.pos) {
if byte == ARGUMENT_NAME_TAG_BYTE {
self.pos += 1;
Some(self.parse_text()?)
} else {
None
}
} else {
None
};
let value = if let Some(&byte) = self.full_text.get(self.pos) {
if byte == ARGUMENT_VALUE_TAG_BYTE {
self.pos += 1;
Some(self.parse_text()?)
} else {
None
}
} else {
None
};
match (name, value) {
(name, Some(value)) => Ok(Argument { name, value }),
(_, None) => self.err("Unable to parse required <argument_value>"),
}

self.pos += 1;
self.parse_separator_terminated_text()
}

fn err<T>(&self, message: &str) -> Result<T, String> {
Expand Down Expand Up @@ -161,7 +182,7 @@ mod tests {
let (label, args) = Event::parse_event_id(Cow::from("foo\x1emy_arg"));

assert_eq!(label, "foo");
assert_eq!(args, vec![Cow::from("my_arg")]);
assert_eq!(args, vec![Argument::new("my_arg")]);
}

#[test]
Expand All @@ -171,7 +192,21 @@ mod tests {
assert_eq!(label, "foo");
assert_eq!(
args,
vec![Cow::from("arg1"), Cow::from("arg2"), Cow::from("arg3")]
vec![Argument::new("arg1"), Argument::new("arg2"), Argument::new("arg3")]
);
}

#[test]
fn parse_event_id_n_named_args() {
let (label, args) = Event::parse_event_id(Cow::from("foo\x1darg1\x1eval1\x1darg2\x1eval2"));

assert_eq!(label, "foo");
assert_eq!(
args,
vec![
Argument::new_named("arg1", "val1"),
Argument::new_named("arg2", "val2"),
]
);
}
}
2 changes: 1 addition & 1 deletion analyzeme/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ mod stringtable;
pub mod testing_common;
mod timestamp;

pub use crate::event::Event;
pub use crate::event::{Event, Argument};
pub use crate::lightweight_event::LightweightEvent;
pub use crate::profiling_data::{ProfilingData, ProfilingDataBuilder};
pub use crate::stack_collapse::collapse_stacks;
Expand Down
11 changes: 6 additions & 5 deletions analyzeme/src/testing_common.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::timestamp::Timestamp;
use crate::{Event, ProfilingData};
use crate::{Event, Argument, ProfilingData};
use measureme::{EventId, EventIdBuilder, Profiler, SerializationSink, StringId};
use rustc_hash::FxHashMap;
use std::borrow::Cow;
Expand All @@ -22,15 +22,16 @@ fn mk_filestem(file_name_stem: &str) -> PathBuf {
struct ExpectedEvent {
kind: Cow<'static, str>,
label: Cow<'static, str>,
args: Vec<Cow<'static, str>>,
args: Vec<Argument<'static>>,
}

impl ExpectedEvent {
fn new(kind: &'static str, label: &'static str, args: &[&'static str]) -> ExpectedEvent {
fn new(kind: &'static str, label: &'static str, args: &[Argument<'static>])
-> ExpectedEvent {
ExpectedEvent {
kind: Cow::from(kind),
label: Cow::from(label),
args: args.iter().map(|&x| Cow::from(x)).collect(),
args: args.to_vec(),
}
}
}
Expand Down Expand Up @@ -65,7 +66,7 @@ fn generate_profiling_data<S: SerializationSink>(
let expected_events_templates = vec![
ExpectedEvent::new("Generic", "SomeGenericActivity", &[]),
ExpectedEvent::new("Query", "SomeQuery", &[]),
ExpectedEvent::new("QueryWithArg", "AQueryWithArg", &["some_arg"]),
ExpectedEvent::new("QueryWithArg", "AQueryWithArg", &[Argument::new("some_arg")]),
];

let threads: Vec<_> = (0..num_threads)
Expand Down
6 changes: 5 additions & 1 deletion crox/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,11 @@ fn get_args(full_event: &analyzeme::Event) -> Option<FxHashMap<String, String>>
.additional_data
.iter()
.enumerate()
.map(|(i, arg)| (format!("arg{}", i).to_string(), arg.to_string()))
.map(|(i, arg)| (if let Some(name) = &arg.name {
name.to_string()
} else {
format!("arg{}", i).to_string()
}, arg.value.to_string()))
.collect(),
)
} else {
Expand Down
14 changes: 9 additions & 5 deletions measureme/src/event_id.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
use crate::{Profiler, SerializationSink, StringComponent, StringId};

/// Event IDs are strings conforming to the following grammar:
/// Event IDs are strings conforming to the following (eBNF) grammar:
///
/// ```ignore
/// <event_id> = <label> {<argument>}
/// <label> = <text>
/// <argument> = '\x1E' <text>
/// <argument> = ['\x1D' <argument_name>] '\x1E' <argument_value>
/// <argument_name> = <text>
/// <argument_value> = <text>
Comment on lines +8 to +10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be simpler to make the grammar:

<argument> = '\x1E' (<argument_name_value_pair> | <argument_value>)
<argument_name_value_pair> = <text> '\x1D' <text>
<argument_value> = <text>

Then we could always parse the leading \x1E and then do a string split on the \x1D if it exists.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that would also make the format compatible with the current format. It's fine if we need to break that but we'll have to bump the format number and coordinate redeploying new versions on perf.rlo.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It sounds like there might be another breaking change in the works from another contributor so if you think it's simpler to use the current strategy rather than this suggestion, that's fine with me!

/// <text> = regex([^[[:cntrl:]]]+) // Anything but ASCII control characters
/// ```
///
/// This means there's always a "label", followed by an optional list of
/// arguments. Future versions my support other optional suffixes (with a tag
/// other than '\x11' after the '\x1E' separator), such as a "category".

/// The byte used to separate arguments from the label and each other.
pub const SEPARATOR_BYTE: &str = "\x1E";
/// The byte used to denote the following text is an argument value.
pub const ARGUMENT_VALUE_TAG_BYTE: &str = "\x1E";
/// The byte used to denote the following text is an argument name.
pub const ARGUMENT_NAME_TAG_BYTE: &str = "\x1D";

/// An `EventId` is a `StringId` with the additional guarantee that the
/// corresponding string conforms to the event_id grammar.
Expand Down Expand Up @@ -73,7 +77,7 @@ impl<'p, S: SerializationSink> EventIdBuilder<'p, S> {
// Label
StringComponent::Ref(label),
// Seperator and start tag for arg
StringComponent::Value(SEPARATOR_BYTE),
StringComponent::Value(ARGUMENT_VALUE_TAG_BYTE),
// Arg string id
StringComponent::Ref(arg),
]))
Expand Down
8 changes: 7 additions & 1 deletion mmview/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,13 @@ fn system_time_to_micros_since(t: SystemTime, since: SystemTime) -> u128 {
}

fn print_event(event: &Event<'_>, global_start_time: SystemTime) {
let additional_data = event.additional_data.join(",");
let additional_data = event.additional_data.iter().map(|arg| {
if let Some(name) = &arg.name {
format!("{} = {}", name, arg.value)
} else {
arg.value.to_string()
}
}).collect::<Vec<_>>().join(", ");

let timestamp = match event.timestamp {
Timestamp::Instant(t) => {
Expand Down