Skip to content

Commit 7298b5b

Browse files
authored
Adds high-level Writer API for defining macros (#938)
* Renames Macro -> MacroDef
* Stubs out ExprGroupWriter, impl WriteAsIon for TemplateMacroRef
* Enables `Writer` to compile macros and add them to the default module
* Introduces `MacroIdLike` trait that allows `&Macro`, addresses, and names to be used for writing E-exps.
* Updates `write_log_events` to use managed writer
* Improves documentation for the `write_log_events` example
1 parent bbcf8cb commit 7298b5b

22 files changed

+1096
-263
lines changed

examples/write_log_events.rs

+114-57
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,23 @@
11
//! This program demonstrates implementing WriteAsIon using Ion 1.1's e-expressions for a more
2-
//! compact encoding. It uses raw-level writer APIs that end users are unlikely to leverage.
3-
//! Ion 1.1 is not yet finalized; the encoding produced by this example and the APIs it uses
4-
//! are very likely to change.
5-
2+
//! compact encoding.
63
use ion_rs::*;
74

85
fn main() -> IonResult<()> {
96
#[cfg(not(feature = "experimental"))]
10-
panic!("This example requires the 'experimental' feature to work.");
7+
{
8+
eprintln!("This example requires the 'experimental' feature to work. Rebuild it with the flag `--features experimental`.");
9+
}
1110

1211
#[cfg(feature = "experimental")]
13-
example::write_log_events()
12+
example::write_log_events()?;
13+
14+
Ok(())
1415
}
1516

1617
#[cfg(feature = "experimental")]
1718
mod example {
1819
use chrono::{DateTime, FixedOffset};
20+
use ion_rs::v1_1::Macro;
1921
use ion_rs::*;
2022
use rand::rngs::StdRng;
2123
use rand::seq::SliceRandom;
@@ -55,29 +57,49 @@ mod example {
5557
ion_1_1_file.path().to_string_lossy(),
5658
);
5759

58-
// Encode the log events as Ion 1.0 data
59-
let buf_writer = BufWriter::new(ion_1_0_file.as_file());
60-
let mut ion_writer = v1_0::RawBinaryWriter::new(buf_writer)?;
61-
for event in &events {
62-
ion_writer.write(SerializeWithoutMacros(event))?;
63-
}
64-
ion_writer.flush()?;
65-
drop(ion_writer);
66-
67-
// Encode the log events as Ion 1.1 data
68-
let buf_writer = BufWriter::new(ion_1_1_file.as_file());
69-
let mut ion_writer = v1_1::RawBinaryWriter::new(buf_writer)?;
70-
for event in &events {
71-
ion_writer.write(SerializeWithMacros(event))?;
72-
}
73-
ion_writer.flush()?;
74-
drop(ion_writer);
60+
// === Encode the log events as Ion 1.0 data ===
61+
// First, we initialize a writer...
62+
let mut ion_writer = Writer::new(v1_0::Binary, BufWriter::new(ion_1_0_file.as_file()))?;
63+
// ...then we encode all of the events...
64+
ion_writer.write_all(events.iter().map(|e| SerializeWithoutMacros(e)))?;
65+
// ...finally, we close the writer, consuming it.
66+
ion_writer.close()?;
67+
68+
// === Encode the log events as Ion 1.1 data ===
69+
// First, we initialize a writer...
70+
let mut ion_writer = Writer::new(v1_1::Binary, BufWriter::new(ion_1_1_file.as_file()))?;
71+
72+
// ...then we define some macros that we intend to use to encode our log data...
73+
let macros = LogEventMacros {
74+
thread_name: ion_writer.compile_macro(
75+
// This macro includes the prefix common to all thread names, allowing the writer to only encode
76+
// the suffix of each.
77+
&format!(
78+
r#"
79+
(macro thread_name (suffix) (.make_string {THREAD_NAME_PREFIX} (%suffix) ))
80+
"#
81+
),
82+
)?,
83+
log_statements: log_statements
84+
.iter()
85+
// As you'll see later, each LogStatement has an associated macro definition in text.
86+
.map(|log_statement| ion_writer.compile_macro(&log_statement.macro_source))
87+
.collect::<IonResult<Vec<Macro>>>()?,
88+
};
89+
90+
// ...then we encode all of the events using the macros we just defined...
91+
ion_writer.write_all(events.iter().map(|e| SerializeWithMacros(e, &macros)))?;
92+
// ...finally, we close the writer, consuming it.
93+
ion_writer.close()?;
94+
95+
// === Encoded file size comparison ===
7596

7697
let size_in_1_0 = ion_1_0_file
7798
.as_file()
7899
.metadata()
79100
.expect("failed to read Ion 1.0 file length")
80101
.len();
102+
81103
let size_in_1_1 = ion_1_1_file
82104
.as_file()
83105
.metadata()
@@ -100,18 +122,13 @@ mod example {
100122

101123
// A log statement in the fictional codebase
102124
#[derive(Debug)]
103-
// This struct has several fields that get populated but which are not (yet) used: `logger_name`
104-
// `log_level`, and `format`. Currently, the encoded output for Ion 1.0 writes these as symbol
105-
// IDs and Ion 1.1 refers to them as part of a macro. In both cases, however, the encoding
106-
// context is not written out in the resulting Ion stream.
107-
// TODO: Include the symbol/macro table definitions in the resulting output stream.
108-
#[allow(dead_code)]
109125
struct LogStatement {
110126
index: usize,
111127
logger_name: String,
112128
log_level: String,
113129
format: String,
114130
parameter_types: Vec<ParameterType>,
131+
macro_source: String,
115132
}
116133

117134
impl LogStatement {
@@ -122,12 +139,37 @@ mod example {
122139
format: impl Into<String>,
123140
parameter_types: impl Into<Vec<ParameterType>>,
124141
) -> Self {
142+
let format = format.into();
143+
let macro_source = format!(
144+
// Note that there are two levels of interpolation in this string literal.
145+
// The `format!` macro will process it first, replacing variable names in single
146+
// braces (like `{class_name}`) with the corresponding text, and replacing
147+
// double braces (like the `{{...}}` surrounding the template) with single braces.
148+
// The resulting string is our macro source, which will be compiled by the Writer.
149+
r#"
150+
(macro
151+
ls{index:<42} // Name
152+
(timestamp thread_id thread_name parameters) // Signature
153+
{{ // Template
154+
loggerName: "{class_name}",
155+
logLevel: {log_level},
156+
format: "{format}",
157+
timestamp: (%timestamp),
158+
thread_id: (%thread_id),
159+
thread_name: (%thread_name),
160+
parameters: (%parameters)
161+
}}
162+
)
163+
"#
164+
);
165+
println!("{macro_source}");
125166
Self {
126167
index,
127168
logger_name: format!("{PACKAGE_NAME}.{class_name}"),
128169
log_level: log_level.to_string(),
129170
format: format.into(),
130171
parameter_types: parameter_types.into(),
172+
macro_source,
131173
}
132174
}
133175
}
@@ -160,6 +202,13 @@ mod example {
160202

161203
// ===== Serialization logic for the above types =====
162204

205+
// A simple container to store macros related to serializing LogEvent
206+
struct LogEventMacros {
207+
thread_name: Macro,
208+
log_statements: Vec<Macro>,
209+
}
210+
211+
// Defines how a `Parameter` is serialized as Ion
163212
impl WriteAsIon for Parameter {
164213
fn write_as_ion<V: ValueWriter>(&self, writer: V) -> IonResult<()> {
165214
match self {
@@ -173,71 +222,79 @@ mod example {
173222
// the future, as types will be able to define both a macro-ized serialization and a no-macros
174223
// serialization, allowing the writer to choose whichever is more appropriate.
175224
struct SerializeWithoutMacros<'a, 'b>(&'a LogEvent<'b>);
176-
struct SerializeWithMacros<'a, 'b>(&'a LogEvent<'b>);
225+
struct SerializeWithMacros<'a, 'b>(&'a LogEvent<'b>, &'a LogEventMacros);
177226

178227
// When serializing without macros (usually in Ion 1.0), we write out a struct with each
179-
// field name/value pair. In the case of recurring strings, we take the liberty of writing
180-
// out symbol IDs instead of the full text; this silent type coercion from string to symbol
181-
// is technically data loss, but results in a much more compact encoding.
228+
// field name/value pair.
182229
impl WriteAsIon for SerializeWithoutMacros<'_, '_> {
183230
fn write_as_ion<V: ValueWriter>(&self, writer: V) -> IonResult<()> {
184231
let event = self.0;
185232
let mut struct_ = writer.struct_writer()?;
186233
struct_
187234
// v--- Each field name is a symbol ID
188-
.write(10, event.timestamp)?
189-
.write(11, event.thread_id)?
190-
.write(12, &event.thread_name)?
191-
// v--- The fixed strings from the log statement are also SIDs
192-
.write(13, RawSymbolRef::SymbolId(17))? // logger name
193-
.write(14, RawSymbolRef::SymbolId(18))? // log level
194-
.write(15, RawSymbolRef::SymbolId(19))? // format
195-
.write(16, &event.parameters)?;
235+
.write("timestamp", event.timestamp)?
236+
.write("threadId", event.thread_id)?
237+
.write("threadName", &event.thread_name)?
238+
.write(
239+
"loggerName",
240+
SymbolRef::with_text(&event.statement.logger_name),
241+
)?
242+
.write("logLevel", SymbolRef::with_text(&event.statement.log_level))?
243+
.write("format", SymbolRef::with_text(&event.statement.format))?
244+
.write("parameters", &event.parameters)?;
196245
struct_.close()
197246
}
198247
}
199248

249+
impl WriteAsIon for SerializeWithMacros<'_, '_> {
250+
fn write_as_ion<V: ValueWriter>(&self, writer: V) -> IonResult<()> {
251+
let SerializeWithMacros(event, macros) = *self;
252+
253+
// Create an e-expression writer to invoke the macro corresponding to this log statement.
254+
let mut eexp = writer.eexp_writer(&macros.log_statements[event.statement.index])?;
255+
eexp.write(event.timestamp)?
256+
.write(event.thread_id)?
257+
// Wrap the thread name in the `ThreadName` wrapper to change its serialization.
258+
.write(ThreadName(&event.thread_name, &macros.thread_name))?
259+
.write(&event.parameters)?;
260+
eexp.close()
261+
}
262+
}
263+
200264
// When leveraging macros, the thread name's recurring prefix can be elided from the output.
201-
// This wrapper type is used by the `SerializeWithMacros` type to change to serialization
265+
// This wrapper type is used by the `SerializeWithMacros` type to change the serialization
202266
// behavior for the thread name.
203-
struct ThreadName<'a>(&'a str);
267+
struct ThreadName<'a>(&'a str, &'a Macro);
204268

205269
impl WriteAsIon for ThreadName<'_> {
206270
fn write_as_ion<V: ValueWriter>(&self, writer: V) -> IonResult<()> {
271+
let thread_name_macro = self.1;
207272
// Invoke the compiled `thread_name` macro that was passed in alongside the thread name.
208-
let mut eexp = writer.eexp_writer(12)?;
273+
let mut eexp = writer.eexp_writer(thread_name_macro)?;
209274
eexp
210275
// Ignore the part of the thread name that starts with the recurring prefix.
211276
.write(&self.0[THREAD_NAME_PREFIX.len()..])?;
212277
eexp.close()
213278
}
214279
}
215280

216-
impl WriteAsIon for SerializeWithMacros<'_, '_> {
217-
fn write_as_ion<V: ValueWriter>(&self, writer: V) -> IonResult<()> {
218-
let event = self.0;
219-
let mut eexp = writer.eexp_writer(event.statement.index)?;
220-
eexp.write(event.timestamp)?
221-
.write(event.thread_id)?
222-
// Wrap the thread name in the `ThreadName` wrapper to change its serialization.
223-
.write(ThreadName(&event.thread_name))?
224-
.write(&event.parameters)?;
225-
eexp.close()
226-
}
227-
}
228-
229281
// ===== Random generation of sample data =====
230282

283+
// Any time we need an integer, we'll generate a random one between 0 and 5,000.
231284
const INT_PARAMETER_RANGE: Range<i64> = 0..5_000;
232285
fn generate_int_parameter(rng: &mut StdRng) -> Parameter {
233286
Parameter::Int(rng.gen_range(INT_PARAMETER_RANGE))
234287
}
235288

289+
// Any time we need a string, we'll select one at random from this collection of plural nouns.
236290
fn generate_string_parameter(rng: &mut StdRng) -> Parameter {
237291
const WORDS: &[&str] = &["users", "transactions", "accounts", "customers", "waffles"];
238292
Parameter::String(WORDS.choose(rng).unwrap().to_string())
239293
}
240294

295+
// These are the log statements that our fictional program contains.
296+
// Each log event will be associated with a randomly selected log statement and its parameters
297+
// will be populated using the methods above.
241298
fn log_statements() -> Vec<LogStatement> {
242299
use ParameterType::*;
243300
vec![

src/constants.rs

+1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ pub(crate) mod v1_1 {
124124

125125
pub const ION: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(1);
126126
pub const ENCODING: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(10);
127+
pub const MACRO_TABLE: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(14);
127128
pub const SYMBOL_TABLE: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(15);
128129
pub const MODULE: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(16);
129130
pub const EMPTY_TEXT: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(33);

src/lazy/binary/raw/v1_1/binary_buffer.rs

+3-14
Original file line numberDiff line numberDiff line change
@@ -1311,7 +1311,7 @@ mod tests {
13111311
) -> IonResult<()> {
13121312
let mut context = EncodingContext::for_ion_version(IonVersion::v1_1);
13131313
let template_macro =
1314-
TemplateCompiler::compile_from_source(context.get_ref(), macro_source)?;
1314+
TemplateCompiler::compile_from_source(context.macro_table(), macro_source)?;
13151315
let macro_address = context
13161316
.macro_table_mut()
13171317
.add_template_macro(template_macro)?;
@@ -1607,28 +1607,17 @@ mod tests {
16071607

16081608
#[test]
16091609
fn roundtrip_macro_addresses_up_to_20_bits() -> IonResult<()> {
1610-
use std::fmt::Write;
1611-
16121610
// This is a large enough value that many macros will be encoded using 20 bits.
16131611
// However, it is not large enough to fully exercise the 20-bit encoding space. To do that,
16141612
// we would need approximately 1 million macros, which takes too much time to execute in a
16151613
// debug build.
16161614
const MAX_TEST_MACRO_ADDRESS: usize = 6_000;
16171615

1618-
// Construct an encoding directive that defines this number of macros. Each macro will expand
1619-
// to its own address.
1620-
let mut macro_definitions = String::from("$ion::\n(module _\n (macro_table _\n");
1621-
for address in MacroTable::FIRST_USER_MACRO_ID..MAX_TEST_MACRO_ADDRESS {
1622-
writeln!(macro_definitions, " (macro m{address} () {address})")?;
1623-
}
1624-
macro_definitions.push_str(" )\n)\n");
1625-
let encoding_directive = Element::read_one(macro_definitions)?;
16261616
let mut writer = Writer::new(v1_1::Binary, Vec::new())?;
1627-
writer.write(&encoding_directive)?;
1628-
16291617
// Compile one macro per address and invoke each one as soon as it is defined.
16301618
for address in MacroTable::FIRST_USER_MACRO_ID..MAX_TEST_MACRO_ADDRESS {
1631-
writer.eexp_writer(address)?.close()?;
1619+
let macro_n = writer.compile_macro(format!("(macro m{address} () {address})"))?;
1620+
writer.eexp_writer(&macro_n)?.close()?;
16321621
}
16331622
let data = writer.close()?;
16341623

src/lazy/binary/raw/v1_1/struct.rs

+8-19
Original file line numberDiff line numberDiff line change
@@ -326,41 +326,30 @@ impl<'top> Iterator for RawBinaryStructIterator_1_1<'top> {
326326
#[cfg(test)]
327327
mod tests {
328328
use crate::{
329-
v1_1, AnyEncoding, Element, ElementReader, IonResult, MacroTable, Reader, SequenceWriter,
330-
StructWriter, ValueWriter, Writer,
329+
v1_1, AnyEncoding, Element, ElementReader, IonResult, Reader, SequenceWriter, StructWriter,
330+
ValueWriter, Writer,
331331
};
332332

333333
#[test]
334334
fn field_value_eexp() -> IonResult<()> {
335335
let mut writer = Writer::new(v1_1::Binary, Vec::new())?;
336-
let encoding_directive = Element::read_one(
337-
r#"
338-
$ion::
339-
(module _
340-
(symbol_table _)
341-
(macro_table
342-
_
343-
(macro greet (name) (.make_string "hello, " (%name)))
344-
)
345-
)
346-
"#,
347-
)?;
348-
writer.write(&encoding_directive)?;
349-
let macro_id = MacroTable::FIRST_USER_MACRO_ID;
336+
337+
let greet =
338+
writer.compile_macro(r#"(macro greet (name) (.make_string "hello, " (%name)))"#)?;
350339
let mut struct_writer = writer.struct_writer()?;
351340

352341
let field_writer = struct_writer.field_writer("Waldo");
353-
let mut eexp_writer = field_writer.eexp_writer(macro_id)?;
342+
let mut eexp_writer = field_writer.eexp_writer(&greet)?;
354343
eexp_writer.write("Waldo")?;
355344
eexp_writer.close()?;
356345

357346
let field_writer = struct_writer.field_writer("Winnifred");
358-
let mut eexp_writer = field_writer.eexp_writer(macro_id)?;
347+
let mut eexp_writer = field_writer.eexp_writer(&greet)?;
359348
eexp_writer.write("Winnifred")?;
360349
eexp_writer.close()?;
361350

362351
let field_writer = struct_writer.field_writer("Winston");
363-
let mut eexp_writer = field_writer.eexp_writer(macro_id)?;
352+
let mut eexp_writer = field_writer.eexp_writer(&greet)?;
364353
eexp_writer.write("Winston")?;
365354
eexp_writer.close()?;
366355

src/lazy/encoder/binary/v1_0/value_writer.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::lazy::text::raw::v1_1::reader::MacroIdLike;
12
use std::mem;
23

34
use bumpalo::collections::Vec as BumpVec;
@@ -16,7 +17,6 @@ use crate::lazy::encoder::private::Sealed;
1617
use crate::lazy::encoder::value_writer::ValueWriter;
1718
use crate::lazy::encoder::value_writer::{delegate_value_writer_to_self, AnnotatableWriter};
1819
use crate::lazy::never::Never;
19-
use crate::lazy::text::raw::v1_1::reader::MacroIdRef;
2020
use crate::raw_symbol_ref::AsRawSymbolRef;
2121
use crate::result::{EncodingError, IonFailure};
2222
use crate::{Decimal, Int, IonError, IonResult, IonType, RawSymbolRef, SymbolId, Timestamp};
@@ -437,7 +437,7 @@ impl<'value, 'top> ValueWriter for BinaryAnnotatedValueWriter_1_0<'value, 'top>
437437
BinaryStructWriter_1_0::new(self.allocator, self.output_buffer)
438438
.with_annotations(self.annotations)
439439
}
440-
fn eexp_writer<'a>(self, _macro_id: impl Into<MacroIdRef<'a>>) -> IonResult<Self::EExpWriter> {
440+
fn eexp_writer<'a>(self, _macro_id: impl MacroIdLike<'a>) -> IonResult<Self::EExpWriter> {
441441
IonResult::encoding_error("binary Ion 1.0 does not support macros")
442442
}
443443
}

0 commit comments

Comments
 (0)