Skip to content

Commit 431b88f

Browse files
authored
Bitcode rewrite (#19)
1 parent 3140043 commit 431b88f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+7977
-6952
lines changed

.cargo/config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
[build]
2-
rustflags = ["-C", "target-cpu=native"]
2+
rustflags = ["-C", "target-cpu=native"]

.github/workflows/build.yml

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,26 @@ jobs:
2424
# `i686-unknown-linux-gnu` so we just need to check this page for a
2525
# compatible nightly:
2626
# https://rust-lang.github.io/rustup-components-history/mips64-unknown-linux-gnuabi64.html
27-
toolchain: nightly-2023-07-04
27+
toolchain: nightly-2023-04-25
2828
override: true
2929
components: rustfmt, miri
3030
- name: Lint
3131
run: cargo fmt --check
32-
- name: Test (debug)
32+
- name: Check (no-default-features)
33+
run: cargo check --no-default-features
34+
- name: Test
3335
run: cargo test
36+
- name: Test (all-features)
37+
run: cargo test --all-features
3438
- name: Install i686 and GCC multilib
3539
run: rustup target add i686-unknown-linux-gnu && sudo apt update && sudo apt install -y gcc-multilib
36-
- name: Test (32-bit)
37-
run: cargo test --target i686-unknown-linux-gnu
40+
- name: Test (32-bit all-features)
41+
run: cargo test --target i686-unknown-linux-gnu --all-features
3842
- name: Setup Miri
3943
run: cargo miri setup
40-
- name: Test (miri)
41-
run: MIRIFLAGS="-Zmiri-permissive-provenance" cargo miri test
44+
- name: Test (miri all-features)
45+
run: cargo miri test --all-features
4246
- name: Setup Miri (big-endian)
4347
run: rustup target add mips64-unknown-linux-gnuabi64 && cargo miri setup --target mips64-unknown-linux-gnuabi64
4448
- name: Test (miri big-endian)
45-
run: MIRIFLAGS="-Zmiri-permissive-provenance" cargo miri test --target mips64-unknown-linux-gnuabi64
49+
run: cargo miri test --target mips64-unknown-linux-gnuabi64

.gitignore

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
/target
2-
/Cargo.lock
3-
/bitcode_derive/Cargo.lock
1+
target/
2+
Cargo.lock
3+
perf.*
44
.idea
5-
perf.data
6-
perf.data.old

Cargo.toml

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,40 +6,41 @@ members = [
66
[package]
77
name = "bitcode"
88
authors = [ "Cai Bear", "Finn Bear" ]
9-
version = "0.5.1"
9+
version = "0.6.0-beta.1"
1010
edition = "2021"
1111
license = "MIT OR Apache-2.0"
1212
repository = "https://github.com/SoftbearStudios/bitcode"
1313
description = "bitcode is a bitwise binary serializer"
1414
exclude = ["fuzz/"]
1515

1616
[dependencies]
17-
bitcode_derive = { version = "0.5.0", path="./bitcode_derive", optional = true }
18-
bytemuck = { version = "1.13", features = [ "extern_crate_alloc" ] }
19-
from_bytes_or_zeroed = "0.1"
20-
residua-zigzag = "0.1.0"
17+
arrayvec = { version = "0.7", default-features = false, optional = true }
18+
bitcode_derive = { version = "0.6.0-beta.1", path = "./bitcode_derive", optional = true }
19+
bytemuck = { version = "1.14", features = [ "min_const_generics", "must_cast" ] }
20+
glam = { version = "0.22", default-features = false, features = [ "std" ], optional = true }
2121
serde = { version = "1.0", optional = true }
22-
simdutf8 = { version = "0.1.4", optional = true }
2322

2423
[dev-dependencies]
25-
arrayvec = { version = "0.7.2", features = [ "serde" ] }
24+
arrayvec = { version = "0.7", features = [ "serde" ] }
2625
bincode = "1.3.3"
27-
bitvec = { version = "1.0.1" }
28-
flate2 = "1.0.25"
29-
lz4_flex = "0.10.0"
30-
musli = "0.0.42"
31-
paste = "1.0.12"
32-
postcard = { version = "1.0", features = ["alloc"] }
33-
rand = { version = "0.8.5", default-features = false }
26+
flate2 = "1.0.28"
27+
lz4_flex = { version = "0.11.2", default-features = false }
28+
paste = "1.0.14"
29+
rand = "0.8.5"
3430
rand_chacha = "0.3.1"
35-
serde = { version = "1.0.159", features = [ "derive" ] }
31+
serde = { version = "1.0", features = [ "derive" ] }
32+
33+
# zstd doesn't compile with miri big-endian.
34+
[target.'cfg(not(miri))'.dev-dependencies]
35+
zstd = "0.13.0"
3636

3737
[features]
3838
derive = [ "bitcode_derive" ]
39-
default = [ "derive", "simdutf8" ]
39+
default = [ "derive" ]
4040

4141
[package.metadata.docs.rs]
42-
features = ["serde"]
42+
features = [ "derive", "serde" ]
4343

44-
[profile.bench]
45-
lto = true
44+
# TODO halves speed of benches_borrowed::bench_bitcode_decode
45+
#[profile.bench]
46+
#lto = true

README.md

Lines changed: 63 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -2,111 +2,76 @@
22
[![Documentation](https://docs.rs/bitcode/badge.svg)](https://docs.rs/bitcode)
33
[![crates.io](https://img.shields.io/crates/v/bitcode.svg)](https://crates.io/crates/bitcode)
44
[![Build](https://github.com/SoftbearStudios/bitcode/actions/workflows/build.yml/badge.svg)](https://github.com/SoftbearStudios/bitcode/actions/workflows/build.yml)
5-
[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
65

7-
A bitwise encoder/decoder similar to [bincode](https://github.com/bincode-org/bincode), which attempts to shrink the serialized size without sacrificing speed (as would be the case with compression).
8-
9-
The format may change between major versions, so we are free to optimize it.
10-
11-
## Comparison with [bincode](https://github.com/bincode-org/bincode)
12-
13-
### Features
14-
15-
- Bitwise serialization
16-
- [Gamma](https://en.wikipedia.org/wiki/Elias_gamma_coding) encoded lengths and enum variant indices
17-
18-
### Additional features with `#[derive(bitcode::Encode, bitcode::Decode)]`
19-
20-
- Enums use the fewest possible bits, e.g. an enum with 4 variants uses 2 bits
21-
- Apply attributes to fields/enum variants:
22-
23-
| Attribute | Type | Result |
24-
|-----------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------|
25-
| `#[bitcode_hint(ascii)]` | String | Uses 7 bits per character |
26-
| `#[bitcode_hint(ascii_lowercase)]` | String | Uses 5 bits per character |
27-
| `#[bitcode_hint(expected_range = "50..100")]` | u8-u64 | Uses log2(range.end - range.start) bits |
28-
| `#[bitcode_hint(expected_range = "0.0..1.0")]` | f32/f64 | Uses ~25 bits for `f32` and ~54 bits for `f64` |
29-
| `#[bitcode_hint(frequency = 123)]` | enum variant | Frequent variants use fewer bits (see [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding)) |
30-
| `#[bitcode_hint(gamma)]` | i8-i64/u8-u64 | Small integers use fewer bits (see [Elias gamma coding](https://en.wikipedia.org/wiki/Elias_gamma_coding)) |
31-
| `#[bitcode(with_serde)]` | T: Serialize | Uses `serde::Serialize` instead of `bitcode::Encode` |
32-
33-
### Limitations
34-
35-
- Doesn't support streaming APIs
36-
- Format may change between major versions
37-
- With `feature = "derive"`, types containing themselves must use `#[bitcode(recursive)]` to compile
38-
39-
## Benchmarks vs. [bincode](https://github.com/bincode-org/bincode) and [postcard](https://github.com/jamesmunns/postcard)
40-
41-
### Primitives (size in bits)
42-
43-
| Type | Bitcode (derive) | Bitcode (serde) | Bincode | Bincode (varint) | Postcard |
44-
|---------------------|------------------|-----------------|---------|------------------|----------|
45-
| bool | 1 | 1 | 8 | 8 | 8 |
46-
| u8/i8 | 8 | 8 | 8 | 8 | 8 |
47-
| u16/i16 | 16 | 16 | 16 | 8-24 | 8-24 |
48-
| u32/i32 | 32 | 32 | 32 | 8-40 | 8-40 |
49-
| u64/i64 | 64 | 64 | 64 | 8-72 | 8-80 |
50-
| u128/i128 | 128 | 128 | 128 | 8-136 | 8-152 |
51-
| usize/isize | 64 | 64 | 64 | 8-72 | 8-80 |
52-
| f32 | 32 | 32 | 32 | 32 | 32 |
53-
| f64 | 64 | 64 | 64 | 64 | 64 |
54-
| char | 21 | 21 | 8-32 | 8-32 | 16-40 |
55-
| Option<()> | 1 | 1 | 8 | 8 | 8 |
56-
| Result<(), ()> | 1 | 1-3 | 32 | 8 | 8 |
57-
| enum { A, B, C, D } | 2 | 1-5 | 32 | 8 | 8 |
58-
| Duration | 94 | 96 | 96 | 16-112 | 16-120 |
59-
60-
<sup>Note: These are defaults, and can be optimized with hints in the case of Bitcode (derive) or custom `impl Serialize` in the case of `serde` serializers.</sup>
61-
62-
### Values (size in bits)
63-
64-
| Value | Bitcode (derive) | Bitcode (serde) | Bincode | Bincode (varint) | Postcard |
65-
|---------------------|------------------|-----------------|---------|------------------|----------|
66-
| [true; 4] | 4 | 4 | 32 | 32 | 32 |
67-
| vec![(); 0] | 1 | 1 | 64 | 8 | 8 |
68-
| vec![(); 1] | 3 | 3 | 64 | 8 | 8 |
69-
| vec![(); 256] | 17 | 17 | 64 | 24 | 16 |
70-
| vec![(); 65536] | 33 | 33 | 64 | 40 | 24 |
71-
| "" | 1 | 1 | 64 | 8 | 8 |
72-
| "abcd" | 37 | 37 | 96 | 40 | 40 |
73-
| "abcd1234" | 71 | 71 | 128 | 72 | 72 |
74-
75-
76-
### Random [Structs and Enums](https://github.com/SoftbearStudios/bitcode/blob/2a47235eee64f4a7c49ad1841a5b509abd2d0e99/src/benches.rs#L16-L88) (average size and speed)
77-
78-
| Format | Size (bytes) | Serialize (ns) | Deserialize (ns) |
79-
|------------------------|--------------|----------------|------------------|
80-
| Bitcode (derive) | 6.2 | 14 | 50 |
81-
| Bitcode (serde) | 6.7 | 18 | 59 |
82-
| Bincode | 20.3 | 17 | 61 |
83-
| Bincode (varint) | 10.9 | 26 | 68 |
84-
| Bincode (LZ4) | 9.9 | 58 | 73 |
85-
| Bincode (Deflate Fast) | 8.4 | 336 | 279 |
86-
| Bincode (Deflate Best) | 7.8 | 1990 | 275 |
87-
| Postcard | 10.7 | 21 | 57 |
88-
89-
### More benchmarks
90-
91-
[rust_serialization_benchmark](https://david.kolo.ski/rust_serialization_benchmark/)
92-
93-
## Acknowledgement
94-
95-
Some test cases were derived from [bincode](https://github.com/bincode-org/bincode) (see comment in `tests.rs`).
6+
A binary encoder/decoder with the following goals:
7+
- 🔥 Blazingly fast
8+
- 🐁 Tiny serialized size
9+
- 💎 Highly compressible by Deflate/LZ4/Zstd
10+
11+
In contrast, these are non-goals:
12+
- Stable format across major versions
13+
- Self-describing format
14+
- Compatibility with languages other than Rust
15+
16+
See [rust_serialization_benchmark](https://github.com/djkoloski/rust_serialization_benchmark) for benchmarks.
17+
18+
## Example
19+
```rust
20+
use bitcode::{Encode, Decode};
21+
22+
#[derive(Encode, Decode, PartialEq, Debug)]
23+
struct Foo<'a> {
24+
x: u32,
25+
y: &'a str,
26+
}
27+
28+
let original = Foo {
29+
x: 10,
30+
y: "abc",
31+
};
32+
33+
let encoded: Vec<u8> = bitcode::encode(&original); // No error
34+
let decoded: Foo<'_> = bitcode::decode(&encoded).unwrap();
35+
assert_eq!(original, decoded);
36+
```
37+
38+
## Library Example
39+
40+
Add bitcode to libraries without specifying the major version so binary crates can pick the version.
41+
This is a minimal stable subset of the bitcode API, so avoid using any other functionality.
42+
```toml
43+
bitcode = { version = "0", features = ["derive"], default-features = false, optional = true }
44+
```
45+
```rust
46+
#[cfg_attr(feature = "bitcode", derive(bitcode::Encode, bitcode::Decode))]
47+
pub struct Vec2 {
48+
x: f32,
49+
y: f32,
50+
}
51+
```
52+
53+
## Tuple vs Array
54+
If you have multiple values of the same type:
55+
- Use a tuple or struct when the values are semantically different: `x: u32, y: u32`
56+
- Use an array when all values are semantically similar: `pixels: [u8; 16]`
57+
58+
## Implementation Details
59+
- Heavily inspired by <https://github.com/That3Percent/tree-buf>
60+
- All instances of each field are grouped together, making compression easier
61+
- Uses smaller integers where possible, all the way down to 1 bit
62+
- Validation is performed up front on typed vectors before deserialization
63+
- Code is designed to be auto-vectorized by LLVM
9664

9765
## License
98-
9966
Licensed under either of
100-
101-
* Apache License, Version 2.0
102-
([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
103-
* MIT license
104-
([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
67+
* Apache License, Version 2.0
68+
([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
69+
* MIT license
70+
([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
10571

10672
at your option.
10773

10874
## Contribution
109-
11075
Unless you explicitly state otherwise, any contribution intentionally submitted
11176
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
112-
dual licensed as above, without any additional terms or conditions.
77+
dual licensed as above, without any additional terms or conditions.

bitcode_derive/Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "bitcode_derive"
33
authors = [ "Cai Bear", "Finn Bear" ]
4-
version = "0.5.0"
4+
version = "0.6.0-beta.1"
55
edition = "2021"
66
license = "MIT OR Apache-2.0"
77
repository = "https://github.com/SoftbearStudios/bitcode/"
@@ -11,7 +11,6 @@ description = "Implementation of #[derive(Encode, Decode)] for bitcode"
1111
proc-macro = true
1212

1313
[dependencies]
14-
packagemerge = "0.1"
1514
proc-macro2 = "1.0"
1615
quote = "1.0"
17-
syn = { version = "2.0.3", features = [ "extra-traits" ] }
16+
syn = { version = "2.0.3", features = [ "extra-traits", "visit-mut" ] }

0 commit comments

Comments
 (0)