-
Notifications
You must be signed in to change notification settings - Fork 60
Make the BERT protocol encoding aware #24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 5 commits
3520318
70fd1ff
4e78dc4
aa084e7
31ab659
3113c6f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| #include "ruby.h" | ||
| #include "ruby/encoding.h" | ||
| #include <stdint.h> | ||
| #include <netinet/in.h> | ||
|
|
||
|
|
@@ -14,9 +15,12 @@ | |
| #define ERL_BIN 109 | ||
| #define ERL_SMALL_BIGNUM 110 | ||
| #define ERL_LARGE_BIGNUM 111 | ||
| #define ERL_ENC_STRING 112 | ||
| #define ERL_UNICODE_STRING 113 | ||
| #define ERL_VERSION 131 | ||
| #define ERL_VERSION2 132 | ||
|
|
||
| #define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM) | ||
| #define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_UNICODE_STRING) | ||
|
||
| #define BERT_TYPE_OFFSET (ERL_SMALL_INT) | ||
|
|
||
| static VALUE rb_mBERT; | ||
|
|
@@ -40,6 +44,8 @@ static VALUE bert_read_nil(struct bert_buf *buf); | |
| static VALUE bert_read_string(struct bert_buf *buf); | ||
| static VALUE bert_read_list(struct bert_buf *buf); | ||
| static VALUE bert_read_bin(struct bert_buf *buf); | ||
| static VALUE bert_read_enc_string(struct bert_buf *buf); | ||
| static VALUE bert_read_unicode_string(struct bert_buf *buf); | ||
| static VALUE bert_read_sbignum(struct bert_buf *buf); | ||
| static VALUE bert_read_lbignum(struct bert_buf *buf); | ||
|
|
||
|
|
@@ -59,7 +65,9 @@ static bert_ptr bert_callbacks[] = { | |
| &bert_read_list, | ||
| &bert_read_bin, | ||
| &bert_read_sbignum, | ||
| &bert_read_lbignum | ||
| &bert_read_lbignum, | ||
| &bert_read_enc_string, | ||
| &bert_read_unicode_string | ||
| }; | ||
|
|
||
| static inline uint8_t bert_buf_read8(struct bert_buf *buf) | ||
|
|
@@ -293,6 +301,34 @@ static VALUE bert_read_bin(struct bert_buf *buf) | |
| return rb_bin; | ||
| } | ||
|
|
||
| static VALUE bert_read_unicode_string(struct bert_buf *buf) | ||
| { | ||
| VALUE rb_str; | ||
|
|
||
| rb_str = bert_read_bin(buf); | ||
| rb_enc_associate(rb_str, rb_utf8_encoding()); | ||
|
|
||
| return rb_str; | ||
| } | ||
|
|
||
| static VALUE bert_read_enc_string(struct bert_buf *buf) | ||
| { | ||
| uint8_t type; | ||
| VALUE rb_bin, enc; | ||
|
|
||
| rb_bin = bert_read_bin(buf); | ||
|
|
||
| bert_buf_ensure(buf, 1); | ||
| type = bert_buf_read8(buf); | ||
| if (ERL_BIN != type) | ||
| rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type); | ||
|
|
||
| enc = bert_read_bin(buf); | ||
| rb_enc_associate(rb_bin, rb_find_encoding(enc)); | ||
|
|
||
| return rb_bin; | ||
| } | ||
|
|
||
| static VALUE bert_read_string(struct bert_buf *buf) | ||
| { | ||
| uint16_t i, length; | ||
|
|
@@ -467,17 +503,20 @@ static VALUE bert_read_invalid(struct bert_buf *buf) | |
| static VALUE rb_bert_decode(VALUE klass, VALUE rb_string) | ||
| { | ||
| struct bert_buf buf; | ||
| uint8_t proto_version; | ||
|
|
||
| Check_Type(rb_string, T_STRING); | ||
| buf.data = (uint8_t *)RSTRING_PTR(rb_string); | ||
| buf.end = buf.data + RSTRING_LEN(rb_string); | ||
|
|
||
| bert_buf_ensure(&buf, 1); | ||
|
|
||
| if (bert_buf_read8(&buf) != ERL_VERSION) | ||
| rb_raise(rb_eTypeError, "Invalid magic value for BERT string"); | ||
|
|
||
| return bert_read(&buf); | ||
| proto_version = bert_buf_read8(&buf); | ||
| if (proto_version == ERL_VERSION || proto_version == ERL_VERSION2) { | ||
| return bert_read(&buf); | ||
| } else { | ||
| rb_raise(rb_eTypeError, "Invalid magic value for BERT string"); | ||
| } | ||
| } | ||
|
|
||
| static VALUE rb_bert_impl(VALUE klass) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,4 +22,4 @@ | |
| # Global method for specifying that an array should be encoded as a tuple. | ||
| def t | ||
| BERT::Tuple | ||
| end | ||
| end | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC this is the first time we're diverging from erlang's External Term Format. And as said format does have these values (for
NEW_FUN_EXTandEXPORT_EXTresp.), I think a comment explaining that we knowingly diverge here for version 2 would be useful.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there other (available) values we should use instead?
Adherence to erlang formats isn't important for GitHub's uses of BERT -- which all involve Ruby -- but I could imagine it being an issue if anyone else is still using BERT with erlang.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assuming the online docs are up to date, numbers from 120 onwards are free, but they could probably add new types at any point.
We could reduce the divergence by using 107 (
STRING_EXT) for utf8 strings whose length fits within 2 bytes and go withENC_STRINGif they're bigger, but we'd still re-use or hope we never find new types useful forENC_STRING.The types we're making unavailable aren't something that I think make sense to transfer between ruby and erlang, so I don't think anybody will miss them. But even so making an note of the rationale in the code would be useful.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I chose these values because they are used for the offset in the callbacks struct. Would adding a comment and removing the ERL prefix be enough?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The use in the offset makes it a bit trickier. How about we use
ERLEXT_and have a comment saying these are specific to our own v2? My concern here is coming back to the code and having to figure out how the C, ruby and spec match up so leaving a note would be polite :)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. I added a comment! :D