Skip to content

Commit

Permalink
Stubbed out message types
Browse files Browse the repository at this point in the history
  • Loading branch information
Kardbord committed May 9, 2024
1 parent bfee1fc commit fe88fa4
Showing 1 changed file with 72 additions and 14 deletions.
86 changes: 72 additions & 14 deletions analyzer/protobuf.spicy
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,83 @@ module protobuf;

import zeek;

# TODO: Our example here models a simple example packet format of static size:
# 19 payload bytes, followed by the protocol number for the next layer, for
# which the data then follows subsequently. (This is just what our test trace
# happens to contain). Adapt as suitable.
# Public entry-point unit. Once parsing finishes (%done) it hands the packet
# on to Zeek's next-layer packet analysis via zeek::forward_packet.
# NOTE(review): this listing appears to interleave the lines removed and added
# by the commit (two `payload` fields, two forward_packet calls) -- confirm
# which line of each pair is present in the committed file.
public type Packet = unit {
payload: bytes &size=19;
protocol: uint16; # next-layer protocol, values need to be mapped to analyzers in Zeek scriptland
payload: Message;

on %done {
# Feed into Zeek's next-layer packet analysis.
zeek::forward_packet(self.protocol);
# NOTE(review): forward_packet presumably expects a numeric next-layer
# protocol identifier; passing the parsed Message unit here should be
# verified against the Zeek Spicy API documentation.
zeek::forward_packet(self.payload);
}
};

type WireType = enum {
VARINT = 0x00,
I64 = 0x01,
LEN = 0x02,
SGROUP = 0x03, # deprecated
EGROUP = 0x04, # deprecated
I32 = 0x05
# message := (tag value)*
# A message is encoded as a sequence of zero or more tag/value pairs.
type Message = unit {
# TODO: parse zero or more (Tag, Value) pairs, following the grammar above.
};

# tag := (field << 3) | wire_type
# A tag combines the wire_type, stored in the three least significant bits,
# with the field number defined in the .proto file, stored in the bits above.
# The combined value is encoded as a uint32 varint.
type Tag = unit {
# TODO: decode the varint, then recover wire_type as (tag & 0x07) and the
# field number as (tag >> 3), per the formula above.
};

# value := varint      for wire_type == VARINT,
#          i32         for wire_type == I32,
#          i64         for wire_type == I64,
#          len-prefix  for wire_type == LEN,
#          <empty>     for wire_type == SGROUP or EGROUP
# How a value is stored depends on the wire_type specified in its tag.
type Value = unit {
# TODO: select the sub-parser (VarInt, I32, I64, LenPrefix, or nothing)
# based on the wire_type of the preceding Tag.
};

# varint := int32 | int64 | uint32 | uint64 | bool | enum | sint32 | sint64
# All of these are encoded as varints; sint32/sint64 values are
# ZigZag-encoded first.
type VarInt = unit {
# TODO: implement varint decoding (see the Protocol Buffers encoding guide);
# sint32/sint64 values additionally need ZigZag decoding.
};

# i32 := sfixed32 | fixed32 | float
# Encoded as 4 bytes, little-endian: a memcpy of the equivalent C type
# (int32_t, uint32_t, or float).
type I32 = unit {
# TODO: parse a 4-byte little-endian value.
};

# i64 := sfixed64 | fixed64 | double
# Encoded as 8 bytes, little-endian: a memcpy of the equivalent C type
# (int64_t, uint64_t, or double).
type I64 = unit {
# TODO: parse an 8-byte little-endian value.
};

# len-prefix := size (message | string | bytes | packed)
# A length-prefixed value is stored as a size followed by one of the listed
# data types. The size is encoded as an int32 varint.
type LenPrefix = unit {
# TODO: parse the varint size, then parse the payload that follows it as one
# of the listed data types.
};

# string := valid UTF-8 string (e.g. ASCII)
# A string must use UTF-8 character encoding and cannot exceed 2GB.
type String = unit {
# TODO: parse the string payload as UTF-8 text.
};

# bytes := any sequence of 8-bit bytes
# Bytes can store custom data types, up to 2GB in size.
type Bytes = unit {
# TODO: capture the raw byte payload.
};

# packed := varint* | i32* | i64*
# The packed data type stores consecutive values of the type described in the
# protocol definition. The tag is dropped for every value after the first,
# which amortizes the cost of tags to one per field rather than one per
# element.
type Packed = unit {
# TODO: parse a run of consecutive varint, i32, or i64 values that share a
# single tag.
};

0 comments on commit fe88fa4

Please sign in to comment.