libwreport  3.6
bufr.h
1 #ifndef WREPORT_BUFFERS_BUFR_H
2 #define WREPORT_BUFFERS_BUFR_H
3 
4 #include <wreport/error.h>
5 #include <wreport/var.h>
6 #include <string>
7 #include <functional>
8 #include <cstdint>
9 
10 namespace wreport {
11 struct Var;
12 
13 namespace buffers {
14 
/// Binary buffer with bit-level read operations.
18 class BufrInput
19 {
20 protected:
/// Scan length of section sec_no, filling in the start of the next section
/// in sec[sec_no + 1].
25  void scan_section_length(unsigned sec_no);
26 
27 public:
/// Input buffer.
29  const uint8_t* data;
30 
/// Input buffer size.
32  size_t data_len;
33 
/// Input file name (optional).
41  const char* fname = nullptr;
42 
/// File offset of the start of the message.
50  size_t start_offset = 0;
51 
/// Offset of the byte we are currently decoding.
53  unsigned s4_cursor = 0;
54 
/// Byte we are currently decoding.
56  uint8_t pbyte = 0;
57 
/// Bits left in pbyte to decode.
59  int pbyte_len = 0;
60 
/// Offsets of the start of BUFR sections.
62  unsigned sec[6];
63 
64 
/// Wrap a string into a BufrInput.
71  BufrInput(const std::string& in);
72 
/// Scan the message filling in the sec[] array of start offsets of
/// sections 0 and 1.
80  void scan_lead_sections();
81 
/// Scan the message filling in the sec[] array of section start offsets of
/// all sections from 2 on.
/// NOTE(review): has_optional presumably tells whether the optional section
/// is present -- confirm against the implementation.
93  void scan_other_sections(bool has_optional);
94 
/// Return the current decoding byte offset.
96  unsigned offset() const { return s4_cursor; }
97 
99  unsigned bits_left() const { return (data_len - s4_cursor) * 8 + pbyte_len; }
100 
102  inline unsigned read_byte(unsigned pos) const
103  {
104  return (unsigned)data[pos];
105  }
106 
108  inline unsigned read_byte(unsigned section, unsigned pos) const
109  {
110  return (unsigned)data[sec[section] + pos];
111  }
112 
114  unsigned read_number(unsigned pos, unsigned byte_len) const
115  {
116  unsigned res = 0;
117  for (unsigned i = 0; i < byte_len; ++i)
118  {
119  res <<= 8;
120  res |= data[pos + i];
121  }
122  return res;
123  }
124 
129  inline unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
130  {
131  return read_number(sec[section] + pos, byte_len);
132  }
133 
138  uint32_t get_bits(unsigned n)
139  {
140  uint32_t result = 0;
141 
142  if (s4_cursor == data_len)
143  parse_error("end of buffer while looking for %d bits of bit-packed data", n);
144 
145  // TODO: review and benchmark and possibly simplify
146  // (a possible alternative approach is to keep a current bitmask that
147  // starts at 0x80 and is shifted right by 1 at each read until it
148  // reaches 0, and get rid of pbyte_len)
149  for (unsigned i = 0; i < n; i++)
150  {
151  if (pbyte_len == 0)
152  {
153  pbyte_len = 8;
154  pbyte = data[s4_cursor++];
155  }
156  result <<= 1;
157  if (pbyte & 0x80)
158  result |= 1;
159  pbyte <<= 1;
160  pbyte_len--;
161  }
162 
163  return result;
164  }
165 
/// Debug helper that dumps bits to stderr.
/// NOTE(review): the generated index describes it as dumping 'count' bits of
/// 'buf' starting at the 'ofs-th' bit, but neither buf nor ofs is a parameter
/// here -- confirm the exact behaviour against the implementation.
167  void debug_dump_next_bits(const char* desc, int count) const;
168 
/// Throw an error_parse at the current decoding location.
/// fmt and the following arguments are printf-style; WREPORT_THROWF_ATTRS
/// tells the compiler that the function always throws.
170  void parse_error(const char* fmt, ...) const WREPORT_THROWF_ATTRS(2, 3);
171 
/// Throw an error_parse with a printf-style message.
/// NOTE(review): presumably reports offset pos instead of the current
/// decoding location -- confirm.
173  void parse_error(unsigned pos, const char* fmt, ...) const WREPORT_THROWF_ATTRS(3, 4);
174 
/// Throw an error_parse with a printf-style message.
/// NOTE(review): presumably reports offset pos inside section section --
/// confirm.
176  void parse_error(unsigned section, unsigned pos, const char* fmt, ...) const WREPORT_THROWF_ATTRS(4, 5);
177 
/// Check that the input buffer contains at least datalen characters after
/// offset pos; throw error_parse otherwise.
/// NOTE(review): expected is presumably a description of the missing data
/// used in the error message -- confirm.
190  void check_available_data(unsigned pos, size_t datalen, const char* expected);
191 
/// Section-relative variant of check_available_data.
/// NOTE(review): presumably pos is relative to the start of section
/// section -- confirm.
206  void check_available_data(unsigned section, unsigned pos, size_t datalen, const char* expected);
207 
/// Decode a compressed number as described by dest.info(), and set it as
/// value for dest.
220  void decode_compressed_number(Var& dest, uint32_t base, unsigned diffbits);
221 
/// Decode a number as described by dest.info(), and set it as value for dest.
230  void decode_number(Var& dest);
231 
/// Read the base value and the difference bit count of a compressed field
/// into base and diffbits. The inline callers in this class treat a true
/// return value as "the value is missing for all subsets".
232  bool decode_compressed_base(Varinfo info, uint32_t& base, uint32_t& diffbits);
233 
238  void decode_compressed_number(Varinfo info, unsigned subsets, std::function<void(unsigned, Var&&)> dest)
239  {
240  // Data field base value
241  uint32_t base;
242 
243  // Number of bits used for each difference value
244  uint32_t diffbits;
245 
246  bool missing = decode_compressed_base(info, base, diffbits);
247  if (missing)
248  {
249  for (unsigned i = 0; i < subsets; ++i)
250  dest(i, Var(info));
251  }
252  else if (!diffbits)
253  {
254  Var var(info, info->decode_binary(base));
255  for (unsigned i = 0; i < subsets; ++i)
256  dest(i, Var(var));
257  }
258  else
259  {
260  Var var(info);
261  for (unsigned i = 0; i < subsets; ++i)
262  {
263  decode_compressed_number(var, base, diffbits);
264  dest(i, std::move(var));
265  }
266  }
267  }
268 
269  template<typename Adder>
270  void decode_string(Varinfo info, unsigned subsets, Adder& dest)
271  {
272  // Decode the base value
273  char str[info->bit_len / 8 + 2];
274  size_t len;
275  bool missing = !decode_string(info->bit_len, str, len);
276 
277  // Decode the number of bits (encoded in 6 bits) for each difference
278  // value
279  uint32_t diffbits = get_bits(6);
280 
281  if (missing && diffbits == 0)
282  dest.add_missing(info);
283  else if (diffbits == 0)
284  {
285  // Add the same string to all the subsets
286  dest.add_same(Var(info, str));
287  } else {
288  /* For compressed strings, the reference value must be all zeros */
289  for (size_t i = 0; i < len; ++i)
290  if (str[i] != 0)
291  error_unimplemented::throwf("compressed strings with %d bit deltas have non-zero reference value", diffbits);
292 
293  /* Let's also check that the number of
294  * difference characters is the same length as
295  * the reference string */
296  if (diffbits > len)
297  error_unimplemented::throwf("compressed strings with %zd characters have %d bit deltas (deltas should not be longer than field)", len, diffbits);
298 
299  for (unsigned i = 0; i < subsets; ++i)
300  {
301  // Set the variable value
302  if (decode_string(diffbits * 8, str, len))
303  {
304  // Compute the value for this subset
305  dest.add_var(i, Var(info, str));
306  } else {
307  // Missing value
308  dest.add_var(i, Var(info));
309  }
310  }
311  }
312  }
313 
314  template<typename Adder>
315  void decode_compressed_number(Varinfo info, unsigned subsets, Adder& dest)
316  {
317  // Data field base value
318  uint32_t base;
319 
320  // Number of bits used for each difference value
321  uint32_t diffbits;
322 
323  bool missing = decode_compressed_base(info, base, diffbits);
324  if (missing)
325  dest.add_missing(info);
326  else if (!diffbits)
327  dest.add_same(Var(info, info->decode_binary(base)));
328  else
329  {
330  Var var(info);
331  for (unsigned i = 0; i < subsets; ++i)
332  {
333  decode_compressed_number(var, base, diffbits);
334  dest.add_var(i, std::move(var));
335  }
336  }
337  }
338 
/// Variant of decode_compressed_number whose callback also receives a
/// uint32_t.
/// NOTE(review): presumably the uint32_t is the associated field value, read
/// using associated_field_bits bits -- confirm against the implementation.
343  void decode_compressed_number(Varinfo info, unsigned associated_field_bits, unsigned subsets, std::function<void(unsigned, Var&&, uint32_t)> dest);
344 
/// Decode a number as described by dest.info(), and set it as value for dest.
/// NOTE(review): this is the description given by the generated index;
/// the role of subsets is not stated there -- confirm.
356  void decode_compressed_semantic_number(Var& dest, unsigned subsets);
357 
/// Decode a string of bit_len bits into str, storing its length in len.
/// The inline callers in this class treat a false return value as
/// "the value is missing".
/// NOTE(review): required size of str is not visible here; the template
/// caller passes a buffer of bit_len / 8 + 2 chars -- confirm.
374  bool decode_string(unsigned bit_len, char* str, size_t& len);
375 
/// NOTE(review): presumably decodes a string as described by dest.info()
/// and sets it as value for dest -- confirm.
387  void decode_string(Var& dest);
388 
/// NOTE(review): presumably the compressed-message counterpart of
/// decode_string(Var&) -- confirm.
400  void decode_string(Var& dest, unsigned subsets);
401 
/// NOTE(review): presumably decodes a compressed string described by info
/// and calls dest once per subset -- confirm.
406  void decode_string(Varinfo info, unsigned subsets, std::function<void(unsigned, Var&&)> dest);
407 
/// Decode a generic binary value as-is, as described by dest.info(), and
/// set it as value for dest.
419  void decode_binary(Var& dest);
420 
428  std::string decode_uncompressed_bitmap(unsigned size)
429  {
430  std::string buf;
431  buf.resize(size);
432  for (unsigned i = 0; i < size; ++i)
433  {
434  uint32_t val = get_bits(1);
435  buf[i] = (val == 0) ? '+' : '-';
436  }
437  return buf;
438  }
439 
453  std::string decode_compressed_bitmap(unsigned size)
454  {
455  std::string buf;
456  buf.resize(size);
457  for (unsigned i = 0; i < size; ++i)
458  {
459  uint32_t val = get_bits(1);
460  buf[i] = (val == 0) ? '+' : '-';
461  // Decode the number of bits (encoded in 6 bits) of difference
462  // values. It's odd to repeat this for each bit in the bitmap, but
463  // that's how things are transmitted and it's somewhat consistent
464  // with how data compression is specified
465  val = get_bits(6);
466  // If compressed, ensure that the difference bits are 0 and they are
467  // not trying to transmit odd things like delta bitmaps
468  if (val != 0)
469  parse_error("bitmap entry %u declares %u difference bits, but we only support 0", i, val);
470  }
471  return buf;
472  }
473 };
474 
/// Binary buffer with bit-level append operations.
// NOTE(review): the generated index places this definition (BufrOutput) at
// bufr.h:478 and an `int pbyte_len` member ("Number of bits already encoded
// in pbyte") at bufr.h:487, but neither line appears in this listing --
// restore them from the original header.
479 {
/// Output buffer to which we append encoded data.
481  std::string& out;
482 
/// Byte to which we are appending bits to encode.
484  uint8_t pbyte;
485 
488 
/// NOTE(review): presumably wraps out as the output buffer -- confirm.
495  BufrOutput(std::string& out);
496 
/// NOTE(review): presumably appends n bits taken from val to the output --
/// confirm bit order and valid range of n against the implementation.
500  void add_bits(uint32_t val, int n);
501 
506  void raw_append(const char* str, int len)
507  {
508  out.append(str, len);
509  }
510 
512  void append_short(unsigned short val)
513  {
514  add_bits(val, 16);
515  }
516 
518  void append_byte(unsigned char val)
519  {
520  add_bits(val, 8);
521  }
522 
524  void append_missing(unsigned len_bits)
525  {
526  add_bits(0xffffffff, len_bits);
527  }
528 
/// NOTE(review): presumably appends the string value of var, len_bits bits
/// long -- confirm padding/truncation rules against the implementation.
530  void append_string(const Var& var, unsigned len_bits);
531 
/// NOTE(review): presumably appends the string val, len_bits bits long --
/// confirm padding/truncation rules against the implementation.
533  void append_string(const char* val, unsigned len_bits);
534 
/// NOTE(review): presumably appends len_bits bits of the binary data in
/// val -- confirm.
536  void append_binary(const unsigned char* val, unsigned len_bits);
537 
/// NOTE(review): presumably appends the value of var encoded as described
/// by info -- confirm.
539  void append_var(Varinfo info, const Var& var);
540 
/// NOTE(review): presumably appends a missing value encoded as described
/// by info -- confirm.
542  void append_missing(Varinfo info);
543 
/// NOTE(review): presumably writes out the partially encoded bits held in
/// pbyte -- confirm.
548  void flush();
549 };
550 
551 
552 }
553 }
554 
555 #endif
unsigned offset() const
Return the current decoding byte offset.
Definition: bufr.h:96
void parse_error(const char *fmt,...) const WREPORT_THROWF_ATTRS(2, 3)
Throw an error_parse at the current decoding location.
void decode_number(Var &dest)
Decode a number as described by dest.info(), and set it as value for dest.
void decode_compressed_semantic_number(Var &dest, unsigned subsets)
Decode a number as described by dest.info(), and set it as value for dest.
void append_byte(unsigned char val)
Append an 8 bits integer.
Definition: bufr.h:518
uint8_t pbyte
Byte to which we are appending bits to encode.
Definition: bufr.h:484
const uint8_t * data
Input buffer.
Definition: bufr.h:29
static void throwf(const char *fmt,...) WREPORT_THROWF_ATTRS(1
Throw the exception, building the message printf-style.
BufrInput(const std::string &in)
Wrap a string into a BufrInput.
A physical variable.
Definition: var.h:23
std::string decode_uncompressed_bitmap(unsigned size)
Decode an uncompressed bitmap of size bits.
Definition: bufr.h:428
void append_short(unsigned short val)
Append a 16 bits integer.
Definition: bufr.h:512
size_t start_offset
File offset of the start of the message.
Definition: bufr.h:50
unsigned read_byte(unsigned section, unsigned pos) const
Read a byte value at offset pos inside section section.
Definition: bufr.h:108
Binary buffer with bit-level append operations.
Definition: bufr.h:478
void decode_binary(Var &dest)
Decode a generic binary value as-is, as described by dest.info(), and set it as value for dest...
const char * fname
Input file name (optional).
Definition: bufr.h:41
wreport exceptions.
void decode_compressed_number(Varinfo info, unsigned subsets, std::function< void(unsigned, Var &&)> dest)
Decode a number as described by info from a compressed bufr with subsets subsets, and send the result...
Definition: bufr.h:238
unsigned sec[6]
Offsets of the start of BUFR sections.
Definition: bufr.h:62
unsigned s4_cursor
Offset of the byte we are currently decoding.
Definition: bufr.h:53
double decode_binary(uint32_t val) const
Decode a double value from a decimal integer value using Varinfo binary encoding information (bit_re...
uint8_t pbyte
Byte we are currently decoding.
Definition: bufr.h:56
void append_missing(unsigned len_bits)
Append a missing value len_bits long.
Definition: bufr.h:524
Information about a variable.
Definition: varinfo.h:135
size_t data_len
Input buffer size.
Definition: bufr.h:32
void raw_append(const char *str, int len)
Append a string len bytes long to the output buffer as it is, ignoring partially encoded bits...
Definition: bufr.h:506
unsigned read_byte(unsigned pos) const
Read a byte value at offset pos.
Definition: bufr.h:102
int pbyte_len
Bits left in pbyte to decode.
Definition: bufr.h:59
void scan_lead_sections()
Scan the message filling in the sec[] array of start offsets of sections 0 and 1. ...
unsigned bit_len
Length in bits of the variable when encoded as an unsigned binary value.
Definition: varinfo.h:170
String functions.
Definition: benchmark.h:13
void debug_dump_next_bits(const char *desc, int count) const
Dump to stderr 'count' bits of 'buf', starting at the 'ofs-th' bit.
std::string decode_compressed_bitmap(unsigned size)
Decode a "compressed" bitmap of size bits.
Definition: bufr.h:453
int pbyte_len
Number of bits already encoded in pbyte.
Definition: bufr.h:487
void scan_other_sections(bool has_optional)
Scan the message filling in the sec[] array of section start offsets of all sections from 2 on...
#define WREPORT_THROWF_ATTRS(a, b)
Tell the compiler that a function always throws and expects printf-style arguments.
Definition: error.h:56
void scan_section_length(unsigned sec_no)
Scan length of section sec_no, filling in the start of the next section in sec[sec_no + 1]...
void decode_compressed_number(Var &dest, uint32_t base, unsigned diffbits)
Decode a compressed number as described by dest.info(), and set it as value for dest.
Binary buffer with bit-level read operations.
Definition: bufr.h:18
uint32_t get_bits(unsigned n)
Get the integer value of the next 'n' bits from the decode input; n must be <= 32. ...
Definition: bufr.h:138
std::string & out
Output buffer to which we append encoded data.
Definition: bufr.h:481
void check_available_data(unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos; throw error_parse ...
unsigned read_number(unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos.
Definition: bufr.h:114
unsigned bits_left() const
Return the number of bits left in the message to be decoded.
Definition: bufr.h:99
unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos inside section section...
Definition: bufr.h:129