@@ -473,4 +473,131 @@ static inline VALUE msgpack_buffer_read_top_as_symbol(msgpack_buffer_t* b, size_
473
473
return rb_str_intern (msgpack_buffer_read_top_as_string (b , length , true, utf8 ));
474
474
}
475
475
476
+ // Hash keys are likely to be repeated, and are frozen.
477
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
478
+ // and save much more expensive lookups into the global fstring table.
479
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
480
+ // so that it can easily be embedded inside msgpack_unpacker_t.
481
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
482
+ // performance.
483
+ #define MSGPACK_KEY_CACHE_CAPACITY 63
484
+
485
+ typedef struct msgpack_key_cache_t msgpack_key_cache_t ;
486
+ struct msgpack_key_cache_t {
487
+ int length ;
488
+ VALUE entries [MSGPACK_KEY_CACHE_CAPACITY ];
489
+ };
490
+
491
+ static inline VALUE build_interned_string (const char * str , const long length )
492
+ {
493
+ # ifdef HAVE_RB_ENC_INTERNED_STR
494
+ return rb_enc_interned_str (str , length , rb_utf8_encoding ());
495
+ # else
496
+ VALUE rstring = rb_utf8_str_new (str , length );
497
+ return rb_funcall (rb_str_freeze (rstring ), s_uminus , 0 );
498
+ # endif
499
+ }
500
+
501
+ static inline VALUE build_symbol (const char * str , const long length )
502
+ {
503
+ return rb_str_intern (build_interned_string (str , length ));
504
+ }
505
+
506
+ static void rvalue_cache_insert_at (msgpack_key_cache_t * cache , int index , VALUE rstring )
507
+ {
508
+ MEMMOVE (& cache -> entries [index + 1 ], & cache -> entries [index ], VALUE , cache -> length - index );
509
+ cache -> length ++ ;
510
+ cache -> entries [index ] = rstring ;
511
+ }
512
+
513
+ static inline int rstring_cache_cmp (const char * str , const long length , VALUE rstring )
514
+ {
515
+ long rstring_length = RSTRING_LEN (rstring );
516
+ if (length == rstring_length ) {
517
+ return memcmp (str , RSTRING_PTR (rstring ), length );
518
+ } else {
519
+ return (int )(length - rstring_length );
520
+ }
521
+ }
522
+
523
+ static VALUE rstring_cache_fetch (msgpack_key_cache_t * cache , const char * str , const long length )
524
+ {
525
+ int low = 0 ;
526
+ int high = cache -> length - 1 ;
527
+ int mid = 0 ;
528
+ int last_cmp = 0 ;
529
+
530
+ while (low <= high ) {
531
+ mid = (high + low ) >> 1 ;
532
+ VALUE entry = cache -> entries [mid ];
533
+ last_cmp = rstring_cache_cmp (str , length , entry );
534
+
535
+ if (last_cmp == 0 ) {
536
+ return entry ;
537
+ } else if (last_cmp > 0 ) {
538
+ low = mid + 1 ;
539
+ } else {
540
+ high = mid - 1 ;
541
+ }
542
+ }
543
+
544
+ VALUE rstring = build_interned_string (str , length );
545
+
546
+ if (cache -> length < MSGPACK_KEY_CACHE_CAPACITY ) {
547
+ if (last_cmp > 0 ) {
548
+ mid += 1 ;
549
+ }
550
+
551
+ rvalue_cache_insert_at (cache , mid , rstring );
552
+ }
553
+ return rstring ;
554
+ }
555
+
556
+ static VALUE rsymbol_cache_fetch (msgpack_key_cache_t * cache , const char * str , const long length )
557
+ {
558
+ int low = 0 ;
559
+ int high = cache -> length - 1 ;
560
+ int mid = 0 ;
561
+ int last_cmp = 0 ;
562
+
563
+ while (low <= high ) {
564
+ mid = (high + low ) >> 1 ;
565
+ VALUE entry = cache -> entries [mid ];
566
+ last_cmp = rstring_cache_cmp (str , length , rb_sym2str (entry ));
567
+
568
+ if (last_cmp == 0 ) {
569
+ return entry ;
570
+ } else if (last_cmp > 0 ) {
571
+ low = mid + 1 ;
572
+ } else {
573
+ high = mid - 1 ;
574
+ }
575
+ }
576
+
577
+ VALUE rsymbol = build_symbol (str , length );
578
+
579
+ if (cache -> length < MSGPACK_KEY_CACHE_CAPACITY ) {
580
+ if (last_cmp > 0 ) {
581
+ mid += 1 ;
582
+ }
583
+
584
+ rvalue_cache_insert_at (cache , mid , rsymbol );
585
+ }
586
+ return rsymbol ;
587
+ }
588
+
589
+ static inline VALUE msgpack_buffer_read_top_as_interned_symbol (msgpack_buffer_t * b , msgpack_key_cache_t * cache , size_t length )
590
+ {
591
+ VALUE result = rsymbol_cache_fetch (cache , b -> read_buffer , length );
592
+ _msgpack_buffer_consumed (b , length );
593
+ return result ;
594
+ }
595
+
596
+ static inline VALUE msgpack_buffer_read_top_as_interned_string (msgpack_buffer_t * b , msgpack_key_cache_t * cache , size_t length )
597
+ {
598
+ VALUE result = rstring_cache_fetch (cache , b -> read_buffer , length );
599
+ _msgpack_buffer_consumed (b , length );
600
+ return result ;
601
+ }
602
+
476
603
#endif
0 commit comments