From d542d21c643c36623b782b73519ff236b0037e9f Mon Sep 17 00:00:00 2001 From: Anu Sudarsan Date: Fri, 20 Jul 2018 08:03:09 +0200 Subject: [PATCH] Switch AVRO InstanceCache to use Caffeine cache The InstanceCache was made a time-based cache to work around google/guava#2408 as a part of 1d2f31e. The time-based cache did not work for all workloads, especially with the hardcoded 1 MINUTE TTL value. In addition, the default concurrencyLevel of 4 was not good enough for the cache to work out-of-the-box. So switch the InstanceCache to use a size-based Caffeine (https://github.com/ben-manes/caffeine) cache, which has the fix for the original Guava bug google/guava#2408. Switching to Caffeine also helps in terms of concurrencyLevel. From https://github.com/ben-manes/caffeine/wiki/Benchmarks: "Caffeine and ConcurrentLinkedHashMap size their internal structures based on the number of CPUs" --- pom.xml | 6 ++++++ .../apache/hadoop/hive/serde2/avro/InstanceCache.java | 11 +++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index 8235be3..1b59821 100644 --- a/pom.xml +++ b/pom.xml @@ -377,6 +377,12 @@ + + com.github.ben-manes.caffeine + caffeine + 2.6.1 + + com.twitter diff --git a/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java b/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java index c3b2f60..0206871 100644 --- a/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java +++ b/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java @@ -13,20 +13,19 @@ */ package org.apache.hadoop.hive.serde2.avro; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.Cache; import java.util.Set; -import java.util.concurrent.TimeUnit; /** - * This is a thread-safe, time-bounded fork of the Hive version. + * This is a thread-safe, size-bounded fork of the Hive version. 
* It also includes the correctness fix from HIVE-11288. */ public abstract class InstanceCache { - private final Cache cache = CacheBuilder.newBuilder() - .expireAfterWrite(1, TimeUnit.MINUTES) + private final Cache cache = Caffeine.newBuilder() + .maximumSize(100_000) .build(); protected InstanceCache() {}