diff --git a/src/main/java/org/openx/data/jsonserde/objectinspector/JsonObjectInspectorFactory.java b/src/main/java/org/openx/data/jsonserde/objectinspector/JsonObjectInspectorFactory.java index a27d2599..1c489dc8 100644 --- a/src/main/java/org/openx/data/jsonserde/objectinspector/JsonObjectInspectorFactory.java +++ b/src/main/java/org/openx/data/jsonserde/objectinspector/JsonObjectInspectorFactory.java @@ -16,6 +16,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; + import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; @@ -29,6 +30,7 @@ import org.openx.data.jsonserde.objectinspector.primitive.JavaStringDoubleObjectInspector; import org.openx.data.jsonserde.objectinspector.primitive.JavaStringFloatObjectInspector; import org.openx.data.jsonserde.objectinspector.primitive.JavaStringIntObjectInspector; +import org.openx.data.jsonserde.objectinspector.primitive.JavaStringJsonObjectInspector; import org.openx.data.jsonserde.objectinspector.primitive.JavaStringLongObjectInspector; import org.openx.data.jsonserde.objectinspector.primitive.JavaStringShortObjectInspector; import org.openx.data.jsonserde.objectinspector.primitive.JavaStringTimestampObjectInspector; @@ -171,6 +173,7 @@ public static JsonMapObjectInspector getJsonMapObjectInspector( = new EnumMap(PrimitiveCategory.class); static { + primitiveOICache.put(PrimitiveCategory.STRING, new JavaStringJsonObjectInspector()); primitiveOICache.put(PrimitiveCategory.BYTE, new JavaStringByteObjectInspector()); primitiveOICache.put(PrimitiveCategory.SHORT, new JavaStringShortObjectInspector()); primitiveOICache.put(PrimitiveCategory.INT, new JavaStringIntObjectInspector()); diff --git a/src/main/java/org/openx/data/jsonserde/objectinspector/primitive/JavaStringJsonObjectInspector.java 
b/src/main/java/org/openx/data/jsonserde/objectinspector/primitive/JavaStringJsonObjectInspector.java
new file mode 100644
index 00000000..49d83cc7
--- /dev/null
+++ b/src/main/java/org/openx/data/jsonserde/objectinspector/primitive/JavaStringJsonObjectInspector.java
@@ -0,0 +1,59 @@
+package org.openx.data.jsonserde.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Logger;
+
+/**
+ * String object inspector whose getters render any non-null backing object
+ * with {@code toString()} rather than casting it to {@link String}, so a
+ * {@code string} column may be backed by a non-String value (for example a
+ * parsed JSON object, as exercised by GetJsonObjectTest).
+ */
+public class JavaStringJsonObjectInspector extends AbstractPrimitiveJavaObjectInspector
+        implements SettableStringObjectInspector {
+
+    private static final Logger LOG = Logger.getLogger(JavaStringJsonObjectInspector.class);
+
+    public JavaStringJsonObjectInspector() {
+        super(PrimitiveObjectInspectorUtils.stringTypeEntry);
+    }
+
+    /** Returns the value's {@code toString()} text in a new {@link Text}, or null. */
+    @Override
+    public Text getPrimitiveWritableObject(Object o) {
+        return o == null ? null : new Text(o.toString());
+    }
+
+    /** Returns the value's {@code toString()} text, or null for a null value. */
+    @Override
+    public String getPrimitiveJavaObject(Object o) {
+        return o == null ? null : o.toString();
+    }
+
+    /** Returns the text content as a {@link String}, or null for a null value. */
+    @Override
+    public Object create(Text value) {
+        return value == null ? null : value.toString();
+    }
+
+    /** Ignores {@code o} and returns the text content as the new value, or null. */
+    @Override
+    public Object set(Object o, Text value) {
+        return value == null ? null : value.toString();
+    }
+
+    /** Returns {@code value} unchanged. */
+    @Override
+    public Object create(String value) {
+        return value;
+    }
+
+    /** Ignores {@code o} and returns {@code value} as the new value. */
+    @Override
+    public Object set(Object o, String value) {
+        return value;
+    }
+}
diff --git a/src/test/java/org/openx/data/jsonserde/GetJsonObjectTest.java b/src/test/java/org/openx/data/jsonserde/GetJsonObjectTest.java
new file mode 100644
index 00000000..842666a4
--- /dev/null
+++ b/src/test/java/org/openx/data/jsonserde/GetJsonObjectTest.java
@@ -0,0 +1,137 @@
+package org.openx.data.jsonserde;
+
+import static org.junit.Assert.*;
+
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.udf.UDFJson;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.junit.Before;
+import org.junit.Test;
+import org.openx.data.jsonserde.json.JSONException;
+import org.openx.data.jsonserde.json.JSONObject;
+
+/**
+ * Tests reading a JSON field through a column declared as {@code string} and
+ * querying the resulting text with {@link UDFJson}.
+ *
+ * @author snagmote
+ */
+public class GetJsonObjectTest {
+
+    /** Sample document up to (and excluding) the value of the "pageInfo" field. */
+    private static final String RESPONSE_HEAD =
+            "{ \"kind\": \"youtube#videoListResponse\", "
+            + "\"etag\": \"\\\"79S54kzisD_9SOTfQLu_0TVQSpY/mYlS4-ghMGhc1wTFCwoQl3IYDZc\\\"\", "
+            + "\"pageInfo\": ";
+
+    /** Remainder of the sample document, following the "pageInfo" value. */
+    private static final String RESPONSE_TAIL =
+            ", \"items\": [ { \"kind\": \"youtube#video\", "
+            + "\"etag\": \"\\\"79S54kzisD_9SOTfQLu_0TVQSpY/A4foLs-VO317Po_ulY6b5mSimZA\\\"\", "
+            + "\"id\": \"wHkPb68dxEw\", "
+            + "\"statistics\": { \"viewCount\": \"9211\", \"likeCount\": \"79\", "
+            + "\"dislikeCount\": \"11\", \"favoriteCount\": \"0\", \"commentCount\": \"29\" }, "
+            + "\"topicDetails\": { \"topicIds\": [ \"/m/02mjmr\" ], "
+            + "\"relevantTopicIds\": [ \"/m/0cnfvd\", \"/m/01jdpf\" ] } } ] }";
+
+    /** "pageInfo" value used when the field holds a JSON object. */
+    private static final String PAGE_INFO_OBJECT = "{ \"totalResults\": 1, \"resultsPerPage\": 1 }";
+
+    /** "pageInfo" value used when the field holds a plain JSON string. */
+    private static final String PAGE_INFO_PLAIN = "\"page\"";
+
+    static JsonSerDe instance;
+
+    @Before
+    public void setUp() throws Exception {
+        initialize();
+    }
+
+    /**
+     * Creates the SerDe under test with columns kind, etag, pageInfo (all
+     * string) and v_items, and sets the mapping properties that point
+     * v_items / v_statistics at the JSON fields "items" / "statistics".
+     */
+    public static void initialize() throws Exception {
+        instance = new JsonSerDe();
+        Configuration conf = null;
+        Properties tbl = new Properties();
+        // from google video API
+        tbl.setProperty(Constants.LIST_COLUMNS, "kind,etag,pageInfo,v_items");
+        // NOTE(review): the v_items type below has unbalanced '>' characters, as if
+        // its type parameters were lost; confirm against the intended schema.
+        tbl.setProperty(
+                Constants.LIST_COLUMN_TYPES,
+                ("string,string," + "string,"
+                        + "ARRAY,"
+                        + "topicDetails:STRUCT,relevantTopicIds:ARRAY>"
+                        + ">>").toLowerCase());
+        tbl.setProperty("mapping.v_items", "items");
+        tbl.setProperty("mapping.v_statistics", "statistics");
+
+        // Configure the SerDe once; repeating the same setup adds nothing.
+        instance.initialize(conf, tbl);
+    }
+
+    /**
+     * Deserializes the sample document with {@code pageInfoJson} as the raw
+     * value of its "pageInfo" field and returns the "pageinfo" column as seen
+     * through its string object inspector.
+     */
+    private static String readPageInfo(String pageInfoJson) throws SerDeException {
+        Writable w = new Text(RESPONSE_HEAD + pageInfoJson + RESPONSE_TAIL);
+        JSONObject row = (JSONObject) instance.deserialize(w);
+
+        StructObjectInspector soi = (StructObjectInspector) instance.getObjectInspector();
+        Object raw = soi.getStructFieldData(row, soi.getStructFieldRef("pageinfo"));
+        StringObjectInspector stringInspector = (StringObjectInspector) soi
+                .getStructFieldRef("pageinfo").getFieldObjectInspector();
+
+        return stringInspector.getPrimitiveJavaObject(raw);
+    }
+
+    /** A top-level key of an object-valued field is reachable via a JSON path. */
+    @Test
+    public void testGetJsonObject() throws SerDeException, JSONException {
+        Text output = new UDFJson().evaluate(readPageInfo(PAGE_INFO_OBJECT), "$.totalresults");
+        assertEquals("1", output.toString());
+    }
+
+    /** A nested object inside the field comes back as compact JSON text. */
+    @Test
+    public void testNestedGetJsonObject() throws SerDeException, JSONException {
+        String pageInfo = readPageInfo("{ \"pagehit\":{ \"kind\": \"youtube#video\" } ,"
+                + "\"totalResults\": 1, \"resultsPerPage\": 1 }");
+
+        Text output = new UDFJson().evaluate(pageInfo, "$.pagehit");
+        assertEquals("{\"kind\":\"youtube#video\"}", output.toString());
+    }
+
+    /** A JSON path lookup on a plain (non-JSON) string value yields null. */
+    @Test
+    public void testStringWhenNotJson() throws SerDeException, JSONException {
+        Text output = new UDFJson().evaluate(readPageInfo(PAGE_INFO_PLAIN), "$.test_field");
+        assertNull(output);
+    }
+
+    /** A JSON path lookup for a key the object does not contain yields null. */
+    @Test
+    public void testStringWhenFieldIsNotInJson() throws SerDeException, JSONException {
+        Text output = new UDFJson().evaluate(readPageInfo(PAGE_INFO_OBJECT), "$.test_field");
+        assertNull(output);
+    }
+
+    /** A plain string value is returned unchanged by the string inspector. */
+    @Test
+    public void testStringWhenJson() throws SerDeException, JSONException {
+        assertEquals("page", readPageInfo(PAGE_INFO_PLAIN));
+    }
+}