|
12 | 12 | // See the License for the specific language governing permissions and |
13 | 13 | // limitations under the License. |
14 | 14 |
|
15 | | -use std::str::FromStr; |
16 | | - |
17 | 15 | use anyhow::anyhow; |
18 | 16 | use futures_async_stream::try_stream; |
19 | | -use risingwave_common::cast::{ |
20 | | - str_to_date, str_to_time, str_to_timestamp, str_with_time_zone_to_timestamptz, |
21 | | -}; |
22 | | -use risingwave_common::error::ErrorCode::{InternalError, ProtocolError}; |
| 17 | +use risingwave_common::error::ErrorCode::ProtocolError; |
23 | 18 | use risingwave_common::error::{Result, RwError}; |
24 | | -use risingwave_common::types::{DataType, Datum, Decimal, ScalarImpl}; |
| 19 | +use risingwave_common::types::{DataType, Datum}; |
25 | 20 | use risingwave_common::util::iter_util::ZipEqFast; |
26 | 21 | use simd_json::{BorrowedValue, StaticNode, ValueAccess}; |
27 | 22 |
|
| 23 | +use crate::impl_common_parser_logic; |
28 | 24 | use crate::parser::canal::operators::*; |
29 | | -use crate::parser::common::json_object_smart_get_value; |
| 25 | +use crate::parser::common::{do_parse_simd_json_value, json_object_smart_get_value}; |
30 | 26 | use crate::parser::util::at_least_one_ok; |
31 | 27 | use crate::parser::{SourceStreamChunkRowWriter, WriteGuard}; |
32 | | -use crate::source::{SourceColumnDesc, SourceContextRef}; |
33 | | -use crate::{ensure_rust_type, ensure_str, impl_common_parser_logic}; |
| 28 | +use crate::source::{SourceColumnDesc, SourceContextRef, SourceFormat}; |
34 | 29 |
|
35 | 30 | const AFTER: &str = "data"; |
36 | 31 | const BEFORE: &str = "old"; |
@@ -104,7 +99,6 @@ impl CanalJsonParser { |
104 | 99 | }) |
105 | 100 | }) |
106 | 101 | .collect::<Vec<Result<_>>>(); |
107 | | - |
108 | 102 | at_least_one_ok(results) |
109 | 103 | } |
110 | 104 | CANAL_UPDATE_EVENT => { |
@@ -152,7 +146,6 @@ impl CanalJsonParser { |
152 | 146 | }) |
153 | 147 | }) |
154 | 148 | .collect::<Vec<Result<_>>>(); |
155 | | - |
156 | 149 | at_least_one_ok(results) |
157 | 150 | } |
158 | 151 | CANAL_DELETE_EVENT => { |
@@ -195,70 +188,41 @@ fn cannal_simd_json_parse_value( |
195 | 188 | ) -> Result<Datum> { |
196 | 189 | match value { |
197 | 190 | None | Some(BorrowedValue::Static(StaticNode::Null)) => Ok(None), |
198 | | - Some(v) => Ok(Some(cannal_do_parse_simd_json_value(dtype, v).map_err( |
199 | | - |e| { |
| 191 | + Some(v) => Ok(Some( |
| 192 | + do_parse_simd_json_value(&SourceFormat::CanalJson, dtype, v).map_err(|e| { |
200 | 193 | tracing::warn!("failed to parse type '{}' from json: {}", dtype, e); |
201 | 194 | anyhow!("failed to parse type '{}' from json: {}", dtype, e) |
202 | | - }, |
203 | | - )?)), |
| 195 | + })?, |
| 196 | + )), |
204 | 197 | } |
205 | 198 | } |
206 | 199 |
|
207 | | -#[inline] |
208 | | -fn cannal_do_parse_simd_json_value(dtype: &DataType, v: &BorrowedValue<'_>) -> Result<ScalarImpl> { |
209 | | - let v = match dtype { |
210 | | - // mysql use tinyint to represent boolean |
211 | | - DataType::Boolean => ScalarImpl::Bool(ensure_rust_type!(v, i16) != 0), |
212 | | - DataType::Int16 => ScalarImpl::Int16(ensure_rust_type!(v, i16)), |
213 | | - DataType::Int32 => ScalarImpl::Int32(ensure_rust_type!(v, i32)), |
214 | | - DataType::Int64 => ScalarImpl::Int64(ensure_rust_type!(v, i64)), |
215 | | - DataType::Float32 => ScalarImpl::Float32(ensure_rust_type!(v, f32).into()), |
216 | | - DataType::Float64 => ScalarImpl::Float64(ensure_rust_type!(v, f64).into()), |
217 | | - // FIXME: decimal should have more precision than f64 |
218 | | - DataType::Decimal => Decimal::from_str(ensure_str!(v, "string")) |
219 | | - .map_err(|_| anyhow!("parse decimal from string err {}", v))? |
220 | | - .into(), |
221 | | - DataType::Varchar => ensure_str!(v, "varchar").to_string().into(), |
222 | | - DataType::Date => str_to_date(ensure_str!(v, "date"))?.into(), |
223 | | - DataType::Time => str_to_time(ensure_str!(v, "time"))?.into(), |
224 | | - DataType::Timestamp => str_to_timestamp(ensure_str!(v, "string"))?.into(), |
225 | | - DataType::Timestamptz => { |
226 | | - str_with_time_zone_to_timestamptz(ensure_str!(v, "string"))?.into() |
227 | | - } |
228 | | - _ => { |
229 | | - return Err(RwError::from(InternalError(format!( |
230 | | - "cannal data source not support type {}", |
231 | | - dtype |
232 | | - )))) |
233 | | - } |
234 | | - }; |
235 | | - Ok(v) |
236 | | -} |
237 | | - |
238 | 200 | #[cfg(test)] |
239 | 201 | mod tests { |
240 | | - |
241 | 202 | use std::str::FromStr; |
242 | 203 |
|
243 | 204 | use risingwave_common::array::Op; |
244 | 205 | use risingwave_common::cast::str_to_timestamp; |
245 | 206 | use risingwave_common::row::Row; |
246 | | - use risingwave_common::types::{DataType, Decimal, ScalarImpl, ToOwnedDatum}; |
| 207 | + use risingwave_common::types::{DataType, Decimal, JsonbVal, ScalarImpl, ToOwnedDatum}; |
| 208 | + use serde_json::Value; |
247 | 209 |
|
248 | 210 | use super::*; |
249 | 211 | use crate::parser::SourceStreamChunkBuilder; |
250 | 212 | use crate::source::SourceColumnDesc; |
251 | 213 |
|
252 | 214 | #[tokio::test] |
253 | 215 | async fn test_data_types() { |
254 | | - let payload = br#"{"id":0,"database":"test","table":"data_type","pkNames":["id"],"isDdl":false,"type":"INSERT","es":1682057341424,"ts":1682057382913,"sql":"","sqlType":{"id":4,"tinyint":-6,"smallint":5,"mediumint":4,"int":4,"bigint":-5,"float":7,"double":8,"decimal":3,"date":91,"datetime":93,"time":92,"timestamp":93,"char":1,"varchar":12,"binary":2004,"varbinary":2004,"blob":2004,"text":2005,"enum":4,"set":-7},"mysqlType":{"binary":"binary","varbinary":"varbinary","enum":"enum","set":"set","bigint":"bigint","float":"float","datetime":"datetime","varchar":"varchar","smallint":"smallint","mediumint":"mediumint","double":"double","date":"date","char":"char","id":"int","tinyint":"tinyint","decimal":"decimal","blob":"blob","text":"text","int":"int","time":"time","timestamp":"timestamp"},"old":null,"data":[{"id":"1","tinyint":"5","smallint":"136","mediumint":"172113","int":"1801160058","bigint":"3916589616287113937","float":"0","double":"0.15652","decimal":"1.20364700","date":"2023-04-20","datetime":"2023-02-15 13:01:36","time":"20:23:41","timestamp":"2022-10-13 12:12:54","char":"Kathleen","varchar":"atque esse fugiat et quibusdam qui.","binary":"Joseph\u0000\u0000\u0000\u0000","varbinary":"Douglas","blob":"ducimus ut in commodi necessitatibus error magni repellat exercitationem!","text":"rerum sunt nulla quo quibusdam velit doloremque.","enum":"1","set":"1"}]}"#; |
| 216 | + let payload = br#"{"id":0,"database":"test","table":"data_type","pkNames":["id"],"isDdl":false,"type":"INSERT","es":1682057341424,"ts":1682057382913,"sql":"","sqlType":{"id":4,"tinyint":-6,"smallint":5,"mediumint":4,"int":4,"bigint":-5,"float":7,"double":8,"decimal":3,"date":91,"datetime":93,"time":92,"timestamp":93,"char":1,"varchar":12,"binary":2004,"varbinary":2004,"blob":2004,"text":2005,"enum":4,"set":-7,"json":12},"mysqlType":{"binary":"binary","varbinary":"varbinary","enum":"enum","set":"set","bigint":"bigint","float":"float","datetime":"datetime","varchar":"varchar","smallint":"smallint","mediumint":"mediumint","double":"double","date":"date","char":"char","id":"int","tinyint":"tinyint","decimal":"decimal","blob":"blob","text":"text","int":"int","time":"time","timestamp":"timestamp","json":"json"},"old":null,"data":[{"id":"1","tinyint":"5","smallint":"136","mediumint":"172113","int":"1801160058","bigint":"3916589616287113937","float":"0","double":"0.15652","decimal":"1.20364700","date":"2023-04-20","datetime":"2023-02-15 13:01:36","time":"20:23:41","timestamp":"2022-10-13 12:12:54","char":"Kathleen","varchar":"atque esse fugiat et quibusdam qui.","binary":"Joseph\u0000\u0000\u0000\u0000","varbinary":"Douglas","blob":"ducimus ut in commodi necessitatibus error magni repellat exercitationem!","text":"rerum sunt nulla quo quibusdam velit doloremque.","enum":"1","set":"1","json":"{\"a\": 1, \"b\": 2}"}]}"#; |
255 | 217 | let descs = vec![ |
256 | 218 | SourceColumnDesc::simple("id", DataType::Int32, 0.into()), |
257 | 219 | SourceColumnDesc::simple("date", DataType::Date, 1.into()), |
258 | 220 | SourceColumnDesc::simple("datetime", DataType::Timestamp, 2.into()), |
259 | 221 | SourceColumnDesc::simple("time", DataType::Time, 3.into()), |
260 | 222 | SourceColumnDesc::simple("timestamp", DataType::Timestamp, 4.into()), |
261 | 223 | SourceColumnDesc::simple("char", DataType::Varchar, 5.into()), |
| 224 | + SourceColumnDesc::simple("binary", DataType::Bytea, 6.into()), |
| 225 | + SourceColumnDesc::simple("json", DataType::Jsonb, 7.into()), |
262 | 226 | ]; |
263 | 227 | let parser = CanalJsonParser::new(descs.clone(), Default::default()).unwrap(); |
264 | 228 |
|
@@ -299,6 +263,18 @@ mod tests { |
299 | 263 | row.datum_at(5).to_owned_datum(), |
300 | 264 | Some(ScalarImpl::Utf8(Box::from("Kathleen".to_string()))) |
301 | 265 | ); |
| 266 | + assert_eq!( |
| 267 | + row.datum_at(6).to_owned_datum(), |
| 268 | + Some(ScalarImpl::Bytea(Box::from( |
| 269 | + "Joseph\u{0}\u{0}\u{0}\u{0}".as_bytes() |
| 270 | + ))) |
| 271 | + ); |
| 272 | + assert_eq!( |
| 273 | + row.datum_at(7).to_owned_datum(), |
| 274 | + Some(ScalarImpl::Jsonb(JsonbVal::from(Value::from( |
| 275 | + "{\"a\": 1, \"b\": 2}".to_string() |
| 276 | + )))) |
| 277 | + ); |
302 | 278 | } |
303 | 279 |
|
304 | 280 | #[tokio::test] |
|
0 commit comments