@@ -77,43 +77,81 @@ object FactorGenerationStage extends DatagenStage {
7777 .format(" org.apache.spark.sql.execution.datasources.csv.CSVFileFormat" )
7878 .option(" header" , " true" )
7979 .option(" delimiter" , " |" )
80- .load(s " ${args.outputDir}/raw/transfer/*.csv " )
81- .select($" fromId" , $" toId" , $" amount" .cast(" double" ), $" createTime" )
80+ .load(s " ${args.outputDir}/snapshot/AccountTransferAccount.csv " )
81+ .select(
82+ $" fromId" ,
83+ $" toId" ,
84+ $" amount" .cast(" double" ),
85+ (unix_timestamp(
86+ coalesce(
87+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss.SSS" ),
88+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss" )
89+ )
90+ ) * 1000 ).alias(" createTime" )
91+ )
8292
8393 val withdrawRDD = spark.read
8494 .format(" org.apache.spark.sql.execution.datasources.csv.CSVFileFormat" )
8595 .option(" header" , " true" )
8696 .option(" delimiter" , " |" )
87- .load(s " ${args.outputDir}/raw/withdraw/*.csv " )
88- .select($" fromId" , $" toId" , $" amount" .cast(" double" ), $" createTime" )
97+ .load(s " ${args.outputDir}/snapshot/AccountWithdrawAccount.csv " )
98+ .select(
99+ $" fromId" ,
100+ $" toId" ,
101+ $" amount" .cast(" double" ),
102+ (unix_timestamp(
103+ coalesce(
104+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss.SSS" ),
105+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss" )
106+ )
107+ ) * 1000 ).alias(" createTime" )
108+ )
89109
90110 val depositRDD = spark.read
91111 .format(" org.apache.spark.sql.execution.datasources.csv.CSVFileFormat" )
92112 .option(" header" , " true" )
93113 .option(" delimiter" , " |" )
94- .load(s " ${args.outputDir}/raw/deposit/* .csv " )
114+ .load(s " ${args.outputDir}/snapshot/LoanDepositAccount .csv " )
95115 .select($" accountId" , $" loanId" )
96116
97117 val personInvestRDD = spark.read
98118 .format(" org.apache.spark.sql.execution.datasources.csv.CSVFileFormat" )
99119 .option(" header" , " true" )
100120 .option(" delimiter" , " |" )
101- .load(s " ${args.outputDir}/raw/personInvest/*.csv " )
102- .select($" investorId" , $" companyId" , $" createTime" )
121+ .load(s " ${args.outputDir}/snapshot/PersonInvestCompany.csv " )
122+ .select(
123+ $" investorId" ,
124+ $" companyId" ,
125+ (unix_timestamp(
126+ coalesce(
127+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss.SSS" ),
128+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss" )
129+ )
130+ ) * 1000 ).alias(" createTime" )
131+ )
103132
104133 val OwnRDD = spark.read
105134 .format(" org.apache.spark.sql.execution.datasources.csv.CSVFileFormat" )
106135 .option(" header" , " true" )
107136 .option(" delimiter" , " |" )
108- .load(s " ${args.outputDir}/raw/personOwnAccount/* .csv " )
137+ .load(s " ${args.outputDir}/snapshot/PersonOwnAccount .csv " )
109138 .select($" personId" , $" accountId" )
110139
111140 val personGuaranteeRDD = spark.read
112141 .format(" org.apache.spark.sql.execution.datasources.csv.CSVFileFormat" )
113142 .option(" header" , " true" )
114143 .option(" delimiter" , " |" )
115- .load(s " ${args.outputDir}/raw/personGuarantee/*.csv " )
116- .select($" fromId" , $" toId" , $" createTime" )
144+ .load(s " ${args.outputDir}/snapshot/PersonGuaranteePerson.csv " )
145+ .select(
146+ $" fromId" ,
147+ $" toId" ,
148+ (unix_timestamp(
149+ coalesce(
150+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss.SSS" ),
151+ to_timestamp($" createTime" , " yyyy-MM-dd HH:mm:ss" )
152+ )
153+ ) * 1000 ).alias(" createTime" )
154+ )
117155
118156 def transformItems (
119157 df : DataFrame ,
0 commit comments