@@ -424,18 +424,17 @@ TEST_F(IcebergTransformE2ETest, yearPartitioning) {
424424
425425  auto  dataPath = fmt::format (" {}"  , outputDirectory->getPath ());
426426  auto  partitionDirs = listFirstLevelDirectories (dataPath);
427-   std::unordered_map<int32_t , int32_t > yearToYearsSince1970 = {
428-       {2020 , 50 }, {2021 , 51 }, {2022 , 52 }, {2023 , 53 }, {2024 , 54 }, {2025 , 55 }};
429427
430-   for  (const   auto & [ year, yearsSince1970] : yearToYearsSince1970 ) {
431-     const  auto  expectedDirName = fmt::format (" c_date_year={}"  , yearsSince1970 );
428+   for  (int32_t  year =  2020 ;  year <=  2025 ; year++ ) {
429+     const  auto  expectedDirName = fmt::format (" c_date_year={}"  , year );
432430    bool  foundPartition = false ;
433431    auto  yearFilter = [](const  int32_t  year) -> std::string {
434432      return  fmt::format (
435433          " YEAR(DATE '{}-01-01')={}"  ,
436434          std::to_string (year),
437435          std::to_string (year));
438436    };
437+ 
439438    for  (const  auto & dir : partitionDirs) {
440439      const  auto  dirName = std::filesystem::path (dir).filename ().string ();
441440      if  (dirName == expectedDirName) {
@@ -791,10 +790,9 @@ TEST_F(IcebergTransformE2ETest, timestampYearPartitioning) {
791790  for  (const  auto & dir : partitionDirs) {
792791    const  auto  dirName = std::filesystem::path (dir).filename ().string ();
793792    auto  [c, v] = buildFilter (dirName);
794-     auto  yearsSince1970 = std::stoi (v);
795-     auto  year = 1970  + yearsSince1970;
793+     auto  year = std::stoi (v);
796794    std::string filter = fmt::format (" YEAR(c_timestamp) = {}"  , year);
797-     auto  expectedRowCount = yearToExpectedCount[ year] ;
795+     auto  expectedRowCount = yearToExpectedCount. at ( year) ;
798796    verifyPartitionData (rowType_, dir, filter, expectedRowCount);
799797  }
800798}
@@ -820,28 +818,44 @@ TEST_F(IcebergTransformE2ETest, timestampMonthPartitioning) {
820818
821819  auto  dataPath = fmt::format (" {}"  , outputDirectory->getPath ());
822820  auto  partitionDirs = listFirstLevelDirectories (dataPath);
821+   std::unordered_map<std::string, int32_t > monthToExpectedCount;
822+ 
823+   for  (const  auto & batch : batches) {
824+     auto  timestampVector = batch->childAt (6 )->as <SimpleVector<Timestamp>>();
825+     for  (vector_size_t  i = 0 ; i < batch->size (); i++) {
826+       if  (!timestampVector->isNullAt (i)) {
827+         Timestamp ts = timestampVector->valueAt (i);
828+         std::tm tm;
829+         if  (Timestamp::epochToCalendarUtc (ts.getSeconds (), tm)) {
830+           int32_t  year = tm.tm_year  + 1900 ;
831+           int32_t  month = tm.tm_mon  + 1 ;
832+           std::string monthKey = fmt::format (" {:04d}-{:02d}"  , year, month);
833+           monthToExpectedCount[monthKey]++;
834+         }
835+       }
836+     }
837+   }
838+ 
823839  for  (const  auto & dir : partitionDirs) {
824840    const  auto  dirName = std::filesystem::path (dir).filename ().string ();
825841    auto  [c, v] = buildFilter (dirName);
826-     auto  monthsSince1970 = std::stoi (v);
827-     auto  yearsSince1970 = monthsSince1970 / 12 ;
828-     auto  monthOfYear = (monthsSince1970 % 12 ) + 1 ;
829-     auto  year = 1970  + yearsSince1970;
830-     std::string filter = fmt::format (
831-         " YEAR(c_timestamp) = {} AND MONTH(c_timestamp) = {}"  ,
832-         year,
833-         monthOfYear);
842+     size_t  dashPos = v.find (' -'  );
843+     ASSERT_NE (dashPos, std::string::npos) << " Invalid month format: "   << v;
834844
835-     verifyPartitionData (rowType_, dir, filter, 0 , true );
845+     int32_t  year = std::stoi (v.substr (0 , dashPos));
846+     int32_t  month = std::stoi (v.substr (dashPos + 1 ));
847+     std::string filter = fmt::format (
848+         " YEAR(c_timestamp) = {} AND MONTH(c_timestamp) = {}"  , year, month);
849+     std::string monthKey = fmt::format (" {:04d}-{:02d}"  , year, month);
850+     auto  expectedCount = monthToExpectedCount[monthKey];
851+     verifyPartitionData (rowType_, dir, filter, expectedCount);
836852  }
837853}
838854
839855TEST_F (IcebergTransformE2ETest, timestampDayPartitioning) {
840856  constexpr  auto  numBatches = 2 ;
841857  constexpr  auto  rowsPerBatch = 100 ;
842- 
843858  auto  batches = createTestData (numBatches, rowsPerBatch);
844- 
845859  auto  outputDirectory = TempDirectoryPath::create ();
846860  auto  dataSink = createIcebergDataSink (
847861      rowType_, outputDirectory->getPath (), {" day(c_timestamp)"  });
@@ -858,27 +872,55 @@ TEST_F(IcebergTransformE2ETest, timestampDayPartitioning) {
858872
859873  auto  dataPath = fmt::format (" {}"  , outputDirectory->getPath ());
860874  auto  partitionDirs = listFirstLevelDirectories (dataPath);
875+   std::unordered_map<std::string, int32_t > dayToExpectedCount;
876+   for  (const  auto & batch : batches) {
877+     auto  timestampVector = batch->childAt (6 )->as <SimpleVector<Timestamp>>();
878+     for  (vector_size_t  i = 0 ; i < batch->size (); i++) {
879+       if  (!timestampVector->isNullAt (i)) {
880+         Timestamp ts = timestampVector->valueAt (i);
881+         std::tm tm;
882+         if  (Timestamp::epochToCalendarUtc (ts.getSeconds (), tm)) {
883+           int32_t  year = tm.tm_year  + 1900 ;
884+           int32_t  month = tm.tm_mon  + 1 ;
885+           int32_t  day = tm.tm_mday ;
886+           std::string dayKey =
887+               fmt::format (" {:04d}-{:02d}-{:02d}"  , year, month, day);
888+           dayToExpectedCount[dayKey]++;
889+         }
890+       }
891+     }
892+   }
893+ 
861894  for  (const  auto & dir : partitionDirs) {
862895    const  auto  dirName = std::filesystem::path (dir).filename ().string ();
863896    auto  [c, v] = buildFilter (dirName);
864-     auto  daysSince1970 = std::stoi (v);
897+     std::vector<std::string> dateParts;
898+     folly::split (' -'  , v, dateParts);
899+     ASSERT_EQ (dateParts.size (), 3 ) << " Invalid day format: "   << v;
900+ 
901+     int32_t  year = std::stoi (dateParts[0 ]);
902+     int32_t  month = std::stoi (dateParts[1 ]);
903+     int32_t  day = std::stoi (dateParts[2 ]);
865904
866905    std::string filter = fmt::format (
867-         " c_timestamp >= TIMESTAMP '1970-01-01' + INTERVAL  {} DAY AND  " 
868-         " c_timestamp < TIMESTAMP '1970-01-01' + INTERVAL {} DAY "  ,
869-         daysSince1970 ,
870-         daysSince1970 +  1 );
906+         " YEAR( c_timestamp) = {} AND MONTH(c_timestamp) =  {} AND  DAY(c_timestamp) = {} " , 
907+         year ,
908+         month ,
909+         day );
871910
872-     verifyPartitionData (rowType_, dir, filter, 0 , true );
911+     //  Get expected count for this day
912+     std::string dayKey = fmt::format (" {:04d}-{:02d}-{:02d}"  , year, month, day);
913+     auto  expectedCount = dayToExpectedCount[dayKey];
914+ 
915+     //  Verify partition data with actual row count check
916+     verifyPartitionData (rowType_, dir, filter, expectedCount);
873917  }
874918}
875919
876920TEST_F (IcebergTransformE2ETest, timestampHourPartitioning) {
877921  constexpr  auto  numBatches = 2 ;
878922  constexpr  auto  rowsPerBatch = 100 ;
879- 
880923  auto  batches = createTestData (numBatches, rowsPerBatch);
881- 
882924  auto  outputDirectory = TempDirectoryPath::create ();
883925  auto  dataSink = createIcebergDataSink (
884926      rowType_, outputDirectory->getPath (), {" hour(c_timestamp)"  });
@@ -895,18 +937,50 @@ TEST_F(IcebergTransformE2ETest, timestampHourPartitioning) {
895937
896938  auto  dataPath = fmt::format (" {}"  , outputDirectory->getPath ());
897939  auto  partitionDirs = listFirstLevelDirectories (dataPath);
940+   std::unordered_map<std::string, int32_t > hourToExpectedCount;
941+ 
942+   for  (const  auto & batch : batches) {
943+     auto  timestampVector = batch->childAt (6 )->as <SimpleVector<Timestamp>>();
944+     for  (vector_size_t  i = 0 ; i < batch->size (); i++) {
945+       if  (!timestampVector->isNullAt (i)) {
946+         Timestamp ts = timestampVector->valueAt (i);
947+         std::tm tm;
948+         if  (Timestamp::epochToCalendarUtc (ts.getSeconds (), tm)) {
949+           int32_t  year = tm.tm_year  + 1900 ;
950+           int32_t  month = tm.tm_mon  + 1 ;
951+           int32_t  day = tm.tm_mday ;
952+           int32_t  hour = tm.tm_hour ;
953+           std::string hourKey = fmt::format (
954+               " {:04d}-{:02d}-{:02d}-{:02d}"  , year, month, day, hour);
955+           hourToExpectedCount[hourKey]++;
956+         }
957+       }
958+     }
959+   }
898960
899961  for  (const  auto & dir : partitionDirs) {
900962    const  auto  dirName = std::filesystem::path (dir).filename ().string ();
901963    auto  [c, v] = buildFilter (dirName);
902-     auto  hoursSince1970 = std::stoi (v);
903-     std::string filter = fmt::format (
904-         " c_timestamp >= TIMESTAMP '1970-01-01' + INTERVAL {} HOUR AND " 
905-         " c_timestamp < TIMESTAMP '1970-01-01' + INTERVAL {} HOUR"  ,
906-         hoursSince1970,
907-         hoursSince1970 + 1 );
964+     std::vector<std::string> dateParts;
965+     folly::split (' -'  , v, dateParts);
966+     ASSERT_EQ (dateParts.size (), 4 ) << " Invalid hour format: "   << v;
908967
909-     verifyPartitionData (rowType_, dir, filter, 0 , true );
968+     int32_t  year = std::stoi (dateParts[0 ]);
969+     int32_t  month = std::stoi (dateParts[1 ]);
970+     int32_t  day = std::stoi (dateParts[2 ]);
971+     int32_t  hour = std::stoi (dateParts[3 ]);
972+ 
973+     std::string filter = fmt::format (
974+         " YEAR(c_timestamp) = {} AND MONTH(c_timestamp) = {} AND " 
975+         " DAY(c_timestamp) = {} AND HOUR(c_timestamp) = {}"  ,
976+         year,
977+         month,
978+         day,
979+         hour);
980+     std::string hourKey =
981+         fmt::format (" {:04d}-{:02d}-{:02d}-{:02d}"  , year, month, day, hour);
982+     auto  expectedCount = hourToExpectedCount[hourKey];
983+     verifyPartitionData (rowType_, dir, filter, expectedCount);
910984  }
911985}
912986
0 commit comments