@@ -157,6 +157,21 @@ class IcebergTransformE2ETest : public IcebergTestBase {
157157    return  partitionDirs;
158158  }
159159
160+   std::vector<std::string> listDirectoriesRecursively (const  std::string& path) {
161+     std::vector<std::string> allDirs;
162+     auto  firstLevelDirs = listFirstLevelDirectories (path);
163+     allDirs.insert (allDirs.end (), firstLevelDirs.begin (), firstLevelDirs.end ());
164+ 
165+     for  (const  auto & dir : firstLevelDirs) {
166+       if  (std::filesystem::is_directory (dir)) {
167+         auto  subDirs = listDirectoriesRecursively (dir);
168+         allDirs.insert (allDirs.end (), subDirs.begin (), subDirs.end ());
169+       }
170+     }
171+ 
172+     return  allDirs;
173+   }
174+ 
160175  //  Verify the number of partitions and their naming convention.
161176  void  verifyPartitionCount (
162177      const  std::string& outputPath,
@@ -895,4 +910,120 @@ TEST_F(IcebergTransformE2ETest, timestampHourPartitioning) {
895910  }
896911}
897912
913+ TEST_F (IcebergTransformE2ETest, partitionFolderNamingConventions) {
914+   auto  intVector = makeFlatVector<int32_t >(1 , [](auto ) { return  42 ; });
915+   auto  bigintVector =
916+       makeFlatVector<int64_t >(1 , [](auto ) { return  9876543210 ; });
917+   auto  varcharVector =
918+       BaseVector::create<FlatVector<StringView>>(VARCHAR (), 1 , opPool_.get ());
919+   varcharVector->set (0 , StringView (" test string"  ));
920+ 
921+   auto  varcharVector2 =
922+       BaseVector::create<FlatVector<StringView>>(VARCHAR (), 1 , opPool_.get ());
923+   varcharVector2->setNull (0 , true );
924+ 
925+   auto  decimalVector =
926+       BaseVector::create<FlatVector<int64_t >>(DECIMAL (18 , 3 ), 1 , opPool_.get ());
927+   decimalVector->set (0 , 1234567890 );
928+ 
929+   auto  varbinaryVector =
930+       BaseVector::create<FlatVector<StringView>>(VARBINARY (), 1 , opPool_.get ());
931+   std::string binaryData = " binary\0 data\1\2\3 "  ;
932+   varbinaryVector->set (0 , StringView (binaryData));
933+ 
934+   auto  rowVector = makeRowVector (
935+       {" c_int"  ,
936+        " c_bigint"  ,
937+        " c_varchar"  ,
938+        " c_varchar2"  ,
939+        " c_decimal"  ,
940+        " c_varbinary"  },
941+       {intVector,
942+        bigintVector,
943+        varcharVector,
944+        varcharVector2,
945+        decimalVector,
946+        varbinaryVector});
947+   auto  outputDirectory = TempDirectoryPath::create ();
948+   auto  dataSink = createIcebergDataSink (
949+       asRowType (rowVector->type ()),
950+       outputDirectory->getPath (),
951+       {" c_int"  ,
952+        " c_bigint"  ,
953+        " c_varchar"  ,
954+        " c_decimal"  ,
955+        " c_varbinary"  ,
956+        " c_varchar2"  });
957+ 
958+   dataSink->appendData (rowVector);
959+   ASSERT_TRUE (dataSink->finish ());
960+   dataSink->close ();
961+ 
962+   verifyTotalRowCount (
963+       asRowType (rowVector->type ()), outputDirectory->getPath (), 1 );
964+   auto  dataPath = fmt::format (" {}"  , outputDirectory->getPath ());
965+   auto  partitionDirs = listDirectoriesRecursively (dataPath);
966+ 
967+   const  std::string expectedIntFolder = " c_int=42"  ;
968+   const  std::string expectedBigintFolder = " c_bigint=9876543210"  ;
969+   const  std::string expectedVarcharFolder = " c_varchar=test+string"  ;
970+   const  std::string expectedVarcharFolder2 = " c_varchar2=null"  ;
971+   const  std::string expectedDecimalFolder = " c_decimal=1234567.890"  ;
972+   const  std::string expectedVarbinary = " c_varbinary="   +
973+       encoding::Base64::encode (binaryData.data (), binaryData.size ());
974+ 
975+   bool  foundIntPartition = false ;
976+   bool  foundBigintPartition = false ;
977+   bool  foundVarcharPartition = false ;
978+   bool  foundVarcharPartition2 = false ;
979+   bool  foundDecimalPartition = false ;
980+   bool  foundVarbinaryPartition = false ;
981+ 
982+   for  (const  auto & dir : partitionDirs) {
983+     const  auto  dirName = std::filesystem::path (dir).filename ().string ();
984+ 
985+     if  (dirName == expectedIntFolder) {
986+       foundIntPartition = true ;
987+       verifyPartitionData (asRowType (rowVector->type ()), dir, " c_int = 42"  , 1 );
988+     } else  if  (dirName == expectedBigintFolder) {
989+       foundBigintPartition = true ;
990+       verifyPartitionData (
991+           asRowType (rowVector->type ()), dir, " c_bigint = 9876543210"  , 1 );
992+     } else  if  (dirName == expectedVarcharFolder) {
993+       foundVarcharPartition = true ;
994+       verifyPartitionData (
995+           asRowType (rowVector->type ()), dir, " c_varchar = 'test string'"  , 1 );
996+     } else  if  (dirName == expectedVarcharFolder2) {
997+       foundVarcharPartition2 = true ;
998+       verifyPartitionData (
999+           asRowType (rowVector->type ()), dir, " c_varchar2 IS NULL"  , 1 );
1000+     } else  if  (dirName == expectedDecimalFolder) {
1001+       foundDecimalPartition = true ;
1002+       verifyPartitionData (
1003+           asRowType (rowVector->type ()),
1004+           dir,
1005+           " c_decimal = DECIMAL '1234567.890'"  ,
1006+           1 );
1007+     } else  if  (dirName.find (expectedVarbinary) == 0 ) {
1008+       foundVarbinaryPartition = true ;
1009+       verifyPartitionData (
1010+           asRowType (rowVector->type ()), dir, " c_varbinary IS NOT NULL"  , 1 );
1011+     }
1012+   }
1013+ 
1014+   ASSERT_TRUE (foundIntPartition)
1015+       << " Integer partition folder not found: "   << expectedIntFolder;
1016+   ASSERT_TRUE (foundBigintPartition)
1017+       << " Bigint partition folder not found: "   << expectedBigintFolder;
1018+   ASSERT_TRUE (foundVarcharPartition)
1019+       << " Varchar partition folder not found: "   << expectedVarcharFolder;
1020+   ASSERT_TRUE (foundVarcharPartition2)
1021+       << " Varchar2 partition folder not found: "   << expectedVarcharFolder2;
1022+   ASSERT_TRUE (foundDecimalPartition)
1023+       << " Decimal partition folder not found: "   << expectedDecimalFolder;
1024+   ASSERT_TRUE (foundVarbinaryPartition)
1025+       << " Varbinary partition folder not found with prefix: " 
1026+       << expectedVarbinary;
1027+ }
1028+ 
8981029} //  namespace facebook::velox::connector::hive::iceberg::test
0 commit comments