@@ -1920,9 +1920,375 @@ IEnumerable<InlinedNode> enumerateTree(string id)
19201920 //Console.WriteLine($"nodeCountInlined: {nodeCountInlined,16:N0}");
19211921 }
19221922
/// <summary>
/// Scratch test (2022-09-17): two-phase out-of-core import of a LAZ point cloud.
/// Phase 1 reads the source file in ~1M-point chunks and writes each chunk as a
/// gzipped durable map to {storePath}/tmp. On a subsequent run, phase 2 picks up
/// those chunk files, distributes them into an octree directory structure under
/// {storePath}/root2, and merges the leaf directories into single files.
/// NOTE(review): filename/storePath are intentionally empty placeholders — fill in before running.
/// </summary>
static void SmTest20220917()
{
    const int splitLimit = 128 * 1024; // max number of points per leaf chunk (was duplicated as a magic number)

    var filename = @"";
    var storePath = @"";
    var tmpPath = Path.Combine(storePath, "tmp");
    if (!Directory.Exists(storePath)) Directory.CreateDirectory(storePath);
    if (!Directory.Exists(tmpPath)) Directory.CreateDirectory(tmpPath);
    _ = GenericChunk.Defs.Positions3d; // touch to force static initialization of the chunk defs

    var info = Laszip.LaszipInfo(filename, ParseConfig.Default);
    Report.Line($"{new Cell(info.Bounds)}");

    var rootCell = new Cell(info.Bounds);
    var rootDir = Path.Combine(storePath, "root2");

    // Validates the octree directory structure: inner directories must contain only
    // subdirectories, leaf directories must contain exactly one (merged) file.
    static void Check(string dir)
    {
        var fileCount = Directory.EnumerateFiles(dir).Count();
        var subDirs = Directory.GetDirectories(dir);

        if (subDirs.Length > 0)
        {
            if (fileCount > 0) Report.Error($"[{dir}] has files AND dirs");
            foreach (var subDir in subDirs) Check(subDir);
        }
        else if (fileCount == 0)
        {
            Report.Error($"[{dir}] empty");
        }
        else if (fileCount > 1)
        {
            Report.Error($"[{dir}] {fileCount} files");
        }
    }

    // Loads a gzipped durable-map file back into a GenericChunk.
    static GenericChunk LoadChunk(string path)
    {
        var buffer = File.ReadAllBytes(path);
        var map = (IReadOnlyDictionary<Durable.Def, object>)DurableCodec.DeserializeDurableMap(buffer.UnGZip());
        return new GenericChunk(map);
    }

    // Serializes a GenericChunk into a gzipped durable-map buffer.
    static byte[] SerializeChunk(GenericChunk chunk)
        => DurableCodec.SerializeDurableMap(chunk.Data).Gzip();

    // Writes a chunk to disk and verifies that it can be read back and decompressed.
    // Retries until the round trip succeeds; BUGFIX: a short delay between attempts
    // replaces the previous hot retry loop.
    static void SaveChunk(string path, GenericChunk chunk)
    {
        while (true)
        {
            try
            {
                File.WriteAllBytes(path, SerializeChunk(chunk));
                _ = File.ReadAllBytes(path).UnGZip(); // verify file can be read back and decompressed
                break;
            }
            catch (Exception e)
            {
                Report.Error($"[{path}] {e.Message}");
                Task.Delay(500).Wait();
            }
        }
    }

    // ---- phase 2: distribute existing phase-1 chunks into the octree and merge leaves ----
    {
        var ts = new List<Task>();
        var q = new Queue<(string fileName, byte[] content, string[] filesToDelete)>();
        var runSaveTask = true; // accessed cross-thread via Volatile (BUGFIX: was an unsynchronized bool)

        // Background writer: drains the queue, writes each buffer to disk and then deletes
        // the files it supersedes. The write happens while holding the queue lock, so that
        // observing an empty queue under the lock implies no write is still in flight.
        var saveTask = Task.Run(() =>
        {
            while (true)
            {
                int count;
                lock (q) // BUGFIX: q.Count was previously read without holding the lock
                {
                    count = q.Count;
                    if (count > 0)
                    {
                        var (fn, buffer, filesToDelete) = q.Dequeue();
                        File.WriteAllBytes(fn, buffer);
                        // delete superseded files only AFTER the replacement is safely on
                        // disk (File.Delete does not throw if a file is already gone)
                        foreach (var f in filesToDelete) File.Delete(f);
                    }
                }
                Report.Line($"[QUEUE] length = {count}");

                if (count == 0)
                {
                    if (!Volatile.Read(ref runSaveTask)) break; // queue drained and shutdown requested
                    Task.Delay(1000).Wait();
                }
            }
        });

        // Blocks until the queue is fully drained (including the write in flight, see above).
        void WaitForQueueEmpty()
        {
            while (true)
            {
                bool empty;
                lock (q) empty = q.Count == 0;
                if (empty) return;
                Task.Delay(1000).Wait();
            }
        }

        var phase1Chunks = Directory
            .GetFiles(tmpPath, "*.gz")
            .Where(s => Path.GetFileName(s).StartsWith("chunk-"))
            .ToArray();

        if (phase1Chunks.Length > 0)
        {
            var totalCount = 0L;
            Report.BeginTimed("phase 2");

            for (var i = 0; i < phase1Chunks.Length; i++)
            {
                var fn = phase1Chunks[i];
                try
                {
                    Report.Line($"[[{i + 1,5}/{phase1Chunks.Length}]] {fn} | {totalCount,15:N0}");
                    var chunk = LoadChunk(fn);
                    totalCount += chunk.Count; // sequential loop, no Interlocked needed
                    SplitAndSave(rootCell, rootDir, Path.GetFileName(fn), chunk);
                }
                catch (Exception e)
                {
                    Report.Error($"[{fn}] {e}");
                }
            }

            // BUGFIX: wait for the producer tasks BEFORE waiting for the queue to drain;
            // previously the queue could look empty while producers were still enqueueing,
            // letting Merge run against incomplete directories.
            Task.WhenAll(ts).Wait();
            ts.Clear();
            WaitForQueueEmpty();

            Report.Line("merging");
            Merge(rootCell, rootDir);

            Report.Line("waiting for tasks to finish");
            Task.WhenAll(ts).Wait(); // BUGFIX: was redundantly called twice in a row

            Volatile.Write(ref runSaveTask, false);
            Report.Line("waiting for queue worker to finish");
            saveTask.Wait(); // writer drains whatever Merge enqueued before exiting
            Report.EndTimed();

            Check(rootDir);
            return;
        }
        else
        {
            // BUGFIX: previously the background writer was left running (and logging)
            // forever when there were no phase-1 chunks; shut it down before phase 1.
            Volatile.Write(ref runSaveTask, false);
            saveTask.Wait();
        }

        // Recursively merges leaf directories: all chunk files in a leaf are merged into a
        // single "merged.gz"; merged leaves that exceed splitLimit are split further down.
        void Merge(Cell cell, string dir)
        {
            var fileCount = Directory.EnumerateFiles(dir).Count();
            var subDirs = Directory.GetDirectories(dir);

            if (subDirs.Length > 0)
            {
                if (fileCount > 0) throw new Exception($"[{dir}] has files AND dirs");

                foreach (var subDir in subDirs)
                {
                    // octant directories are named "0".."7"
                    var octantIndex = int.Parse(Path.GetFileName(subDir));
                    Merge(cell.GetOctant(octantIndex), subDir);
                }
            }
            else if (fileCount == 0)
            {
                throw new Exception($"[{dir}] empty");
            }
            else if (fileCount > 1)
            {
                var fns = Directory.GetFiles(dir);
                var merged = GenericChunk.ImmutableMerge(fns.Select(LoadChunk));

                if (merged.Count > splitLimit)
                {
                    // sources are fully contained in 'merged' and must be removed before the
                    // split, or SplitAndSave would push them down again (duplicating points)
                    foreach (var fn in fns) File.Delete(fn);
                    SplitAndSave(cell, dir, "merged.gz", merged);
                    Console.ForegroundColor = ConsoleColor.Red;
                }
                else
                {
                    // BUGFIX: source files are now deleted by the writer only AFTER
                    // merged.gz is safely on disk (previously they were deleted up front
                    // AND again by the writer, losing the data if the process died
                    // before the write completed)
                    ts.Add(Task.Run(() =>
                    {
                        var fn = Path.Combine(dir, "merged.gz");
                        var buffer = SerializeChunk(merged);
                        lock (q) q.Enqueue((fn, buffer, fns));
                    }));
                    Console.ForegroundColor = ConsoleColor.Green;
                }

                Console.WriteLine($"{fns.Length,4} files | {merged.Count,12:N0} points | {dir}");
                Console.ResetColor();
            }
        }

        // Recursively distributes 'chunk' into the octree directory at 'path'. Chunks larger
        // than splitLimit (or landing in a directory that already has octant subdirectories)
        // are split into octants; small chunks are serialized and handed to the writer.
        // NOTE(review): files still pending in the write queue are invisible to the push-down
        // below; a file written later into a split directory would make Check fail
        // ("has files AND dirs") — confirm whether this can occur in practice.
        void SplitAndSave(Cell cell, string path, string filename, GenericChunk chunk)
        {
            var split = chunk.Count > splitLimit;

            if (Directory.Exists(path))
            {
                // a directory with subdirectories is an inner node; it must not receive
                // files directly, so split regardless of the chunk's size
                if (!split && Directory.EnumerateDirectories(path).Any()) split = true;
            }
            else
            {
                Directory.CreateDirectory(path);
                Report.Warn($"create {cell}");
            }

            if (split)
            {
                var octants = chunk.Split(cell);
                for (var i = 0; i < 8; i++)
                {
                    var o = octants[i];
                    if (o == null) continue;
                    SplitAndSave(cell.GetOctant(i), Path.Combine(path, i.ToString()), filename, o);
                }

                // this directory just became an inner node: push any files it still
                // holds down into the octants as well
                foreach (var fn in Directory.GetFiles(path))
                {
                    Report.Line($"push down {fn}");
                    var os = LoadChunk(fn).Split(cell);
                    for (var i = 0; i < 8; i++)
                    {
                        var o = os[i];
                        if (o == null) continue;
                        SplitAndSave(cell.GetOctant(i), Path.Combine(path, i.ToString()), Path.GetFileName(fn), o);
                    }
                    File.Delete(fn);
                }
            }
            else
            {
                ts.Add(Task.Run(() =>
                {
                    var fn = Path.Combine(path, filename);
                    var buffer = SerializeChunk(chunk);
                    lock (q) q.Enqueue((fn, buffer, Array.Empty<string>()));
                }));
            }
        }
    }

    // (a legacy "v0.0.2" counting pass over phase-1 chunks was removed here: it was
    // unreachable — phase 2 above either handles existing chunks and returns, or
    // there are none left to count)

    // ---- phase 1: read the source file in ~1M-point chunks and gzip each to tmpPath ----
    Report.BeginTimed("phase 1");
    {
        var n = 0L;
        var i = 0;
        var ts = new List<Task>();
        // deduplication (.ImmutableDeduplicate) was disabled in the original scratch code,
        // so the always-zero "dups" bookkeeping has been removed
        foreach (var chunk in Laszip.Chunks(filename, ParseConfig.Default.WithMaxChunkPointCount(1_000_000)))
        {
            n += chunk.Count;
            Report.Line($"[{i++,5}] {100.0 * n / info.PointCount,6:N2}% | {n,10:N0}/{info.PointCount:N0} | {new Cell(chunk.BoundingBox),-20}");

            var j = i; // capture a stable index for the task below (i keeps incrementing)
            ts.Add(Task.Run(() =>
            {
                var outFileName = Path.Combine(tmpPath, $"chunk-{j:00000}.gz");
                SaveChunk(outFileName, chunk.ToGenericChunk());
            }));
        }

        Task.WhenAll(ts).Wait();
    }
    Report.EndTimed();
}
2286+
19232287 public static void Main ( string [ ] _ )
19242288 {
1925- SmTest20220815 ( ) ;
2289+ SmTest20220917 ( ) ;
2290+
2291+ //SmTest20220815();
19262292
19272293 //new InlinedNodeTests().CanInlineNode(); return;
19282294
0 commit comments