@@ -39,16 +39,20 @@ public JwBibleHarvester(ILogger logger, DownloadUtility downloadUtility, IDataPe
3939 /// </summary>
4040 public IReadOnlyDictionary < ( string LanguageCode , string PublicationCode ) , string > LocalizedPublicationNames => localizedPublicationNames ;
4141
42- internal async Task HarvestBibleLinks (
42+ /// <summary>
43+ /// Discovery phase: Discovers all languages for all publications and sections using alllangs=1 and langwritten=E.
44+ /// This runs FIRST before any harvesting or seeding.
45+ /// </summary>
46+ internal async Task DiscoverLanguages (
4347 Dictionary < string , string > biblePublicationCodeToNameMappings ,
44- ConcurrentDictionary < string , LanguageInfo > languageCodeToInfoMappings ,
45- ConcurrentDictionary < string , List < string > > languageCodeToEditionsMapping ,
4648 bool isTestRun = false )
4749 {
50+ Logger . Information ( "=== DISCOVERY PHASE: Discovering languages for all publications and sections ===" ) ;
51+
4852 foreach ( var publication in biblePublicationCodeToNameMappings )
4953 {
5054 var publicationCode = publication . Key ;
51- Logger . Information ( "Starting harvest for publication: {PublicationCode} ({PublicationName})" , publicationCode , publication . Value ) ;
55+ Logger . Information ( "Discovering languages for publication: {PublicationCode} ({PublicationName})" , publicationCode , publication . Value ) ;
5256
5357 // Discover languages for each book (1-66) and save for English
5458 var allDiscoveredLanguages = await DiscoverLanguagesForAllBooks ( publicationCode , publication . Value , isTestRun ) ;
@@ -60,7 +64,6 @@ internal async Task HarvestBibleLinks(
6064 }
6165
6266 // Save language discovery results for English
63- // Save language discovery
6467 if ( dataPersister != null )
6568 {
6669 var languageCodeToNameMapping = allDiscoveredLanguages . ToDictionary ( kvp => kvp . Key , kvp => kvp . Value . Name ) ;
@@ -76,6 +79,61 @@ internal async Task HarvestBibleLinks(
7679 {
7780 await dataPersister . SavePublicationLanguages ( publicationCode , allDiscoveredLanguages ) ;
7881 }
82+ }
83+
84+ Logger . Information ( "=== DISCOVERY PHASE COMPLETED ===" ) ;
85+ }
86+
87+ internal async Task HarvestBibleLinks (
88+ Dictionary < string , string > biblePublicationCodeToNameMappings ,
89+ ConcurrentDictionary < string , LanguageInfo > languageCodeToInfoMappings ,
90+ ConcurrentDictionary < string , List < string > > languageCodeToEditionsMapping ,
91+ bool isTestRun = false )
92+ {
93+ foreach ( var publication in biblePublicationCodeToNameMappings )
94+ {
95+ var publicationCode = publication . Key ;
96+ Logger . Information ( "Starting harvest for publication: {PublicationCode} ({PublicationName})" , publicationCode , publication . Value ) ;
97+
98+ // Discovery already happened in Phase 1, so get discovered languages from dataPersister
99+ // If dataPersister is not available or doesn't have the data, fall back to discovery
100+ Dictionary < string , LanguageInfo > ? allDiscoveredLanguages = null ;
101+
102+ if ( dataPersister is DbSeeder dbSeeder )
103+ {
104+ // Try to get discovered languages from the data store
105+ var normalizedPublicationCode = publicationCode . ToLowerInvariant ( ) ;
106+ if ( dbSeeder . PublicationLanguages . TryGetValue ( normalizedPublicationCode , out var discoveredLangs ) )
107+ {
108+ allDiscoveredLanguages = discoveredLangs ;
109+ Logger . Debug ( "Using discovered languages from discovery phase for publication {PublicationCode}" , publicationCode ) ;
110+ }
111+ }
112+
113+ // Fallback: If discovery data not available, discover now (shouldn't happen if discovery phase ran)
114+ if ( allDiscoveredLanguages == null || allDiscoveredLanguages . Count == 0 )
115+ {
116+ Logger . Warning ( "No discovered languages found for publication {PublicationCode} in data store. Running discovery now..." , publicationCode ) ;
117+ allDiscoveredLanguages = await DiscoverLanguagesForAllBooks ( publicationCode , publication . Value , isTestRun ) ;
118+
119+ if ( allDiscoveredLanguages == null || allDiscoveredLanguages . Count == 0 )
120+ {
121+ Logger . Warning ( "No languages discovered for publication {PublicationCode}. Skipping." , publicationCode ) ;
122+ continue ;
123+ }
124+
125+ // Save discovered languages if not already saved
126+ if ( dataPersister != null )
127+ {
128+ await dataPersister . SavePublicationLanguages ( publicationCode , allDiscoveredLanguages ) ;
129+ }
130+ }
131+
132+ if ( allDiscoveredLanguages == null || allDiscoveredLanguages . Count == 0 )
133+ {
134+ Logger . Warning ( "No languages available for publication {PublicationCode}. Skipping." , publicationCode ) ;
135+ continue ;
136+ }
79137
80138 // Verify English (E) is available (it will be seeded separately after discovery)
81139 if ( ! allDiscoveredLanguages . TryGetValue ( "E" , out var englishLanguageInfo ) )
0 commit comments