@@ -184,9 +184,8 @@ absl::Status GetNumSamplesToPadAtEndAndValidate(
184184}
185185
186186absl::Status InitializeSubstreamData (
187+ uint32_t required_samples_to_delay,
187188 const SubstreamIdLabelsMap& substream_id_to_labels,
188- const absl::flat_hash_map<uint32_t , std::unique_ptr<EncoderBase>>&
189- substream_id_to_encoder,
190189 const size_t num_samples_per_frame,
191190 bool user_samples_to_trim_at_start_includes_codec_delay,
192191 const uint32_t user_samples_to_trim_at_start,
@@ -198,17 +197,9 @@ absl::Status InitializeSubstreamData(
198197 // samples will occur later to keep trimming logic in one place as much as
199198 // possible.
200199 for (const auto & [substream_id, labels] : substream_id_to_labels) {
201- const auto encoder_iter = substream_id_to_encoder.find (substream_id);
202- if (encoder_iter == substream_id_to_encoder.end ()) {
203- return absl::InvalidArgumentError (absl::StrCat (
204- " Failed to find encoder for substream ID= " , substream_id));
205- }
206-
207- uint32_t encoder_required_samples_to_delay =
208- encoder_iter->second ->GetNumberOfSamplesToDelayAtStart ();
209200 if (user_samples_to_trim_at_start_includes_codec_delay) {
210201 MAYBE_RETURN_IF_NOT_OK (ValidateUserStartTrimIncludesCodecDelay (
211- user_samples_to_trim_at_start, encoder_required_samples_to_delay ));
202+ user_samples_to_trim_at_start, required_samples_to_delay ));
212203 }
213204
214205 // Initialize a `SubstreamData` with virtual samples for any delay
@@ -217,17 +208,17 @@ absl::Status InitializeSubstreamData(
217208 const auto & [substream_data_iter, inserted] =
218209 substream_id_to_substream_data.emplace (
219210 substream_id,
220- SubstreamData{. substream_id = substream_id,
221- . frames_in_obu = SubstreamFrames<InternalSampleType>(
222- num_channels, num_samples_per_frame),
223- . frames_to_encode = SubstreamFrames< int32_t >(
224- num_channels, num_samples_per_frame),
225- . output_gains_linear = {} ,
226- . num_samples_to_trim_at_end = 0 ,
227- . num_samples_to_trim_at_start =
228- encoder_required_samples_to_delay });
211+ SubstreamData{
212+ . substream_id = substream_id,
213+ . frames_in_obu = SubstreamFrames<InternalSampleType>(
214+ num_channels, num_samples_per_frame),
215+ . frames_to_encode = SubstreamFrames< int32_t >(
216+ num_channels, num_samples_per_frame) ,
217+ . output_gains_linear = {} ,
218+ . num_samples_to_trim_at_end = 0 ,
219+ . num_samples_to_trim_at_start = required_samples_to_delay });
229220 substream_data_iter->second .frames_in_obu .PadZeros (
230- encoder_required_samples_to_delay );
221+ required_samples_to_delay );
231222 }
232223
233224 return absl::OkStatus ();
@@ -571,7 +562,7 @@ absl::Status ApplyUserTrimForFrame(const bool from_start,
571562 if (num_samples_trimmed_in_obu > frame_samples_to_trim) {
572563 return absl::InvalidArgumentError (
573564 absl::StrCat (" More samples were trimmed from the " , start_or_end_string,
574- " than expected: (" , num_samples_trimmed_in_obu, " vs " ,
565+ " than expected: (" , num_samples_trimmed_in_obu, " vs " ,
575566 frame_samples_to_trim, " )" ));
576567 }
577568
@@ -654,6 +645,34 @@ AudioFrameGenerator::Create(
654645 codec_config_obu_metadata.codec_config ();
655646 }
656647
648+ // Initialize all of the encoders.
649+ absl::flat_hash_map<uint32_t , std::unique_ptr<EncoderBase>>
650+ substream_id_to_encoder;
651+ for (const auto & audio_frame_metadata : audio_frame_metadatas) {
652+ const DecodedUleb128 audio_element_id =
653+ audio_frame_metadata.audio_element_id ();
654+ const auto audio_elements_iter = audio_elements.find (audio_element_id);
655+ if (audio_elements_iter == audio_elements.end ()) {
656+ return absl::InvalidArgumentError (absl::StrCat (
657+ " Audio Element with ID= " , audio_element_id, " not found" ));
658+ }
659+
660+ // Create an encoder for each substream.
661+ RETURN_IF_NOT_OK (GetEncodingDataAndInitializeEncoders (
662+ codec_config_metadata, audio_elements_iter->second ,
663+ substream_id_to_encoder));
664+ }
665+
666+ // Get the global maximum delay among all encoders. IAMF requires that all
667+ // substreams have the same number of samples trimmed at the start. When
668+ // mixing multiple codec config OBUs, codecs that do not traditionally have
669+ // delay may need delay added for alignment.
670+ uint32_t max_codec_delay = 0 ;
671+ for (const auto & [substream_id, encoder] : substream_id_to_encoder) {
672+ max_codec_delay =
673+ std::max (max_codec_delay, encoder->GetNumberOfSamplesToDelayAtStart ());
674+ }
675+
657676 const auto & first_audio_frame_metadata = *audio_frame_metadatas.begin ();
658677 const int64_t common_samples_to_trim_at_start = static_cast <int64_t >(
659678 first_audio_frame_metadata.samples_to_trim_at_start ());
@@ -664,11 +683,8 @@ AudioFrameGenerator::Create(
664683 const bool common_samples_to_trim_at_start_includes_codec_delay =
665684 first_audio_frame_metadata
666685 .samples_to_trim_at_start_includes_codec_delay ();
667-
668686 absl::flat_hash_map<DecodedUleb128, absl::flat_hash_set<ChannelLabel::Label>>
669687 audio_element_id_to_labels;
670- absl::flat_hash_map<uint32_t , std::unique_ptr<EncoderBase>>
671- substream_id_to_encoder;
672688 absl::flat_hash_map<uint32_t , SubstreamData> substream_id_to_substream_data;
673689 absl::flat_hash_map<uint32_t , TrimmingState> substream_id_to_trimming_state;
674690 for (const auto & audio_frame_metadata : audio_frame_metadatas) {
@@ -687,7 +703,6 @@ AudioFrameGenerator::Create(
687703 " Audio Element with ID= " , audio_element_id, " not found" ));
688704 }
689705
690- // Create an encoder for each substream.
691706 const AudioElementWithData& audio_element_with_data =
692707 audio_elements_iter->second ;
693708 const auto num_samples_per_frame =
@@ -696,13 +711,9 @@ AudioFrameGenerator::Create(
696711 return absl::InvalidArgumentError (
697712 " The spec disallows trimming multiple frames from the end." );
698713 }
699- RETURN_IF_NOT_OK (GetEncodingDataAndInitializeEncoders (
700- codec_config_metadata, audio_element_with_data,
701- substream_id_to_encoder));
702-
703714 // Intermediate data for all substreams belonging to an Audio Element.
704715 RETURN_IF_NOT_OK (InitializeSubstreamData (
705- audio_element_with_data.substream_id_to_labels , substream_id_to_encoder ,
716+ max_codec_delay, audio_element_with_data.substream_id_to_labels ,
706717 num_samples_per_frame,
707718 audio_frame_metadata.samples_to_trim_at_start_includes_codec_delay (),
708719 audio_frame_metadata.samples_to_trim_at_start (),
@@ -736,8 +747,7 @@ AudioFrameGenerator::Create(
736747 const int64_t additional_samples_to_trim_at_start =
737748 common_samples_to_trim_at_start_includes_codec_delay
738749 ? 0
739- : substream_id_to_encoder[substream_id]
740- ->GetNumberOfSamplesToDelayAtStart ();
750+ : max_codec_delay;
741751 substream_id_to_trimming_state[substream_id] = {
742752 .increment_samples_to_trim_at_end_by_padding =
743753 !audio_frame_metadata.samples_to_trim_at_end_includes_padding (),
0 commit comments