@@ -177,25 +177,76 @@ SdfUsdaFileFormat::~SdfUsdaFileFormat()
177177namespace
178178{
179179
180+ bool _CheckBOM (const char * bufferRead, size_t bufferSize) {
181+ // Check for UTF-8 BOM (EF BB BF)
182+ if (bufferSize >= 3 &&
183+ static_cast <unsigned char >(bufferRead[0 ]) == 0xEF &&
184+ static_cast <unsigned char >(bufferRead[1 ]) == 0xBB &&
185+ static_cast <unsigned char >(bufferRead[2 ]) == 0xBF ) {
186+ TF_WARN (" Asset starts with UTF-8 BOM which is not supported, "
187+ " please convert the file to UTF-8 without the BOM." );
188+ return false ;
189+ }
190+
191+ // Check for UTF-16 BOM markers (FE FF or FF FE)
192+ if (bufferSize >= 2 &&
193+ ((static_cast <unsigned char >(bufferRead[0 ]) == 0xFE &&
194+ static_cast <unsigned char >(bufferRead[1 ]) == 0xFF ) ||
195+ (static_cast <unsigned char >(bufferRead[0 ]) == 0xFF &&
196+ static_cast <unsigned char >(bufferRead[1 ]) == 0xFE ))) {
197+ TF_WARN (" Asset starts with UTF-16 BOM marker which is not supported, "
198+ " please convert the file to UTF-8 without the BOM." );
199+ return false ;
200+ }
201+
202+ // Check for UTF-32 BOM markers (00 00 FE FF or FF FE 00 00)
203+ if (bufferSize >= 4 &&
204+ ((static_cast <unsigned char >(bufferRead[0 ]) == 0x00 &&
205+ static_cast <unsigned char >(bufferRead[1 ]) == 0x00 &&
206+ static_cast <unsigned char >(bufferRead[2 ]) == 0xFE &&
207+ static_cast <unsigned char >(bufferRead[3 ]) == 0xFF ) ||
208+ (static_cast <unsigned char >(bufferRead[0 ]) == 0xFF &&
209+ static_cast <unsigned char >(bufferRead[1 ]) == 0xFE &&
210+ static_cast <unsigned char >(bufferRead[2 ]) == 0x00 &&
211+ static_cast <unsigned char >(bufferRead[3 ]) == 0x00 ))) {
212+ TF_WARN (" Asset starts with UTF-32 BOM marker which is not supported, "
213+ " please convert the file to UTF-8 without the BOM." );
214+ return false ;
215+ }
216+
217+ return true ;
218+ }
219+
180220bool
181221_CanReadImpl (const std::shared_ptr<ArAsset>& asset,
182- const std::string& cookie)
222+ const std::string& cookie,
223+ bool bomCheckWarning = true )
183224{
184225 TfErrorMark mark;
185226
227+ constexpr size_t BOM_CHECK_SIZE = 4 ;
186228 constexpr size_t COOKIE_BUFFER_SIZE = 512 ;
187229 char local[COOKIE_BUFFER_SIZE];
188230 std::unique_ptr<char []> remote;
189231 char *buf = local;
190232 size_t cookieLength = cookie.length ();
191- if (cookieLength > COOKIE_BUFFER_SIZE - 1 ) {
192- remote. reset ( new char [cookieLength + 1 ] );
233+ if (BOM_CHECK_SIZE + cookieLength > COOKIE_BUFFER_SIZE - 1 ) {
234+ remote = std::make_unique< char []>( cookieLength + BOM_CHECK_SIZE + 1 );
193235 buf = remote.get ();
194236 }
195- if (asset->Read (buf, cookieLength, /* offset = */ 0 ) != cookieLength) {
237+ // Maximum 4 bytes are needed to check for BOM markers.
238+ size_t bytesRead = asset->Read (buf, BOM_CHECK_SIZE + cookieLength, /* offset = */ 0 );
239+ // At least the cookie length is needed to check for the cookie.
240+ if (bytesRead < cookieLength) {
241+ return false ;
242+ }
243+
244+ // Check bom markers if requested
245+ if (bomCheckWarning && !_CheckBOM (buf, bytesRead)) {
196246 return false ;
197247 }
198248
249+ // It doesn't have BOM markers, so we can check the cookie.
199250 buf[cookieLength] = ' \0 ' ;
200251
201252 // Don't allow errors to escape this function, since this function is
@@ -224,15 +275,15 @@ SdfUsdaFileFormat::CanRead(const string& filePath) const
224275
225276 std::shared_ptr<ArAsset> asset = ArGetResolver ().OpenAsset (
226277 ArResolvedPath (filePath));
227- return asset && _CanReadImpl (asset, GetFileCookie ());
278+ return asset && _CanReadImpl (asset, GetFileCookie (), false );
228279}
229280
230281bool
231282SdfUsdaFileFormat::_CanReadFromAsset (
232283 const std::string& resolvedPath,
233284 const std::shared_ptr<ArAsset>& asset) const
234285{
235- return _CanReadImpl (asset, GetFileCookie ());
286+ return _CanReadImpl (asset, GetFileCookie (), false );
236287}
237288
238289bool
@@ -261,7 +312,7 @@ SdfUsdaFileFormat::_ReadFromAsset(
261312{
262313 // Quick check to see if the file has the magic cookie before spinning up
263314 // the parser.
264- if (!_CanReadImpl (asset, GetFileCookie ())) {
315+ if (!_CanReadImpl (asset, GetFileCookie (), true )) {
265316 TF_RUNTIME_ERROR (" <%s> is not a valid %s layer" ,
266317 resolvedPath.c_str (),
267318 GetFormatId ().GetText ());
0 commit comments