Skip to content
Open
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 52 additions & 41 deletions sherpa-onnx/csrc/file-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,30 @@
#include "sherpa-onnx/csrc/file-utils.h"

#include <fstream>
#include <filesystem>
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

你好,我们现在避免使用 filesystem 这个头文件. 详见 #2998

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

好的谢谢

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

您好,AI 审阅还指出了读取超过 4GB 的大文件时可能存在的问题,需要考虑循环分块读取吗?

#include <memory>
#include <sstream>
#include <string>
#include <vector>

#ifdef _WIN32
#include <windows.h>
#else
#include <limits.h>
#include <stdlib.h>
#endif

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {
std::wstring ToWideString(const std::string &s);

bool FileExists(const std::string &filename) {
return std::ifstream(filename).good();
try {
#ifdef _WIN32
std::wstring wide_path = ToWideString(filename);
std::filesystem::path file_path(wide_path);
#else
std::filesystem::path file_path(filename);
#endif
return std::filesystem::exists(file_path) &&
std::filesystem::is_regular_file(file_path);
} catch (const std::exception&) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you describe which function can throw in the code?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

谢谢,确实触发概率太低,我将去掉

return false;
}
}

void AssertFileExists(const std::string &filename) {
Expand All @@ -33,20 +39,30 @@ void AssertFileExists(const std::string &filename) {
}

std::vector<char> ReadFile(const std::string &filename) {
std::ifstream file(filename, std::ios::binary | std::ios::ate);
if (!file.is_open()) {
return {};
}
try {
#ifdef _WIN32
std::wstring wide_path = ToWideString(filename);
std::filesystem::path file_path(wide_path);
#else
std::filesystem::path file_path(filename);
#endif
std::ifstream file(file_path, std::ios::binary | std::ios::ate);
if (!file.is_open()) {
return {};
}

std::streamsize size = file.tellg();
file.seekg(0, std::ios::beg);

std::streamsize size = file.tellg();
file.seekg(0, std::ios::beg);
std::vector<char> buffer(size);
if (!file.read(buffer.data(), size)) {
return {};
}

std::vector<char> buffer(size);
if (!file.read(buffer.data(), size)) {
return buffer;
} catch (const std::exception&) {
return {};
}

return buffer;
}

#if __ANDROID_API__ >= 9
Expand Down Expand Up @@ -119,33 +135,28 @@ std::string ResolveAbsolutePath(const std::string &path) {
return path;
}

try {
#ifdef _WIN32
// Check if path is already absolute (drive letter or UNC path)
if ((path.size() > 1 && path[1] == ':') ||
(path.size() > 1 && path[0] == '\\' && path[1] == '\\')) {
return path;
}

char buffer[MAX_PATH];
if (GetFullPathNameA(path.c_str(), MAX_PATH, buffer, nullptr)) {
return std::string(buffer);
}

return path; // fallback on failure

std::wstring wide_path = ToWideString(path);
std::filesystem::path fs_path(wide_path);
#else
// POSIX: absolute paths start with '/'
if (path[0] == '/') {
std::filesystem::path fs_path(path);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

从 `std::string` 构造 `std::filesystem::path` 时,其编码行为是实现定义的。为了明确地将输入字符串 `path` 解释为 UTF-8 编码,从而避免在处理中文等非 ASCII 字符时出现潜在的编码问题,建议使用 `std::filesystem::u8path()`。这能让代码的意图更清晰,也更健壮。

Suggested change
std::filesystem::path fs_path(path);
std::filesystem::path fs_path = std::filesystem::u8path(path);

#endif

// If already absolute, return normalized path
if (fs_path.is_absolute()) {
return fs_path.lexically_normal().u8string();
}

// Convert to absolute path and normalize
std::filesystem::path abs_path = std::filesystem::absolute(fs_path);
abs_path = abs_path.lexically_normal();

return abs_path.u8string();
} catch (const std::exception&) {
// If conversion fails, return original path
return path;
}

char buffer[PATH_MAX];
if (realpath(path.c_str(), buffer)) {
return std::string(buffer);
}

return path; // fallback on failure
#endif
}

} // namespace sherpa_onnx