Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion .github/workflows/clucene-ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ jobs:

run_clucene_ut_macos:
name: CLucene UT (MacOS)
runs-on: macos-12
runs-on: macos-15
steps:
- name: "Checkout ${{ github.event.pull_request.number }} ${{ github.event.pull_request.head.sha }}"
uses: actions/checkout@v4
Expand Down Expand Up @@ -108,9 +108,36 @@ jobs:
'maven' \
'node' \
'llvm@16'

# Select the Xcode developer directory under /Applications/Xcode.app as the
# active one before the later steps query the toolchain through `xcrun`.
- name: "Ensure Correct Xcode"
run: sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer

# Append SDKROOT=<SDK path reported by xcrun> to $GITHUB_ENV so that the
# variable is defined for the steps that follow.
- name: "Set SDKROOT"
run: echo "SDKROOT=$(xcrun --show-sdk-path)" >> $GITHUB_ENV

# Append CC/CXX to $GITHUB_ENV, pointing at the clang/clang++ located by xcrun.
# NOTE(review): the "Use Homebrew LLVM" step further down writes CC and CXX to
# $GITHUB_ENV again with different values; presumably the later write wins for
# subsequent steps - confirm which compiler is actually intended.
- name: "Set Compiler"
run: |
echo "CC=$(xcrun --find clang)" >> $GITHUB_ENV
echo "CXX=$(xcrun --find clang++)" >> $GITHUB_ENV

# Append CFLAGS/CXXFLAGS to $GITHUB_ENV, each set to `-isysroot <SDK path>`
# where the SDK path is the one reported by xcrun.
- name: "Set Compilation Flags"
run: |
echo "CFLAGS=-isysroot $(xcrun --show-sdk-path)" >> $GITHUB_ENV
echo "CXXFLAGS=-isysroot $(xcrun --show-sdk-path)" >> $GITHUB_ENV

# Append CC/CXX to $GITHUB_ENV a second time, now pointing at the clang and
# clang++ binaries under the Homebrew llvm@16 prefix.
# NOTE(review): the "Run" step re-exports CC and CXX from `xcrun --find`
# inside its own script, so these Homebrew paths do not appear to reach
# cmake - confirm whether that is intentional.
- name: "Use Homebrew LLVM"
run: |
echo "CC=$(brew --prefix llvm@16)/bin/clang" >> $GITHUB_ENV
echo "CXX=$(brew --prefix llvm@16)/bin/clang++" >> $GITHUB_ENV

# Configure with cmake in a fresh `build` directory and build the `cl_test`
# target, with shell tracing enabled (`set -x`).
# The script exports SDKROOT, CC, CXX, CFLAGS and CXXFLAGS itself, using the
# same xcrun queries as the earlier "Set ..." steps; exports made here take
# effect for the cmake/make invocations below regardless of what the earlier
# steps wrote to $GITHUB_ENV.
# NOTE(review): CC/CXX here are the xcrun compilers, not the Homebrew llvm@16
# ones selected by "Use Homebrew LLVM" - confirm which toolchain is wanted and
# drop the redundant configuration.
- name: "Run"
run: |
set -x
export SDKROOT=$(xcrun --show-sdk-path)
export CC=$(xcrun --find clang)
export CXX=$(xcrun --find clang++)
export CFLAGS="-isysroot $(xcrun --show-sdk-path)"
export CXXFLAGS="-isysroot $(xcrun --show-sdk-path)"
mkdir build && cd build
cmake ../
make cl_test
Expand Down
1 change: 1 addition & 0 deletions src/contribs-lib/CLucene/analysis/jieba/FullSegment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ class FullSegment: public SegmentBase {

void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
stopWordList_.reserve(1000);
if (ifs.is_open()) {
string line;
while (getline(ifs, line)) {
Expand Down
1 change: 1 addition & 0 deletions src/contribs-lib/CLucene/analysis/jieba/MixSegment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ class MixSegment: public SegmentTagged {

void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
stopWordList_.reserve(1000);
if (ifs.is_open()) {
string line;
while (getline(ifs, line)) {
Expand Down
1 change: 1 addition & 0 deletions src/contribs-lib/CLucene/analysis/jieba/StringUtil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ namespace limonp {

inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
res.clear();
res.reserve(src.size());
size_t Start = 0;
size_t end = 0;
string sub;
Expand Down
1 change: 1 addition & 0 deletions src/core/CLucene/index/SDocumentWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,7 @@ int32_t SDocumentsWriter<T>::flush(bool _closeDocStore) {
}

newFiles.clear();
newFiles.reserve(10);

docStoreOffset = numDocsInStore;

Expand Down
11 changes: 9 additions & 2 deletions src/core/CLucene/index/SDocumentWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -733,10 +733,17 @@ class SDocumentsWriter : public IDocumentsWriter {


/// Builds the file name "<segment>.<extension>" for the current segment.
///
/// @param extension File extension, without the leading dot.
/// @return The segment name and the extension joined by a single '.'.
std::string segmentFileName(const std::string &extension) {
    std::string result;
    // Size the buffer once up front so the appends below need no reallocation.
    result.reserve(segment.size() + 1 + extension.size());
    result += segment;
    result += '.';
    result += extension;
    return result;
}
/// C-string convenience overload of segmentFileName().
///
/// @param extension NUL-terminated extension without the leading dot; may be
///                  null.
/// @return The bare segment name when @p extension is null, otherwise the
///         result of the std::string overload for the same extension.
std::string segmentFileName(const char *extension) {
    // Constructing a std::string from a null pointer is undefined behaviour,
    // so the null case must be handled before any conversion takes place.
    if (extension == nullptr) {
        return segment;
    }
    return segmentFileName(std::string(extension));
}
int32_t getMaxBufferedDocs() override {
return maxBufferedDocs;
Expand Down
4 changes: 2 additions & 2 deletions src/core/CLucene/index/SegmentInfos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ string SegmentInfo::segString(Directory* dir) {
// Already cached:
return _files;
}

_files.reserve(10);
bool useCompoundFile = getUseCompoundFile();

if (useCompoundFile) {
Expand Down Expand Up @@ -912,7 +912,7 @@ string SegmentInfo::segString(Directory* dir) {
// contents (NOTE: NFS clients often have such stale
// caching):
vector<string> files;

files.reserve(10);
int64_t genA = -1;

if (directory != NULL){
Expand Down
4 changes: 2 additions & 2 deletions src/test/analysis/TestStandard95.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ std::vector<std::string> datas = {
"{\"qid\": \"qid_8325162146787472205\", \"category\": \"娱乐-博彩\", \"title\": \"第一次发图。。。(图2)请高手进来批一下。。第一次发图。拍摄不好 \", \"desc\": \"第一次发图。不好虽见笑```谢谢进来帮批示的朋友。。:)))\\r\", \"answer\": \"6 12场感觉挺悬的...祝好\"}"
};

std::vector<std::string> tokens = {
std::vector<std::string> standard95_tokens = {
"qid|qid_1815059893214501395|category|烦|恼|恋|爱|title|请|问|深|入|骨|髓|地|喜|欢|一|个|人|怎|么|办|我|不|能|确|定|对|方|是|不|是|喜|欢|我|我|却|想|desc|我|不|能|确|定|对|方|是|不|是|喜|欢|我|我|却|想|分|分|秒|秒|跟|他|在|一|起|有|谁|能|告|诉|我|如|何|能|想|他|少|一|点|answer|一|定|要|告|诉|他|你|很|喜|欢|他|很|爱|他|虽|然|不|知|道|你|和|他|现|在|的|关|系|是|什|么|但|如|果|真|的|觉|得|很|喜|欢|就|向|他|表|白|啊|起|码|你|努|力|过|了|女|生|主|动|多|少|占|一|点|优|势|的|呵|呵|只|愿|曾|经|拥|有|到|以|后|就|算|感|情|没|现|在|这|么|强|烈|了|也|不|会|觉|得|遗|憾|啊|与|其|每|天|那|么|痛|苦|的|想|他|恋|他|还|不|如|直|接|告|诉|他|不|要|怕|回|破|坏|你|们|现|有|的|感|情|因|为|如|果|不|告|诉|他|你|可|能|回|后|悔|一|辈|子|",
"qid|qid_2063849676113062517|category|游|戏|完|美|游|戏|诛|仙|title|我|登|陆|诛|仙|2|时|总|说|我|账|号|密|码|错|误|但|是|我|打|的|是|正|确|的|就|算|不|对|我|desc|answer|被|盗|号|了|我|的|号|在|22|号|那|天|被|盗|了|跟|你|一|样|情|况|link|密|码|与|账|号|错|误|我|密|保|都|有|了|呐|邮|箱|换|密|码|也|不|行|还|被|删|了|号|伤|心|兼|郁|闷|呵|呵|盗|号|了|建|议|跟|完|美|申|请|把|号|要|回|来|或|者|玩|新|的|号|",
"qid|qid_6625582808814915192|category|游|戏|网|络|游|戏|title|斩|魔|仙|者|称|号|怎|么|得|来|的|desc|斩|魔|仙|者|称|号|怎|么|得|来|的|answer|楼|主|您|好|以|下|为|转|载|r|r|圣|诞|前|热|身|来|生|肖|传|说|做|斩|魔|仙|者|r|r|一|年|一|度|的|圣|诞|节|快|要|来|临|了|大|街|小|巷|商|户|们|都|在|忙|着|准|备|12|月|25|日|圣|诞|的|来|临|而|这|时|候|一|些|妖|魔|也|正|蠢|蠢|欲|动|准|备|作|乱|作|为|生|肖|世|界|肩|负|维|护|世|界|和|平|拯|救|全|人|类|的|生|肖|使|者|怎|么|能|不|有|所|行|动|为|了|生|肖|世|界|的|安|定|而|做|防|范|准|备|r|r|要|让|妖|魔|鬼|怪|能|对|你|有|所|心|悸|除|了|自|己|本|身|武|艺|要|高|强|最|好|能|在|妖|魔|界|打|出|知|名|度|这|样|当|你|的|亲|朋|好|友|被|妖|魔|袭|击|时|只|要|爆|出|你|的|名|号|这|些|妖|魔|上|就|会|落|荒|而|逃|岂|不|好|哉|那|么|斩|魔|仙|者|这|个|响|亮|的|称|号|应|该|足|够|能|震|慑|住|妖|魔|让|他|们|铭|记|在|心|了|吧|r|r|斩|魔|仙|者|的|称|号|r|r|而|且|这|个|斩|魔|仙|者|的|称|号|并|不|是|人|人|都|能|得|到|的|只|有|成|功|挑|战|70|级|副|本|中|的|隐|藏|boss|羽|翼|仙|的|人|才|能|获|得|此|称|号|并|且|前|提|条|件|是|在|12|月|18|日|12|月|25|日|之|间|第|一|队|成|功|挑|战|羽|翼|仙|的|人|才|能|获|此|称|号|因|此|此|称|号|在|全|服|范|围|内|是|绝|对|不|可|能|超|过|5|个|的|r|r|要|挑|战|羽|翼|仙|可|不|是|一|件|容|易|的|事|首|先|要|在|70|级|副|本|中|打|败|4|个|强|大|的|boss|在|打|完|副|本|的|第|4|个|boss|有|一|定|几|率|获|得|道|具|羽|翼|真|元|有|了|羽|翼|真|元|后|就|可|以|与|羽|翼|仙|进|行|一|场|战|斗|羽|翼|仙|就|站|在|第|4|个|boss|的|旁|边|只|是|没|有|道|具|是|不|能|进|入|战|斗|的|r|r|羽|翼|仙|r|r|在|12|月|18|日|12|月|25|日|活|动|期|间|成|功|挑|战|羽|翼|仙|后|的|第|一|支|队|伍|就|可|以|获|得|兑|换|斩|魔|仙|者|的|道|具|烈|火|珍|珠|旗|当|然|如|果|你|在|这|场|激|烈|的|战|斗|中|不|幸|捐|躯|那|么|当|然|是|不|会|得|到|这|个|道|具|的|得|到|了|这|把|烈|火|珍|珠|旗|的|玩|家|就|可|以|到|npc|燃|烧|使|处|兑|换|称|号|了|r|r|这|样|兼|具|高|强|能|力|和|超|强|人|品|才|能|获|得|的|称|号|怎|么|能|不|人|望|而|生|畏|怎|么|能|不|让|那|些|妖|魔|胆|怯|想|要|获|得|的|玩|家|就|快|快|行|动|莫|要|让|人|先|抢|了|这|全|服|唯|一|的|斩|魔|仙|者|称|号|r|r|如|果|满|意|请|采|纳|r|谢|谢|",
Expand Down Expand Up @@ -671,7 +671,7 @@ static void testCompLucene95(CuTest *tc) {
std::vector<std::string> new_tokens;
testCutLines(datas, new_tokens);

CLUCENE_ASSERT((tokens == new_tokens));
CLUCENE_ASSERT((standard95_tokens == new_tokens));
}

CuSuite *teststandard95(void) {
Expand Down
Loading