Skip to content

Commit 9442df4

Browse files
committed
init
0 parents  commit 9442df4

21 files changed

+445
-0
lines changed

.github/workflows/main.yml

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# CI: build the native extension and run the default rake task
# (clobber, compile, test, rubocop) on every push to main and on pull requests.
name: Ruby

on:
  push:
    branches:
      - main

  pull_request:

jobs:
  build:
    runs-on: ubuntu-latest
    name: Ruby ${{ matrix.ruby }}
    strategy:
      matrix:
        ruby:
          - '3.4.2'

    steps:
      - uses: actions/checkout@v4
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: ${{ matrix.ruby }}
          bundler-cache: true
      - name: Install mecab
        # -y keeps the install non-interactive instead of relying on the
        # runner image being pre-configured to assume "yes".
        run: |
          sudo apt-get update
          sudo apt-get install -y libmecab-dev mecab-ipadic-utf8
      - name: Run the default task
        run: bundle exec rake

.gitignore

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/.bundle/
2+
/.vscode/
3+
/.yardoc
4+
/_yardoc/
5+
/coverage/
6+
/doc/
7+
/pkg/
8+
/spec/reports/
9+
/tmp/
10+
*.bundle
11+
*.so
12+
*.o
13+
*.a
14+
mkmf.log
15+
Gemfile.lock

.rubocop.yml

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
plugins:
2+
- rubocop-rake
3+
- rubocop-minitest
4+
5+
AllCops:
6+
TargetRubyVersion: 3.1
7+
NewCops: enable
8+
9+
Style/StringLiterals:
10+
EnforcedStyle: double_quotes
11+
12+
Style/StringLiteralsInInterpolation:
13+
EnforcedStyle: double_quotes

Gemfile

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# frozen_string_literal: true
2+
3+
source "https://rubygems.org"

# Specify your gem's dependencies in mecab.gemspec
gemspec

gem "rake", "~> 13.0"

gem "rake-compiler"

gem "minitest", "~> 5.16"

# .rubocop.yml loads its extensions through the `plugins:` key, which RuboCop
# only understands from 1.72 onwards, so the lower bound must not allow
# anything older.
gem "rubocop", "~> 1.72"
gem "rubocop-minitest"
gem "rubocop-rake"

LICENSE.txt

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2025 Xinyu Wang
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
THE SOFTWARE.

README.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Mecab.rb
2+
3+
Ruby binding for [MeCab](https://github.com/markburns/mecab).
4+
5+
It only supports the segmentation feature for now.

Rakefile

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# frozen_string_literal: true
2+
3+
require "bundler/gem_tasks"
require "minitest/test_task"
require "rake/extensiontask"
require "rubocop/rake_task"

GEMSPEC = Gem::Specification.load("mecab.gemspec")

# Test and lint tasks, both with their default configuration.
Minitest::TestTask.create
RuboCop::RakeTask.new

# Native extension: the compiled library is placed under lib/mecab.
Rake::ExtensionTask.new("mecab", GEMSPEC) { |ext| ext.lib_dir = "lib/mecab" }

# The gem can only be packaged once the extension has been compiled.
desc "Build Ruby Gem"
task build: :compile

task default: %i[clobber compile test rubocop]

bin/console

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
4+
require "bundler/setup"
5+
require "mecab"
6+
7+
# You can add fixtures and/or initialization code here to make experimenting
8+
# with your gem easier. You can also use a different console, if you like.
9+
10+
require "irb"
11+
IRB.start(__FILE__)

bin/setup

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
IFS=$'\n\t'
4+
set -vx
5+
6+
bundle install
7+
8+
# Do any other automated setup that you need to do here

ext/mecab/extconf.rb

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# frozen_string_literal: true
2+
3+
require "mkmf"
4+
5+
# Resolve the mecab-config helper first so that an explicit
# `--with-mecab-config=/path/to/mecab-config` is honoured by the availability
# check below. A bare `--with-mecab-config` / `--without-mecab-config` makes
# mkmf return true/false; in that case fall back to the default name.
mecab_config = with_config("mecab-config", "mecab-config")
mecab_config = "mecab-config" unless mecab_config.is_a?(String)

unless find_executable(mecab_config)
  install_text = case RUBY_PLATFORM
                 when /linux/
                   "apt install libmecab-dev mecab-ipadic-utf8"
                 when /darwin/
                   "brew install mecab mecab-ipadic"
                 else
                   "We don't know how to install mecab on your platform"
                 end

  abort <<~MSG
    mecab is missing. You can install it by running:
      #{install_text}
  MSG
end

# Query mecab-config once per setting and reuse the answers.
mecab_cflags = `#{mecab_config} --cflags`.chomp
mecab_libs = `#{mecab_config} --libs`.chomp
mecab_inc_dirs = `#{mecab_config} --inc-dir`.chomp.split

append_cflags("-fvisibility=hidden -std=c11 -O3 -g")
append_cflags(mecab_cflags)

# C++-only compiler options belong in CXXFLAGS. CPPFLAGS are preprocessor
# flags shared with the C sources, where `-std=c++11` is not a valid option.
$CXXFLAGS << " -fvisibility=hidden -std=c++11 -O3 -g" # rubocop:disable Style/GlobalVars
# mecab-config's cflags are handed to CPPFLAGS so both the C and C++ sources
# see them.
append_cppflags(mecab_cflags)

append_ldflags(mecab_libs)

includes = [RbConfig::CONFIG["includedir"], *mecab_inc_dirs]
libs = [RbConfig::CONFIG["libdir"]]
dir_config("mecab", includes, libs)

create_makefile("mecab/mecab")

ext/mecab/mecab.c

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include <ruby.h>
2+
3+
#include <mecab.h>
4+
5+
#include "segment.h"
6+
7+
RUBY_FUNC_EXPORTED void
8+
Init_mecab(void)
9+
{
10+
VALUE rb_mMecab = rb_define_module("Mecab");
11+
rb_define_const(rb_mMecab, "CPP_VERSION", rb_str_new2(mecab_version()));
12+
13+
init_segment(rb_mMecab);
14+
}

ext/mecab/segment.cc

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#include "segment.h"
2+
3+
#include <string>
4+
#include <vector>
5+
6+
#include <ruby/encoding.h>
7+
8+
#include <mecab.h>
9+
10+
namespace ext_mecab
11+
{
12+
// Only support UTF-8 encoding
13+
static rb_encoding *utf8_encoding;
14+
15+
struct SegmentWrapper
16+
{
17+
MeCab::Model *model;
18+
MeCab::Tagger *tagger;
19+
};
20+
21+
void segment_free(void *data)
22+
{
23+
SegmentWrapper *wrapper = (SegmentWrapper *)data;
24+
25+
if (wrapper->tagger)
26+
{
27+
delete wrapper->tagger;
28+
}
29+
30+
if (wrapper->model)
31+
{
32+
delete wrapper->model;
33+
}
34+
delete wrapper;
35+
}
36+
37+
size_t segment_size(const void *data)
38+
{
39+
return sizeof(SegmentWrapper);
40+
}
41+
42+
static const rb_data_type_t segment_data_type = {
43+
.wrap_struct_name = "MeCabSegment",
44+
.function = {
45+
.dmark = NULL,
46+
.dfree = segment_free,
47+
.dsize = segment_size,
48+
},
49+
};
50+
51+
VALUE segment_alloc(VALUE self)
52+
{
53+
SegmentWrapper *wrapper = new SegmentWrapper();
54+
return TypedData_Wrap_Struct(self, &segment_data_type, wrapper);
55+
}
56+
57+
void segment_initialize(VALUE self, VALUE model_argv)
58+
{
59+
SegmentWrapper *wrapper;
60+
TypedData_Get_Struct(self, SegmentWrapper, &segment_data_type, wrapper);
61+
62+
wrapper->model = MeCab::Model::create(StringValueCStr(model_argv));
63+
if (!wrapper->model)
64+
{
65+
rb_raise(rb_eRuntimeError, "Failed to create MeCab model: %s", MeCab::getLastError());
66+
}
67+
68+
wrapper->tagger = wrapper->model->createTagger();
69+
if (!wrapper->tagger)
70+
{
71+
rb_raise(rb_eRuntimeError, "Failed to create MeCab tagger: %s", MeCab::getLastError());
72+
}
73+
}
74+
75+
VALUE segment_cut(VALUE self, VALUE text_rb_str)
76+
{
77+
std::string text = StringValueCStr(text_rb_str);
78+
79+
SegmentWrapper *wrapper;
80+
TypedData_Get_Struct(self, SegmentWrapper, &segment_data_type, wrapper);
81+
82+
VALUE surface = rb_ary_new();
83+
const MeCab::Node *node = wrapper->tagger->parseToNode(text.c_str());
84+
for (; node; node = node->next)
85+
{
86+
if (node->stat == MECAB_BOS_NODE || node->stat == MECAB_EOS_NODE)
87+
{
88+
continue;
89+
}
90+
91+
rb_ary_push(surface, rb_enc_str_new(node->surface, node->length, utf8_encoding));
92+
}
93+
94+
return surface;
95+
}
96+
} // End of namespace ext_mecab
97+
98+
extern "C"
99+
{
100+
void init_segment(VALUE rb_mMecab)
101+
{
102+
// Initialize UTF-8 encoding
103+
ext_mecab::utf8_encoding = rb_utf8_encoding();
104+
105+
VALUE cSegment = rb_define_class_under(rb_mMecab, "Segment", rb_cObject);
106+
107+
rb_define_alloc_func(cSegment, ext_mecab::segment_alloc);
108+
rb_define_method(cSegment, "_initialize", RUBY_METHOD_FUNC(ext_mecab::segment_initialize), 1);
109+
rb_define_method(cSegment, "_cut", RUBY_METHOD_FUNC(ext_mecab::segment_cut), 1);
110+
}
111+
} // End of extern "C"

ext/mecab/segment.h

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#ifndef MECAB_SEGMENT_H
2+
#define MECAB_SEGMENT_H 1
3+
4+
#include <ruby.h>
5+
6+
#ifdef __cplusplus
7+
extern "C"
8+
{
9+
#endif
10+
11+
void init_segment(VALUE rb_mMeCab);
12+
13+
#ifdef __cplusplus
14+
}
15+
#endif
16+
17+
#endif

lib/mecab.rb

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# frozen_string_literal: true
2+
3+
require_relative "mecab/version"
4+
require_relative "mecab/mecab"
5+
require_relative "mecab/segment"
6+
7+
module Mecab
  # Gem-specific error class (a StandardError subclass).
  class Error < StandardError
  end
end

lib/mecab/segment.rb

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# frozen_string_literal: true
2+
3+
module Mecab
  # Ruby-side API of the segmenter. The actual work is delegated to the
  # `_initialize` and `_cut` methods provided by the native extension.
  class Segment
    # @return [String] the MeCab argument string given to the constructor
    attr_reader :model_argv

    # Creates a segmenter from a MeCab argument string.
    # See https://manpages.org/mecab for argument details.
    #
    # @example
    #   Mecab::Segment.new('-d /opt/homebrew/opt/mecab-ipadic/lib/mecab/dic/ipadic')
    #
    # @param model_argv [String] MeCab command-line style arguments
    def initialize(model_argv = "")
      @model_argv = model_argv
      _initialize(@model_argv)
    end

    # Splits a sentence by handing it to the native `_cut`.
    #
    # @param sentence [String] text to segment
    # @return [Object] whatever the native `_cut` returns
    def cut(sentence) = _cut(sentence)
  end
end

lib/mecab/version.rb

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# frozen_string_literal: true
2+
3+
module Mecab
4+
VERSION = "0.1.0"
5+
end

0 commit comments

Comments
 (0)