From 49f9a1f4f46c959fd17e89b53f74bd080d33aa6d Mon Sep 17 00:00:00 2001 From: xiaoheizai <894944689@qq.com> Date: Thu, 29 Oct 2020 16:48:18 +0800 Subject: [PATCH 1/3] Update CRFSegmenter.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加流的读取方式 --- src/main/java/com/hankcs/hanlp/model/crf/CRFSegmenter.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/com/hankcs/hanlp/model/crf/CRFSegmenter.java b/src/main/java/com/hankcs/hanlp/model/crf/CRFSegmenter.java index 353ac0c56..e53a3d82d 100644 --- a/src/main/java/com/hankcs/hanlp/model/crf/CRFSegmenter.java +++ b/src/main/java/com/hankcs/hanlp/model/crf/CRFSegmenter.java @@ -23,6 +23,7 @@ import java.io.BufferedWriter; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; @@ -50,6 +51,11 @@ public CRFSegmenter(String modelPath) throws IOException perceptronSegmenter = new PerceptronSegmenter(this.model); } } + + public CRFSegmenter(InputStream inputStream) throws IOException { + super(inputStream); + perceptronSegmenter = new PerceptronSegmenter(this.model); + } @Override protected void convertCorpus(Sentence sentence, BufferedWriter bw) throws IOException From 3147628d76c4ccdc1f06ae57052971c942175097 Mon Sep 17 00:00:00 2001 From: xiaoheizai <894944689@qq.com> Date: Thu, 29 Oct 2020 16:50:38 +0800 Subject: [PATCH 2/3] Update CRFTagger.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加流的读取方式 --- src/main/java/com/hankcs/hanlp/model/crf/CRFTagger.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/com/hankcs/hanlp/model/crf/CRFTagger.java b/src/main/java/com/hankcs/hanlp/model/crf/CRFTagger.java index a5506caf7..63483f3db 100644 --- a/src/main/java/com/hankcs/hanlp/model/crf/CRFTagger.java +++ b/src/main/java/com/hankcs/hanlp/model/crf/CRFTagger.java @@ -20,6 +20,7 @@ import
com.hankcs.hanlp.model.perceptron.utility.Utility; import java.io.BufferedWriter; +import java.io.InputStream; import java.io.File; import java.io.IOException; import java.util.Date; @@ -40,6 +41,10 @@ public CRFTagger(String modelPath) throws IOException if (modelPath == null) return; // 训练模式 model = new LogLinearModel(modelPath); } + + public CRFTagger(InputStream is) throws IOException { + model = new LogLinearModel(is); + } /** * 训练 From b8f348424da4c55302c0c420a9b1cc3ea8e280a5 Mon Sep 17 00:00:00 2001 From: xiaoheizai <894944689@qq.com> Date: Thu, 29 Oct 2020 16:53:48 +0800 Subject: [PATCH 3/3] Update LogLinearModel.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加流的读取方式 --- .../java/com/hankcs/hanlp/model/crf/LogLinearModel.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/com/hankcs/hanlp/model/crf/LogLinearModel.java b/src/main/java/com/hankcs/hanlp/model/crf/LogLinearModel.java index bcca2ce7e..21d3fdeda 100644 --- a/src/main/java/com/hankcs/hanlp/model/crf/LogLinearModel.java +++ b/src/main/java/com/hankcs/hanlp/model/crf/LogLinearModel.java @@ -24,6 +24,7 @@ import com.hankcs.hanlp.utility.Predefine; import java.io.DataOutputStream; +import java.io.InputStream; import java.io.IOException; import java.util.*; @@ -113,6 +114,13 @@ public LogLinearModel(String txtFile, String binFile) throws IOException super(null, null); convert(txtFile, binFile); } + + public LogLinearModel(InputStream is) throws IOException + { + super(null, null); + ByteArrayStream byteArray = ByteArrayOtherStream.createByteArrayOtherStream(is); + load(byteArray); + } private void convert(String txtFile, String binFile) throws IOException {