Skip to content

Fix checkStyle error; fix bugs #99

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/main/java/com/hankcs/dic/CoreStopWordDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.util.List;
import java.util.ListIterator;

/**
* Project: elasticsearch-analysis-hanlp
Expand Down Expand Up @@ -69,12 +68,12 @@ public static void apply(List<Term> termList) {
try {
dictionary = new StopWordDictionary(Config.CoreStopWordDictionaryPath);
DataOutputStream out = new DataOutputStream(
new BufferedOutputStream(IOUtil.newOutputStream(Config.CoreStopWordDictionaryPath + ".bin")));
new BufferedOutputStream(IOUtil.newOutputStream(Config.CoreStopWordDictionaryPath + ".bin")));
dictionary.save(out);
out.close();
} catch (Exception var2) {
Predefine.logger.severe(
"载入停用词词典" + Config.CoreStopWordDictionaryPath + "失败" + TextUtility.exceptionToString(var2));
"载入停用词词典" + Config.CoreStopWordDictionaryPath + "失败" + TextUtility.exceptionToString(var2));
throw new RuntimeException("载入停用词词典" + Config.CoreStopWordDictionaryPath + "失败");
}
} else {
Expand Down
21 changes: 14 additions & 7 deletions src/main/java/com/hankcs/dic/ExtMonitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Properties;
Expand All @@ -37,7 +38,7 @@ public class ExtMonitor implements Runnable {
}

@Override
@SuppressWarnings("unchecked")
@SuppressWarnings({"unchecked", "rawtypes"})
public void run() {
List<DictionaryFile> originalDictionaryFileList = DictionaryFileCache.getCustomDictionaryFileList();
logger.debug("hanlp original custom dictionary: {}", Arrays.toString(originalDictionaryFileList.toArray()));
Expand Down Expand Up @@ -66,20 +67,25 @@ public void run() {
}
}

@SuppressWarnings("unchecked")
@SuppressWarnings({"unchecked", "rawtypes"})
private void reloadProperty() {
Properties p = new Properties();
try {
ClassLoader loader = AccessController.doPrivileged((PrivilegedAction<ClassLoader>) () -> Thread.currentThread().getContextClassLoader());
ClassLoader loader = AccessController.doPrivileged(
(PrivilegedAction<ClassLoader>) () -> Thread.currentThread().getContextClassLoader());
if (loader == null) {
loader = HanLP.Config.class.getClassLoader();
}
p.load(new InputStreamReader(Predefine.HANLP_PROPERTIES_PATH == null ? Objects.requireNonNull(loader.getResourceAsStream("hanlp.properties")) : new FileInputStream(Predefine.HANLP_PROPERTIES_PATH), "UTF-8"));
p.load(new InputStreamReader(Predefine.HANLP_PROPERTIES_PATH == null ?
Objects.requireNonNull(loader.getResourceAsStream("hanlp.properties"))
: new FileInputStream(Predefine.HANLP_PROPERTIES_PATH), StandardCharsets.UTF_8));
String root = p.getProperty("root", "").replaceAll("\\\\", "/");
if (root.length() > 0 && !root.endsWith("/")) {
root += "/";
}
String[] pathArray = p.getProperty("CustomDictionaryPath", "data/dictionary/custom/CustomDictionary.txt").split(";");
String[] pathArray = p.getProperty("CustomDictionaryPath",
"data/dictionary/custom/CustomDictionary.txt")
.split(";");
String prePath = root;
for (int i = 0; i < pathArray.length; ++i) {
if (pathArray[i].startsWith(" ")) {
Expand Down Expand Up @@ -112,7 +118,8 @@ private List<DictionaryFile> getCurrentDictionaryFileList(String[] customDiction
if (customDictionaryPathTuple[1] == null || customDictionaryPathTuple[1].length() == 0) {
dictionaryFileList.add(new DictionaryFile(path, file.lastModified()));
} else {
dictionaryFileList.add(new DictionaryFile(path, customDictionaryPathTuple[1].trim(), file.lastModified()));
dictionaryFileList.add(
new DictionaryFile(path, customDictionaryPathTuple[1].trim(), file.lastModified()));
}
} else {
dictionaryFileList.add(new DictionaryFile(path, file.lastModified()));
Expand Down
142 changes: 68 additions & 74 deletions src/main/java/com/hankcs/dic/RemoteMonitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.utility.LexiconUtility;
import com.hankcs.help.ESPluginLoggerFactory;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpStatus;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
Expand Down Expand Up @@ -38,11 +39,11 @@ public class RemoteMonitor implements Runnable {

private static final Logger logger = ESPluginLoggerFactory.getLogger(RemoteMonitor.class.getName());

private static CloseableHttpClient httpclient = HttpClients.createDefault();
private static final CloseableHttpClient httpclient = HttpClients.createDefault();
/**
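* Time of the last modification: the Last-Modified value from the previous response, sent back as If-Modified-Since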
*/
private String last_modified;
private String lastModified;
/**
* Resource attribute
*/
Expand All @@ -61,8 +62,8 @@ public class RemoteMonitor implements Runnable {
public RemoteMonitor(String location, String type) {
this.location = location;
this.type = type;
this.last_modified = null;
this.eTags = null;
this.lastModified = "";
this.eTags = "";
}

@Override
Expand Down Expand Up @@ -90,38 +91,34 @@ private void runUnprivileged() {
head.setConfig(buildRequestConfig());

// 设置请求头
if (last_modified != null) {
head.setHeader("If-Modified-Since", last_modified);
if (!lastModified.isEmpty()) {
head.setHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
}
if (eTags != null) {
head.setHeader("If-None-Match", eTags);
if (!eTags.isEmpty()) {
head.setHeader(HttpHeaders.IF_NONE_MATCH, eTags);
}

CloseableHttpResponse response = null;
try {
response = httpclient.execute(head);
if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
if ((response.getLastHeader("Last-Modified") != null) && !response.getLastHeader("Last-Modified").getValue().equalsIgnoreCase(last_modified)) {
try (
CloseableHttpResponse response = httpclient.execute(head)
) {
final int statusCode = response.getStatusLine().getStatusCode();
if (statusCode == HttpStatus.SC_OK) {
if ((response.getLastHeader(HttpHeaders.LAST_MODIFIED) != null)
&& !lastModified.equalsIgnoreCase(response.getLastHeader(HttpHeaders.LAST_MODIFIED).getValue())) {
loadRemoteCustomWords(response);
} else if ((response.getLastHeader("ETag") != null) && !response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags)) {
} else if ((response.getLastHeader(HttpHeaders.ETAG) != null)
&& !eTags.equalsIgnoreCase(response.getLastHeader(HttpHeaders.ETAG).getValue())) {
loadRemoteCustomWords(response);
}
} else if (response.getStatusLine().getStatusCode() == HttpStatus.SC_NOT_MODIFIED) {
} else if (statusCode == HttpStatus.SC_NOT_MODIFIED) {
logger.info("remote_ext_dict {} is without modified", location);
} else {
logger.info("remote_ext_dict {} return bad code {}", location, response.getStatusLine().getStatusCode());
logger.info("remote_ext_dict {} return bad code {}",
location, response.getStatusLine().getStatusCode());
}
} catch (Exception e) {
e.printStackTrace();
logger.error("remote_ext_dict {} error!", e, location);
} finally {
try {
if (response != null) {
response.close();
}
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
logger.error("remote_ext_dict {} error: {}", location, e.getMessage());
}
}

Expand All @@ -145,8 +142,9 @@ private void loadRemoteCustomWords(CloseableHttpResponse response) {
default:
return;
}
last_modified = response.getLastHeader("Last-Modified") == null ? null : response.getLastHeader("Last-Modified").getValue();
eTags = response.getLastHeader("ETag") == null ? null : response.getLastHeader("ETag").getValue();
lastModified = response.getLastHeader(HttpHeaders.LAST_MODIFIED) == null ?
"" : response.getLastHeader(HttpHeaders.LAST_MODIFIED).getValue();
eTags = response.getLastHeader(HttpHeaders.ETAG) == null ? "" : response.getLastHeader(HttpHeaders.ETAG).getValue();
}

/**
Expand All @@ -163,44 +161,42 @@ private void loadRemoteWordsUnprivileged(String location) {
get.setConfig(buildRequestConfig());
try {
response = httpclient.execute(get);
if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), analysisDefaultCharset(response)));
String line;
boolean firstLine = true;
while ((line = in.readLine()) != null) {
if (firstLine) {
line = IOUtil.removeUTF8BOM(line);
firstLine = false;
}
if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
return;
}

in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), analysisDefaultCharset(response)));
String line;
boolean firstLine = true;
while ((line = in.readLine()) != null) {
if (firstLine) {
line = IOUtil.removeUTF8BOM(line);
firstLine = false;
}

// 切分
String[] param = line.split(SPLITTER);
String word = param[0];
// 切分
String[] param = line.split(SPLITTER);
String word = param[0];

// 排除空行
if (word.length() == 0) {
continue;
}
// 排除空行
if (word.length() == 0) {
continue;
}

// 正规化
if (HanLP.Config.Normalization) {
word = CharTable.convert(word);
}
logger.debug("hanlp remote custom word: {}", word);
CustomDictionary.insert(word, analysisNatureWithFrequency(defaultInfo.v2(), param));
// 正规化
if (HanLP.Config.Normalization) {
word = CharTable.convert(word);
}
in.close();
response.close();
logger.debug("hanlp remote custom word: {}", word);
CustomDictionary.insert(word, analysisNatureWithFrequency(defaultInfo.v2(), param));
}
response.close();
} catch (IllegalStateException | IOException e) {
logger.error("get remote words {} error", e, location);
logger.error("get remote words {} error: {}", location, e.getMessage());
} finally {
try {
IOUtils.close(in);
IOUtils.close(response);
IOUtils.close(in, response);
} catch (Exception e) {
e.printStackTrace();
logger.error("Closing remote words resource error.", e);
}
}
}
Expand All @@ -218,30 +214,28 @@ private void loadRemoteStopWordsUnprivileged(String location) {
get.setConfig(buildRequestConfig());
try {
response = httpclient.execute(get);
if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), analysisDefaultCharset(response)));
String line;
boolean firstLine = true;
while ((line = in.readLine()) != null) {
if (firstLine) {
line = IOUtil.removeUTF8BOM(line);
firstLine = false;
}
logger.debug("hanlp remote stop word: {}", line);
CoreStopWordDictionary.add(line);
if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
return;
}

in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), analysisDefaultCharset(response)));
String line;
boolean firstLine = true;
while ((line = in.readLine()) != null) {
if (firstLine) {
line = IOUtil.removeUTF8BOM(line);
firstLine = false;
}
in.close();
response.close();
logger.debug("hanlp remote stop word: {}", line);
CoreStopWordDictionary.add(line);
}
response.close();
} catch (IllegalStateException | IOException e) {
logger.error("get remote words {} error", e, location);
logger.error("get remote words {} error: {}", location, e.getMessage());
} finally {
try {
IOUtils.close(in);
IOUtils.close(response);
IOUtils.close(in, response);
} catch (Exception e) {
e.printStackTrace();
logger.error("Closing remote stop words resource error.", e);
}
}
}
Expand Down
61 changes: 36 additions & 25 deletions src/main/java/com/hankcs/dic/cache/DictionaryFileCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.plugin.analysis.hanlp.AnalysisHanLPPlugin;

import java.io.*;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.security.AccessController;
import java.security.PrivilegedAction;
Expand All @@ -32,44 +37,50 @@ public class DictionaryFileCache {
private static List<DictionaryFile> customDictionaryFileList = new ArrayList<>();

public static synchronized void configCachePath(Configuration configuration) {
cachePath = configuration.getEnvironment().pluginsFile().resolve(AnalysisHanLPPlugin.PLUGIN_NAME).resolve(DICTIONARY_FILE_CACHE_RECORD_FILE);
cachePath = configuration
.getEnvironment()
.pluginsFile()
.resolve(AnalysisHanLPPlugin.PLUGIN_NAME)
.resolve(DICTIONARY_FILE_CACHE_RECORD_FILE);
}

public static void loadCache() {
File file = cachePath.toFile();
if (!file.exists()) {
return;
}
List<DictionaryFile> dictionaryFiles = AccessController.doPrivileged((PrivilegedAction<List<DictionaryFile>>) () -> {
List<DictionaryFile> dictionaryFileList = new ArrayList<>();
DataInputStream in = null;
try {
in = new DataInputStream(new FileInputStream(file));
int size = in.readInt();
for (int i = 0; i < size; i++) {
DictionaryFile dictionaryFile = new DictionaryFile();
dictionaryFile.read(in);
dictionaryFileList.add(dictionaryFile);
}
} catch (IOException e) {
logger.debug("can not load custom dictionary cache file", e);
} finally {
try {
IOUtils.close(in);
} catch (IOException e) {
e.printStackTrace();
}
}
return dictionaryFileList;
});
List<DictionaryFile> dictionaryFiles =
AccessController.doPrivileged((PrivilegedAction<List<DictionaryFile>>) () -> {
List<DictionaryFile> dictionaryFileList = new ArrayList<>();
DataInputStream in = null;
try {
in = new DataInputStream(new FileInputStream(file));
int size = in.readInt();
for (int i = 0; i < size; i++) {
DictionaryFile dictionaryFile = new DictionaryFile();
dictionaryFile.read(in);
dictionaryFileList.add(dictionaryFile);
}
} catch (IOException e) {
logger.debug("can not load custom dictionary cache file", e);
} finally {
try {
IOUtils.close(in);
} catch (IOException e) {
e.printStackTrace();
}
}
return dictionaryFileList;
});
setCustomDictionaryFileList(dictionaryFiles);
}

public static void writeCache() {
AccessController.doPrivileged((PrivilegedAction<Object>) () -> {
DataOutputStream out = null;
try {
logger.info("begin write down hanlp custom dictionary file cache, file path: {}, custom dictionary file list: {}", cachePath.toFile().getAbsolutePath(), Arrays.toString(customDictionaryFileList.toArray()));
logger.info("begin write down hanlp custom dictionary file cache, file path: {}, custom dictionary file list: {}",
cachePath.toFile().getAbsolutePath(), Arrays.toString(customDictionaryFileList.toArray()));
out = new DataOutputStream(new FileOutputStream(cachePath.toFile()));
out.writeInt(customDictionaryFileList.size());
for (DictionaryFile dictionaryFile : customDictionaryFileList) {
Expand Down
Loading