Skip to content

Commit 46cf627

Browse files
committed
Add support for spell checking roxygen comments
`roxygen2::parse_file()` parses the roxygen comments in each file. Text from the relevant tags is then checked with `hunspell::hunspell()` to find misspelled words. Because roxygen does not store the original positions of parsed tags, we then need to locate the misspelled words in the original roxygen comment lines of the source. This is done by `find_word_positions()`.
1 parent 7f5e3f6 commit 46cf627

11 files changed

+151
-7
lines changed

DESCRIPTION

+11-5
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,16 @@ Encoding: UTF-8
1414
LazyData: true
1515
URL: https://github.com/ropensci/spelling#readme
1616
BugReports: https://github.com/ropensci/spelling/issues
17-
Imports:
18-
commonmark,
19-
xml2,
20-
hunspell,
21-
knitr
17+
Imports:
18+
commonmark,
19+
xml2,
20+
hunspell,
21+
knitr,
22+
roxygen2,
23+
Rcpp
2224
Roxygen: list(markdown = TRUE)
2325
RoxygenNote: 6.0.1
26+
LinkingTo:
27+
Rcpp
28+
Remotes:
29+
klutometis/roxygen

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@ export(spell_check_setup)
88
export(spell_check_test)
99
export(spell_check_text)
1010
export(update_wordlist)
11+
importFrom(Rcpp,sourceCpp)
12+
useDynLib(spelling, .registration = TRUE)

R/RcppExports.R

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
2+
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
4+
# R-level entry point for the compiled word locator: passes `lines` and
# `words` unchanged to the native routine `_spelling_find_word_positions`
# through .Call() and returns whatever that routine returns.
find_word_positions <- function(lines, words) {
5+
.Call(`_spelling_find_word_positions`, lines, words)
6+
}
7+

R/check-files.R

+51
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,54 @@ spell_check_file_plain <- function(path, format, dict){
8989
text <- vapply(words, paste, character(1), collapse = " ")
9090
spell_check_plain(text, dict = dict)
9191
}
92+
93+
# Spell check the roxygen comments of one R source file.
#
# The file is parsed with `roxygen2::parse_file()`, the text of the
# prose-bearing tags is checked with `hunspell::hunspell()`, and every
# misspelled word is then located in the file's roxygen comment lines with
# `find_word_positions()` (roxygen does not keep the source positions of the
# tags it parses).
#
# Arguments:
#   path            Path of the R source file to check.
#   dict            Dictionary handed to `hunspell::hunspell()`.
#   global_options  Options handed to `roxygen2::parse_file()`.
#
# Returns a character vector named by misspelled word. Each element is the
# ", "-separated list of line numbers (in the real file) where the word occurs
# in a roxygen comment, or "NA" when the word could not be located. Every word
# is reported once per file; an empty character vector means nothing was found.
#' @useDynLib spelling, .registration = TRUE
#' @importFrom Rcpp sourceCpp
spell_check_file_roxygen <- function(path, dict, global_options = list()) {
  parsed <- roxygen2::parse_file(file = path, global_options = global_options)

  lines <- readLines(path)
  is_roxygen <- grep("^[[:space:]]*#+'", lines)
  roxygen_lines <- lines[is_roxygen]

  # Roxygen tags that contain text.
  text_tags <- c(
    "concept", "describeIn", "description", "details", "field", "note",
    "param", "return", "section", "slot", "title"
  )

  # Some roxygen tags (such as param) have a name and a description; only the
  # description is spell checked.
  extract_text <- function(x) {
    if (is.list(x) && "description" %in% names(x)) {
      return(x[["description"]])
    }
    x
  }

  # Gather the text of every block up front so that each misspelled word is
  # looked up (and reported) once per file rather than once per block.
  block_text <- function(tags) {
    lapply(tags[names(tags) %in% text_tags], extract_text)
  }
  text <- unlist(lapply(parsed, block_text))
  if (length(text) == 0) {
    return(character())
  }

  # Blank out Rd tags; the tag list is derived from RdTextFilter:
  # https://github.com/wch/r-source/blob/89ec1150299f7be62b839d5d5eb46bd9a63653bd/src/library/tools/R/Rdtools.R#L113-L126
  rd_tags <- c(
    "S3method", "S4method", "command", "code", "docType", "email",
    "encoding", "file", "keyword", "link", "linkS4class", "method", "pkg",
    "var"
  )
  re <- paste0("\\\\(", paste(rd_tags, collapse = "|"), ")[^}]+}")
  text <- blank_matches(text, re)

  bad_words <- sort(unique(unlist(hunspell::hunspell(text, dict = dict))))
  if (length(bad_words) == 0) {
    return(character())
  }

  res <- find_word_positions(roxygen_lines, bad_words)

  # `res$line` indexes into `roxygen_lines`; map it back to line numbers of
  # the real file.
  res$line <- is_roxygen[res$line]

  vapply(split(res$line, res$word), paste, character(1), collapse = ", ")
}
136+
137+
# Replace every match of the regular expression `re` in the character vector
# `str` with a run of spaces of the same length (in characters), so that the
# remaining text keeps its original positions. Returns the modified vector.
blank_matches <- function(str, re) {
  hits <- gregexpr(re, str)
  widths <- lapply(regmatches(str, hits), nchar)
  regmatches(str, hits) <- lapply(widths, function(n) strrep(" ", n))
  str
}

R/spell-check.R

+6-2
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,19 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, lang = "en_GB", use
4040
rd_files <- list.files(file.path(pkg$path, "man"), "\\.rd$", ignore.case = TRUE, full.names = TRUE)
4141
rd_lines <- lapply(sort(rd_files), spell_check_file_rd, dict = dict)
4242

43+
# Check Roxygen comments
44+
r_files <- list.files(file.path(pkg$path, "R"), "\\.R$", ignore.case = TRUE, full.names = TRUE)
45+
r_lines <- lapply(sort(r_files), spell_check_file_roxygen, dict = dict, global_options = roxygen2::load_options(pkg$path))
46+
4347
# Check 'DESCRIPTION' fields
4448
pkg_fields <- c("title", "description")
4549
pkg_lines <- lapply(pkg_fields, function(x){
4650
spell_check_file_text(textConnection(pkg[[x]]), dict = dict)
4751
})
4852

4953
# Combine
50-
all_sources <- c(rd_files, pkg_fields)
51-
all_lines <- c(rd_lines, pkg_lines)
54+
all_sources <- c(r_files, rd_files, pkg_fields)
55+
all_lines <- c(r_lines, rd_lines, pkg_lines)
5256

5357
if(isTRUE(vignettes)){
5458
# Markdown vignettes

man/spell_check_files.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/spell_check_package.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/wordlist.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.o
2+
*.so
3+
*.dll

src/RcppExports.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
2+
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
4+
#include <Rcpp.h>
5+
6+
using namespace Rcpp;
7+
8+
// find_word_positions
// Declaration of the C++ implementation; its definition is not in this file.
9+
Rcpp::DataFrame find_word_positions(CharacterVector lines, CharacterVector words);
10+
// C-linkage shim invoked from R through .Call(): converts the two SEXP
// arguments to CharacterVector, calls find_word_positions() and hands the
// wrapped result back to R.
RcppExport SEXP _spelling_find_word_positions(SEXP linesSEXP, SEXP wordsSEXP) {
11+
// BEGIN_RCPP / END_RCPP bracket the body (per the Rcpp documentation they
// turn C++ exceptions into R errors).
BEGIN_RCPP
12+
Rcpp::RObject rcpp_result_gen;
13+
Rcpp::RNGScope rcpp_rngScope_gen;
14+
Rcpp::traits::input_parameter< CharacterVector >::type lines(linesSEXP);
15+
Rcpp::traits::input_parameter< CharacterVector >::type words(wordsSEXP);
16+
rcpp_result_gen = Rcpp::wrap(find_word_positions(lines, words));
17+
return rcpp_result_gen;
18+
END_RCPP
19+
}
20+
21+
// Table of routines callable via .Call(): one entry for
// _spelling_find_word_positions (which takes 2 arguments), followed by the
// all-NULL entry that terminates the table.
static const R_CallMethodDef CallEntries[] = {
22+
{"_spelling_find_word_positions", (DL_FUNC) &_spelling_find_word_positions, 2},
23+
{NULL, NULL, 0}
24+
};
25+
26+
// Library initialisation hook: registers CallEntries as the .Call routines of
// this DLL (the other routine tables are passed as NULL) and then calls
// R_useDynamicSymbols(dll, FALSE), which per "Writing R Extensions" restricts
// lookup to the registered routines.
RcppExport void R_init_spelling(DllInfo *dll) {
27+
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
28+
R_useDynamicSymbols(dll, FALSE);
29+
}

src/find_word_positions.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#include <Rcpp.h>
2+
#include <cstring>
3+
using namespace Rcpp;
4+
5+
// [[Rcpp::export]]
6+
Rcpp::DataFrame find_word_positions(CharacterVector lines,
7+
CharacterVector words) {
8+
std::vector<const char*> found_words;
9+
std::vector<int> found_lines;
10+
std::vector<int> found_starts;
11+
12+
for (int i = 0; i < words.size(); ++i) {
13+
const char* word = words.at(i);
14+
size_t len = strlen(word);
15+
bool found = false;
16+
for (int j = 0; j < lines.size(); ++j) {
17+
const char* line = lines.at(j);
18+
for (const char* p = line; (p = strstr(p, word)) != NULL; ++p) {
19+
if ((p == line) || (p != NULL && !isalnum(p[-1]))) {
20+
if (!isalnum(p[len])) {
21+
found = true;
22+
found_words.push_back(word);
23+
found_lines.push_back(j + 1);
24+
found_starts.push_back((int)(p - lines.at(j)) + 1);
25+
}
26+
p += len;
27+
}
28+
}
29+
}
30+
if (!found) {
31+
found_words.push_back(word);
32+
found_lines.push_back(NA_INTEGER);
33+
found_starts.push_back(NA_INTEGER);
34+
}
35+
}
36+
return DataFrame::create(_["word"] = found_words, _["line"] = found_lines,
37+
_["start"] = found_starts,
38+
Rcpp::_["stringsAsFactors"] = false);
39+
}

0 commit comments

Comments
 (0)