@@ -138,10 +138,15 @@ compare_documents <- function(sample1,
138138# '
139139# ' Compare the writer profiles from two handwritten documents to predict whether
140140# ' they were written by the same person. Use either a similarity score or a
141- # ' score-based likelihood ratio as a comparison method.
141+ # ' score-based likelihood ratio (SLR) as a comparison method.
142142# '
143143# ' @param writer_profiles A dataframe of writer profiles or cluster fill rates
144144# ' calculated with [get_cluster_fill_rates]
145+ # ' @param writer_profiles2 Optional. A second dataframe of writer profiles or
146+ # ' cluster fill rates. If it is not provided, a score or SLR will be
147+ # ' calculated between every pair of rows in writer_profiles. If
148+ # ' writer_profiles2 is provided, a score or SLR will be calculated between
149+ # ' every row of writer_profiles and every row of writer_profiles2.
145150# ' @param score_only TRUE returns only the similarity score. FALSE returns the
146151# ' similarity score and a score-based likelihood ratio for that score,
147152# ' calculated using `reference_scores`.
@@ -166,12 +171,14 @@ compare_documents <- function(sample1,
166171# ' @md
167172compare_writer_profiles <- function (
168173 writer_profiles ,
174+ writer_profiles2 = NULL ,
169175 score_only = TRUE ,
170176 rforest = NULL ,
171177 reference_scores = NULL ) {
172178 params <- list (
173179 samples = NULL ,
174180 writer_profiles = writer_profiles ,
181+ writer_profiles2 = writer_profiles2 ,
175182 score_only = score_only ,
176183 rforest = rforest ,
177184 project_dir = NULL ,
@@ -184,10 +191,14 @@ compare_writer_profiles <- function(
184191 stop(" Writer profiles must be a rates dataframe created by get_writer_profiles() with measure = 'rates'." )
185192 }
186193
194+ if (! is.null(params $ writer_profiles2 ) && ! is_rates_df(params $ writer_profiles2 )) {
195+ stop(" Writer profiles must be a rates dataframe created by get_writer_profiles() with measure = 'rates'." )
196+ }
197+
187198 params <- handle_null_values(params )
188199
189200 message(" Calculating distance between samples..." )
190- params $ dist <- get_distances(df = writer_profiles , distance_measures = params $ rforest $ distance_measures )
201+ params $ dist <- get_distances(df = writer_profiles , distance_measures = params $ rforest $ distance_measures , df2 = writer_profiles2 )
191202
192203 message(" Calculating similarity score..." )
193204 params $ score <- get_score(d = params $ dist , rforest = params $ rforest )$ score
@@ -205,6 +216,21 @@ compare_writer_profiles <- function(
205216
206217# Internal Functions ------------------------------------------------------
207218
219+ # ' Handle Null Values
220+ # '
221+ # ' The following rules are applied:
222+ # ' * If reference_scores are supplied and score_only is TRUE, score_only is changed to FALSE
223+ # ' * If project_dir is NULL, the project_dir is set to tempdir() > comparison.
224+ # ' * If rforest is NULL, the default random_forest is used
225+ # ' * If reference_scores is NULL, the default ref_scores are used
226+ # '
227+ # ' @param params A list of parameters: samples, writer_profiles,
228+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
229+ # ' score, slr.
230+ # '
231+ # ' @returns A list of parameters
232+ # '
233+ # ' @noRd
208234handle_null_values <- function (params ) {
209235 if (! is.null(params $ reference_scores ) && params $ score_only ) {
210236 message(" Reference scores were supplied so score_only will be changed to FALSE." )
@@ -226,13 +252,36 @@ handle_null_values <- function(params) {
226252 return (params )
227253}
228254
229- create_dirs <- function (params , subdirs = NULL ) {
255+ # ' Create Directories
256+ # '
257+ # ' Subfolders clusters, docs, and graphs are created in the project directory.
258+ # '
259+ # ' @param params A list of parameters: samples, writer_profiles,
260+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
261+ # ' score, slr.
262+ # '
263+ # ' @returns No value returned
264+ # '
265+ # ' @noRd
266+ create_dirs <- function (params ) {
230267 create_dir(params $ project_dir )
231268 create_dir(file.path(params $ project_dir , " clusters" ))
232269 create_dir(file.path(params $ project_dir , " docs" ))
233270 create_dir(file.path(params $ project_dir , " graphs" ))
234271}
235272
273+ # ' Handle Samples with Same Name
274+ # '
275+ # ' If the input samples are in different directories but have the same
276+ # ' filenames, the samples are relabelled as sample1.png and sample2.png.
277+ # '
278+ # ' @param params A list of parameters: samples, writer_profiles,
279+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
280+ # ' score, slr.
281+ # '
282+ # ' @returns A list of parameters
283+ # '
284+ # ' @noRd
236285handle_samples_w_same_name <- function (params ) {
237286
238287 # samples in two different directories CAN have the same filename
@@ -253,6 +302,19 @@ handle_samples_w_same_name <- function(params) {
253302 return (params )
254303}
255304
305+ # ' Check Directory Contents
306+ # '
307+ # ' Check the contents of project_dir > dir_name to ensure that it does not
308+ # ' contain helper files for documents other than sample1 or sample2.
309+ # '
310+ # ' @param params A list of parameters: samples, writer_profiles,
311+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
312+ # ' score, slr.
313+ # ' @param dir_name The name of the subdirectory in project_dir to check.
314+ # '
315+ # ' @returns A list of parameters
316+ # '
317+ # ' @noRd
256318check_dir_contents <- function (params , dir_name ) {
257319 if (! is.null(params $ project_dir ) && dir.exists(file.path(params $ project_dir , dir_name ))) {
258320 actual_files <- list.files(file.path(params $ project_dir , dir_name ))
@@ -296,6 +358,18 @@ is_rates_df <- function(df) {
296358 }
297359}
298360
361+ # ' Run Checks
362+ # '
363+ # ' Check the contents of the clusters, docs, and graphs folders in the project
364+ # ' directory.
365+ # '
366+ # ' @param params A list of parameters: samples, writer_profiles,
367+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
368+ # ' score, slr.
369+ # '
370+ # ' @returns A list of parameters
371+ # '
372+ # ' @noRd
299373run_checks <- function (params ) {
300374
301375 check_dir_contents(params , " clusters" )
@@ -305,6 +379,17 @@ run_checks <- function(params) {
305379 return (params )
306380}
307381
382+ # ' Copy Samples to Project Directory
383+ # '
384+ # ' Copy the samples to the project directory.
385+ # '
386+ # ' @param params A list of parameters: samples, writer_profiles,
387+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
388+ # ' score, slr.
389+ # '
390+ # ' @returns A list of parameters
391+ # '
392+ # ' @noRd
308393copy_samples_to_project_dir <- function (params ) {
309394 # Copy samples to project_dir > docs
310395 message(" Copying samples to project directory > docs...\n " )
@@ -319,7 +404,18 @@ copy_samples_to_project_dir <- function(params) {
319404 return (params )
320405}
321406
322-
407+ # ' Get Score-based Likelihood Ratio
408+ # '
409+ # ' Calculate the score-based likelihood ratio for each score in params$score
410+ # ' using params$rforest and params$reference_scores.
411+ # '
412+ # ' @param params A list of parameters: samples, writer_profiles,
413+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
414+ # ' score, slr.
415+ # '
416+ # ' @returns A list of parameters
417+ # '
418+ # ' @noRd
323419get_slr <- function (params ) {
324420 get_slr_for_single_score <- function (score , densities ) {
325421 numerator <- eval_density_at_point(den = densities $ same_writer , x = score , type = " numerator" )
@@ -335,6 +431,18 @@ get_slr <- function(params) {
335431 return (params )
336432}
337433
434+ # ' Make Results Dataframe
435+ # '
436+ # ' Format the comparison results in a dataframe with columns docname1, writer1,
437+ # ' docname2, writer2, ground_truth, score, and optionally slr.
438+ # '
439+ # ' @param params A list of parameters: samples, writer_profiles,
440+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
441+ # ' score, slr.
442+ # '
443+ # ' @returns A dataframe
444+ # '
445+ # ' @noRd
338446make_results_df <- function (params ) {
339447 df <- params $ dist
340448
@@ -356,6 +464,17 @@ make_results_df <- function(params) {
356464 return (df )
357465}
358466
467+ # ' Cleanup
468+ # '
469+ # ' Delete the comparison folder and its contents from the tempdir().
470+ # '
471+ # ' @param params A list of parameters: samples, writer_profiles,
472+ # ' writer_profiles2, score_only, rforest, project_dir, reference_scores,
473+ # ' score, slr.
474+ # '
475+ # ' @returns No return value
476+ # '
477+ # ' @noRd
359478clean_up <- function (params ) {
360479 # Optional. Delete comparison folder and contents in tempdir()
361480 if (params $ project_dir == file.path(tempdir(), " comparison" )) {
0 commit comments