Fix #4, fix #5

nathanlesage · nathanlesage · commit 77e2dfe7e3eb · 2020-09-11T15:31:33.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# 1.2.1
+
+- Fixed a logical error where activating strict mode would actually use the loose mode while parsing a single citation.
+- Fixed a problem where citation parts with no comma after the citekey would be rejected as invalid and hence not parsed correctly.
+
 # 1.2.0
 
 - Refactored the code base to be more maintainable and streamlined.
diff --git a/dist/citr.js b/dist/citr.js
@@ -23,30 +23,44 @@ function parseSingle(citation, strict = false) {
         throw new Error(`Invalid Citation - Invalid citation passed: ${citation}.`);
     let returnCitations = [];
     let _citation = citation.substr(1, citation.length - 2).split(';');
+    let invalidPrefixes = [];
     for (let c of _citation) {
         if (c === '')
             continue;
-        if (!validator_1.validateCitationPart(c))
-            throw new Error(`No key or multiple keys Found - Invalid citation passed: ${c}.`);
-        let prefix = c.split('@')[0].trim();
+        if (!validator_1.validateCitationPart(c)) {
+            invalidPrefixes.push(c);
+            continue;
+        }
+        let prefix = '';
+        if (invalidPrefixes.length === 1) {
+            prefix = invalidPrefixes + ';';
+        }
+        else if (invalidPrefixes.length > 1) {
+            prefix = invalidPrefixes.join(';');
+        }
+        prefix += c.split('@')[0];
+        prefix = prefix.trim();
+        invalidPrefixes = [];
         let suppressAuthor = c.indexOf('@') > 0 && c[c.indexOf('@') - 1] === '-';
         if (suppressAuthor)
             prefix = prefix.substr(0, prefix.length - 1).trim();
         let commaIndex = c.split('@')[1].indexOf(',') + 1;
+        if (commaIndex === 0)
+            commaIndex = c.split('@')[1].indexOf(' ') + 1;
         if (commaIndex <= 0)
             commaIndex = undefined;
         let citationKeyPart = c.substr(c.indexOf('@'), commaIndex);
         let extractedKey = null;
         if (strict) {
-            extractedKey = regex_1.looseCitekeyValidatorRE.exec(citationKeyPart);
+            extractedKey = regex_1.strictCitekeyValidatorRE.exec(citationKeyPart);
         }
         else {
-            extractedKey = regex_1.strictCitekeyValidatorRE.exec(citationKeyPart);
+            extractedKey = regex_1.looseCitekeyValidatorRE.exec(citationKeyPart);
         }
         if (extractedKey === null)
             throw new Error(`Invalid Key - Invalid citation passed: ${c}`);
         let citeKey = extractedKey[1];
-        let afterKey = c.split('@')[1].substr(extractedKey[0].length + 1).trim();
+        let afterKey = c.split('@')[1].substr(extractedKey[1].length).trim();
         let { suffix, locator, label } = retrieve_locator_1.extractLocator(afterKey);
         returnCitations.push({
             prefix: prefix,
@@ -57,6 +71,9 @@ function parseSingle(citation, strict = false) {
             'suppress-author': suppressAuthor
         });
     }
+    if (returnCitations.length === 0 && _citation.length > 0) {
+        throw new Error(`Invalid citation passed: ${citation}`);
+    }
     return returnCitations;
 }
 exports.parseSingle = parseSingle;
diff --git a/lib/citr.ts b/lib/citr.ts
@@ -81,18 +81,42 @@ export function parseSingle(citation: string, strict: boolean = false): Citation
   // they are simply square brackets. Additionally, split the citation along delimiters.
   let _citation = citation.substr(1, citation.length - 2).split(';')
 
+  let invalidPrefixes: string[] = []
+
   // Now iterate over all citations the user passed in to return them as an array.
   for (let c of _citation) {
     // It could be that the user just ended his/her citation with a ;
     if (c === '') continue
 
     // Make sure there is exactly one @ available.
-    if (!validateCitationPart(c)) throw new Error(`No key or multiple keys Found - Invalid citation passed: ${c}.`)
+    if (!validateCitationPart(c)) {
+      // If the validator fails, this means that there's no @ or multiple @,
+      // and hence no valid citation key in there. This means that the user has
+      // written something along the lines of [as we can see here; further
+      // @citekey1234; @citekey4321] or [see the corresponding mails
+      // hello@example.com and webmaster@example.com; further @citekey1234].
+      // --> treat it as a part of the prefix for the next citation part.
+      invalidPrefixes.push(c)
+      continue
+    }
 
-    // The Prefix is defined as everything before the citation key, so the first index of
-    // the split array will contain the Prefix (If @ is the first character, the string will
-    // be empty).
-    let prefix = c.split('@')[0].trim()
+    // The Prefix is defined as everything before the citation key, so the
+    // first index of the split array will contain the Prefix (If @ is the
+    // first character, the string will be empty). Make sure to add possible
+    // invalid prefixes from before
+    let prefix = ''
+    if (invalidPrefixes.length === 1) {
+      prefix = invalidPrefixes + ';'
+    }
+    else if (invalidPrefixes.length > 1) {
+      prefix = invalidPrefixes.join(';')
+    }
+
+    prefix += c.split('@')[0] // Add the actual prefix
+    prefix = prefix.trim() // Trim whitespaces
+
+    // Reset the additional prefixes here.
+    invalidPrefixes = []
 
     // Next, the user can decide to omit the author from the citation by prepending the
     // @-character with a minus (-). We cannot look for the end of the prefix because
@@ -106,18 +130,21 @@ export function parseSingle(citation: string, strict: boolean = false): Citation
 
     // Now we need to extract the citation key. We'll be reusing the citation
     // validator regular expression. But as the secondHalf also contains the
-    // suffix, locator, etc., we have to first cut it down. Therefore, we'll
-    // assume a comma to separate the citekey from the rest of the suffix (or
-    // extract everything, if there is no comma in there.)
+    // suffix, locator, etc., we have to first cut it down. The citation key
+    // can either be terminated with a comma or with a space.
     let commaIndex: number | undefined = c.split('@')[1].indexOf(',') + 1
+    // If the commaIndex is 0, this means there was no comma - check for space
+    if (commaIndex === 0) commaIndex = c.split('@')[1].indexOf(' ') + 1
     // Pass undefined to extract everything
     if (commaIndex <= 0) commaIndex = undefined
+
+    // Now extract the key
     let citationKeyPart = c.substr(c.indexOf('@'), commaIndex)
     let extractedKey: RegExpExecArray | null = null
     if (strict) {
-      extractedKey = looseCitekeyValidatorRE.exec(citationKeyPart)
-    } else {
       extractedKey = strictCitekeyValidatorRE.exec(citationKeyPart)
+    } else {
+      extractedKey = looseCitekeyValidatorRE.exec(citationKeyPart)
     }
 
     // If the match has not been found, abort
@@ -128,7 +155,7 @@ export function parseSingle(citation: string, strict: boolean = false): Citation
 
     // The final two things that could possibly still be in the citation are a
     // locator and a suffix. Let us first extract everything after the key.
-   let afterKey = c.split('@')[1].substr(extractedKey[0].length + 1).trim()
+   let afterKey = c.split('@')[1].substr(extractedKey[1].length).trim()
 
     // The logic to get the locator is extremely difficult, as the locator
     // mainly is written in natural language. We'll offload the work to
@@ -146,6 +173,12 @@ export function parseSingle(citation: string, strict: boolean = false): Citation
     })
   }
 
+  // Indicate that no citation has been found, which is a good indicator
+  // that there is no valid citation (even excluding the invalid prefixes)
+  if (returnCitations.length === 0 && _citation.length > 0) {
+    throw new Error(`Invalid citation passed: ${citation}`)
+  }
+
   // After everything has run, return all citations found.
   return returnCitations
 }
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "@zettlr/citr",
-    "version": "1.2.0",
+    "version": "1.2.1",
     "description": "A small library to parse Markdown Citeproc notation as CSL JSON",
     "author": "Hendrik Erz <hendrik@zettlr.com>",
     "license": "GPL-3.0",
diff --git a/test/parse-single.js b/test/parse-single.js
@@ -294,6 +294,35 @@ let singleCitations = [
       }
     ]
   },
+  // These tests test whether Citr works even if there is no comma immediately
+  // after the citation key
+  {
+    'input': '[vgl. @Koselleck2006 für einen Überblick]',
+    'expected': [
+      {
+        prefix: 'vgl.',
+        suffix: 'für einen Überblick',
+        id: 'Koselleck2006',
+        locator: '',
+        label: 'page',
+        'suppress-author': false
+      }
+    ]
+  },
+  // Tests to check if invalid citation parts end up being prefixes of the following ones
+  {
+    'input': '[ein invalider prefix; außerdem @Volk2017, Abschn. 2-3]',
+    'expected': [
+      {
+        prefix: 'ein invalider prefix; außerdem',
+        suffix: '',
+        id: 'Volk2017',
+        locator: '2-3',
+        label: 'section',
+        'suppress-author': false
+      }
+    ]
+  },
   // expected = undefined indicates that the function should throw
   {
     'input': 'Should not @work out',
@@ -310,7 +339,7 @@ let singleCitations = [
   {
     'input': '[Malformed ID inside @.this key]',
     'expected': undefined
-  },
+  }
 ]
 
 describe('Citr#parseSingle()', function () {

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@zettlr/citr",`
`3`		`- "version": "1.2.0",`
	`3`	`+ "version": "1.2.1",`
`4`	`4`	`"description": "A small library to parse Markdown Citeproc notation as CSL JSON",`
`5`	`5`	`"author": "Hendrik Erz <hendrik@zettlr.com>",`
`6`	`6`	`"license": "GPL-3.0",`