/* eslint-disable no-useless-escape */
/* eslint-disable no-cond-assign */
// `cleanISBN`, `cleanDOI` and `extractIdentifiers` adapted from https://github.com/zotero/utilities/blob/43142236a282e5e1a3190694628f329aa2e0ba8e/utilities.js

/**
 * Extract a bare DOI from a string, dropping whatever precedes it (e.g. an
 * `info:doi` or URL prefix) and trailing punctuation that is not part of it.
 *
 * Processing steps:
 *  1. If the input looks like an http(s) URL, it is URI-decoded (best effort).
 *  2. A percent-escaped "<" that is followed by a percent-escaped ">" is
 *     decoded, in either hex-digit case, even when the input is not a URL.
 *  3. The first substring of the form `10[.NNNN…]/suffix` is taken, where the
 *     suffix is a run of non-whitespace characters not ending in "." or ",".
 *  4. A trailing `]`, `)` or `}` is removed when the text before the DOI
 *     contains the matching opening bracket still unclosed.
 *
 * @param {string} x - Text containing a DOI.
 * @returns {string|null} The cleaned DOI, or `null` when no DOI is found.
 * @throws {Error} If `x` is not a string.
 */
function cleanDOI(x) {
	if (typeof x !== "string") {
		throw new Error("cleanDOI: argument must be a string");
	}

	// Work on a local copy instead of reassigning the parameter
	let text = x;

	// If it's a URL, try to decode it
	if (/^https?:/.test(text)) {
		try {
			text = decodeURIComponent(text);
		}
		catch (e) {
			// URI contains an invalid escape sequence; carry on with the raw text
			console.warn("Not decoding URL-like DOI because of invalid escape sequence: " + text);
		}
	}

	// Even if it's not a URL, decode %3C followed by %3E as < >.
	// Hex digits in percent-escapes are case-insensitive, so %3c / %3e count too.
	const openingPos = text.search(/%3C/i);
	if (openingPos !== -1 && openingPos < text.search(/%3E/i)) {
		text = text.replace(/%3C/gi, "<").replace(/%3E/gi, ">");
	}

	const doi = text.match(/10(?:\.[0-9]{4,})?\/[^\s]*[^\s\.,]/);
	if (!doi) {
		return null;
	}
	let result = doi[0];

	// A closing bracket at the end of the match may belong to the surrounding
	// text, e.g. "(doi: 10.1000/xyz)", rather than to the DOI itself.
	const closingToOpening = { "]": "[", ")": "(", "}": "{" };
	const trailingBracket = result.slice(-1);
	if (Object.hasOwn(closingToOpening, trailingBracket)) {
		const openingBracket = closingToOpening[trailingBracket];
		const beforeDOI = text.slice(0, doi.index);
		// The opening bracket is unclosed if it occurs after the last closing one
		if (beforeDOI.lastIndexOf(openingBracket) > beforeDOI.lastIndexOf(trailingBracket)) {
			// Remove the trailing bracket from the DOI
			result = result.slice(0, -1);
		}
	}
	return result;
}
+ * Return isbn if valid, otherwise return false + * @param {String} isbn + * @param {Boolean} [dontValidate=false] Do not validate check digit + * @return {String|Boolean} Valid ISBN or false + */ +function cleanISBN(isbnStr, dontValidate) { isbnStr = isbnStr.toUpperCase() .replace(/[\x2D\xAD\u2010-\u2015\u2043\u2212]+/g, ''); // Ignore dashes var isbnRE = /\b(?:97[89]\s*(?:\d\s*){9}\d|(?:\d\s*){9}[\dX])\b/g, isbnMatch; - - // eslint-disable-next-line no-cond-assign while (isbnMatch = isbnRE.exec(isbnStr)) { var isbn = isbnMatch[0].replace(/\s+/g, ''); @@ -43,10 +95,9 @@ const cleanISBN = (isbnStr, dontValidate) => { return false; } -// https://github.com/zotero/zotero/blob/57989260935703f0c7d570a39bcf6516b8c61df6/chrome/content/zotero/xpcom/utilities_internal.js#L1409 -const extractIdentifiers = text => { - const identifiers = []; - const foundIDs = new Set(); // keep track of identifiers to avoid duplicates +function extractIdentifiers(text) { + var identifiers = []; + var foundIDs = new Set(); // keep track of identifiers to avoid duplicates // First look for DOIs var ids = text.split(/[\s\u00A0]+/); // whitespace + non-breaking space @@ -67,8 +118,6 @@ const extractIdentifiers = text => { .toUpperCase(); let ISBN_RE = /(?:\D|^)(97[89]\d{10}|\d{9}[\dX])(?!\d)/g; let isbn; - - // eslint-disable-next-line no-cond-assign while (isbn = ISBN_RE.exec(ids)) { isbn = cleanISBN(isbn[1]); if (isbn && !foundIDs.has(isbn)) { @@ -82,8 +131,6 @@ const extractIdentifiers = text => { // Next try spaces if (!identifiers.length) { ids = ids.replace(/[ \u00A0]+/g, ""); // space + non-breaking space - - // eslint-disable-next-line no-cond-assign while (isbn = ISBN_RE.exec(ids)) { isbn = cleanISBN(isbn[1]); if (isbn && !foundIDs.has(isbn)) { @@ -112,7 +159,20 @@ const extractIdentifiers = text => { } } - // Finally try for PMID + // Next, try ADS Bibcodes + if (!identifiers.length) { + // regex as in the ADS Bibcode translator + let adsBibcode_RE = /\b(\d{4}\D\S{13}[A-Z.:])\b/g; + 
let adsBibcode; + while ((adsBibcode = adsBibcode_RE.exec(text)) && !foundIDs.has(adsBibcode)) { + identifiers.push({ + adsBibcode: adsBibcode[1] + }); + foundIDs.add(adsBibcode); + } + } + + // Finally, try PMID if (!identifiers.length) { // PMID; right now, the longest PMIDs are 8 digits, so it doesn't seem like we'll // need to discriminate for a fairly long time diff --git a/src/js/component/item/actions/add-by-identifier.jsx b/src/js/component/item/actions/add-by-identifier.jsx index 42353207..c3b53dfa 100644 --- a/src/js/component/item/actions/add-by-identifier.jsx +++ b/src/js/component/item/actions/add-by-identifier.jsx @@ -194,7 +194,7 @@ const AddByIdentifier = props => {

diff --git a/src/js/component/modal/add-by-identifier.jsx b/src/js/component/modal/add-by-identifier.jsx index 1201c63a..4882cb7a 100644 --- a/src/js/component/modal/add-by-identifier.jsx +++ b/src/js/component/modal/add-by-identifier.jsx @@ -149,7 +149,7 @@ const AddByIdentifierModal = () => { onChange={ handleInputChange } onCommit={ handleInputCommit } onPaste={ handlePaste } - placeholder="URL, ISBNs, DOIs, PMIDs, or arXiv IDs" + placeholder="URL, ISBNs, DOIs, PMIDs, arXiv IDs, or ADS Bibcodes" ref={ inputEl } tabIndex={ 0 } value={ identifier } diff --git a/src/js/component/modal/create-parent-item.jsx b/src/js/component/modal/create-parent-item.jsx index a4c88d57..6dc3f7a2 100644 --- a/src/js/component/modal/create-parent-item.jsx +++ b/src/js/component/modal/create-parent-item.jsx @@ -200,7 +200,7 @@ const CreateParentItemModal = () => { >