Line 6: |
Line 6: |
| ]] | | ]] |
| | | |
− | local is_set, in_array, set_error, select_one, add_maint_cat, substitute; -- functions in Module:Citation/CS1/Utilities | + | local is_set, in_array, set_error, select_one, add_maint_cat, substitute, make_wikilink; -- functions in Module:Citation/CS1/Utilities |
| | | |
| local z; -- table of tables defined in Module:Citation/CS1/Utilities | | local z; -- table of tables defined in Module:Citation/CS1/Utilities |
Line 12: |
Line 12: |
| local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration |
| | | |
| + | local wd_int_lang = (mw.site.server:match ('wikidata') and mw.getCurrentFrame():preprocess('{{int:lang}}')) or ''; |
| + | |
| + | |
| + | --============================<< H E L P E R F U N C T I O N S >>============================================ |
| | | |
| --[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | | --[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- |
Line 22: |
Line 26: |
| local url_string = options.id; | | local url_string = options.id; |
| local ext_link; | | local ext_link; |
| + | local this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org |
| + | local wd_article; -- article title from wikidata |
| | | |
| if options.encode == true or options.encode == nil then | | if options.encode == true or options.encode == nil then |
| url_string = mw.uri.encode( url_string ); | | url_string = mw.uri.encode( url_string ); |
| end | | end |
| + | |
| + | ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id)); |
| + | if is_set(options.access) then |
| + | ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock |
| + | end |
| + | |
| + | this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain) |
| | | |
− | ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id)); | + | if string.match (mw.site.server, 'wikidata') then |
− | if options.free then
| + | this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on wikidata so use interface language setting instead |
− | ext_link = substitute (cfg.presentation['free to read'], ext_link); -- add the free-to-read lock | |
| end | | end |
| | | |
− | return mw.ustring.format( '[[%s|%s]]%s%s', options.link, options.label, options.separator or " ", ext_link); | + | if is_set (options.q) then |
| + | wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd |
| + | if wd_article then |
| + | wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- make interwiki link if taken from wd; leading colon required |
| + | end |
| + | end |
| | | |
− | -- return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
| + | return table.concat ({ |
− | -- options.link, options.label, options.separator or " ",
| + | make_wikilink (wd_article or options.link, options.label), -- wikidata link or locally specified |
− | -- options.prefix, url_string, options.suffix or "",
| + | options.separator or ' ', |
− | -- mw.text.nowiki(options.id)
| + | ext_link |
− | -- );
| + | }); |
| end | | end |
| | | |
Line 49: |
Line 66: |
| | | |
| local function internal_link_id(options) | | local function internal_link_id(options) |
− | return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]', | + | |
− | options.link, options.label, options.separator or " ", | + | return table.concat ( |
− | options.prefix, options.id, options.suffix or "", | + | { |
− | mw.text.nowiki(options.id)
| + | make_wikilink (options.link, options.label), |
− | ); | + | options.separator or ' ', |
| + | make_wikilink ( |
| + | table.concat ( |
| + | { |
| + | options.prefix, |
| + | options.id, |
| + | options.suffix or '' |
| + | }), |
| + | mw.text.nowiki (options.id) |
| + | ); |
| + | }); |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ |
| + | |
| + | Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is |
| + | in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because |
| + | |embargo= was not set in this cite. |
| + | |
| + | ]] |
| + | |
| + | local function is_embargoed (embargo) |
| + | if is_set (embargo) then |
| + | local lang = mw.getContentLanguage(); |
| + | local good1, embargo_date, good2, todays_date; |
| + | good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo ); |
| + | good2, todays_date = pcall( lang.formatDate, lang, 'U' ); |
| + | |
| + | if good1 and good2 then -- if embargo date and today's date are good dates |
| + | if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date in the future? |
| + | return embargo; -- still embargoed |
| + | else |
| + | add_maint_cat ('embargo') |
| + | return ''; -- unset because embargo has expired |
| + | end |
| + | end |
| + | end |
| + | return ''; -- |embargo= not set return empty string |
| end | | end |
| | | |
Line 60: |
Line 115: |
| | | |
| ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. | | ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. |
− | ISBN-13 is checked in check_isbn(). | + | ISBN-13 is checked in isbn(). |
| | | |
| If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length | | If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length |
Line 69: |
Line 124: |
| local function is_valid_isxn (isxn_str, len) | | local function is_valid_isxn (isxn_str, len) |
| local temp = 0; | | local temp = 0; |
− | isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | + | isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 |
| len = len+1; -- adjust to be a loop counter | | len = len+1; -- adjust to be a loop counter |
| for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | | for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum |
Line 82: |
Line 137: |
| | | |
| | | |
− | --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >---------------------------------------------- | + | --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------- |
| | | |
| ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit. | | ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit. |
Line 93: |
Line 148: |
| local temp=0; | | local temp=0; |
| | | |
− | isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | + | isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 |
| for i, v in ipairs( isxn_str ) do | | for i, v in ipairs( isxn_str ) do |
| temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | | temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit |
Line 101: |
Line 156: |
| | | |
| | | |
− | --[[--------------------------< C H E C K _ I S B N >------------------------------------------------------------ | + | --[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- |
| + | |
| + | lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization) |
| + | 1. Remove all blanks. |
| + | 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. |
| + | 3. If there is a hyphen in the string: |
| + | a. Remove it. |
| + | b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out): |
| + | 1. All these characters should be digits, and there should be six or less. (not done in this function) |
| + | 2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. |
| + | |
| + | Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. |
| + | ]] |
| + | |
| + | local function normalize_lccn (lccn) |
| + | lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace |
| + | |
| + | if nil ~= string.find (lccn,'/') then |
| + | lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it |
| + | end |
| + | |
| + | local prefix |
| + | local suffix |
| + | prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix |
| + | |
| + | if nil ~= suffix then -- if there was a hyphen |
| + | suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6 |
| + | lccn=prefix..suffix; -- reassemble the lccn |
| + | end |
| + | |
| + | return lccn; |
| + | end |
| + | |
| + | --============================<< I D E N T I F I E R F U N C T I O N S >>==================================== |
| + | |
| + | --[[--------------------------< A R X I V >-------------------------------------------------------------------- |
| + | |
| + | See: http://arxiv.org/help/arxiv_identifier |
| + | |
| + | format and error check arXiv identifier. There are three valid forms of the identifier: |
| + | the first form, valid only between date codes 9108 and 0703 is: |
| + | arXiv:<archive>.<class>/<date code><number><version> |
| + | where: |
| + | <archive> is a string of alpha characters - may be hyphenated; no other punctuation |
| + | <class> is a string of alpha characters - may be hyphenated; no other punctuation; not the same as |class= parameter which is not supported in this form |
| + | <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 |
| + | first digit of YY for this form can only be 9 or 0 |
| + | <number> is a three-digit number |
| + | <version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented) |
| + | |
| + | the second form, valid from April 2007 through December 2014 is: |
| + | arXiv:<date code>.<number><version> |
| + | where: |
| + | <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 |
| + | <number> is a four-digit number |
| + | <version> is a 1 or more digit number preceded with a lowercase v; no spaces |
| + | |
| + | the third form, valid from January 2015 is: |
| + | arXiv:<date code>.<number><version> |
| + | where: |
| + | <date code> and <version> are as defined for 0704-1412 |
| + | <number> is a five-digit number |
| + | ]] |
| + | |
| + | local function arxiv (id, class) |
| + | local handler = cfg.id_handlers['ARXIV']; |
| + | local year, month, version; |
| + | local err_cat = false; -- assume no error message |
| + | local text; -- output text |
| + | |
| + | if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version |
| + | year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); |
| + | year = tonumber(year); |
| + | month = tonumber(month); |
| + | if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month |
| + | ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? |
| + | err_cat = true; -- flag for error message |
| + | end |
| + | |
| + | elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version |
| + | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); |
| + | year = tonumber(year); |
| + | month = tonumber(month); |
| + | if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) |
| + | ((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)? |
| + | err_cat = true; -- flag for error message |
| + | end |
| + | |
| + | elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version |
| + | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); |
| + | year = tonumber(year); |
| + | month = tonumber(month); |
| + | if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) |
| + | err_cat = true; -- flag for error message |
| + | end |
| + | |
| + | else |
| + | err_cat = true; -- not a recognized format; flag for error message |
| + | end |
| + | |
| + | err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true |
| + | |
| + | text = external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
| + | |
| + | if is_set (class) then |
| + | if id:match ('^%d+') then |
| + | text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink |
| + | else |
| + | text = table.concat ({text, ' ', set_error ('class_ignored')}); |
| + | end |
| + | end |
| + | |
| + | return text; |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< B I B C O D E >-------------------------------------------------------------------- |
| + | |
| + | Validates (sort of) and formats a bibcode id. |
| + | |
| + | Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes |
| + | |
| + | But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters |
| + | and first four digits must be a year. This function makes these tests: |
| + | length must be 19 characters |
| + | characters in position |
| + | 1–4 must be digits and must represent a year in the range of 1000 – next year |
| + | 5 must be a letter |
| + | 6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. ) |
| + | 7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. ) |
| + | 9–18 must be letter, digit, or dot |
| + | 19 must be a letter or dot |
| + | |
| + | ]] |
| + | |
| + | local function bibcode (id, access) |
| + | local handler = cfg.id_handlers['BIBCODE']; |
| + | local err_type; |
| + | local year; |
| + | |
| + | local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, |
| + | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, |
| + | access=access}); |
| + | |
| + | if 19 ~= id:len() then |
| + | err_type = 'length'; |
| + | else |
| + | year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") -- |
| + | if not year then -- if nil then no pattern match |
| + | err_type = 'value'; -- so value error |
| + | else |
| + | local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year |
| + | year = tonumber (year); -- convert year portion of bibcode to a number |
| + | if (1000 > year) or (year > next_year) then |
| + | err_type = 'year'; -- year out of bounds |
| + | end |
| + | if id:find('&%.') then |
| + | err_type = 'journal'; -- journal abbreviation must not have '&.' (if it does it's missing a letter) |
| + | end |
| + | end |
| + | end |
| + | |
| + | if is_set (err_type) then -- if there was an error detected |
| + | text = text .. ' ' .. set_error( 'bad_bibcode', {err_type}); |
| + | end |
| + | return text; |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< B I O R X I V >----------------------------------------------------------------- |
| + | |
| + | Format bioRxiv id and do simple error checking. bioRxiv ids are exactly 6 digits. |
| + | The bioRxiv id is the number following the last slash in the bioRxiv-issued DOI: |
| + | https://doi.org/10.1101/078733 -> 078733 |
| + | |
| + | ]] |
| + | |
| + | local function biorxiv(id) |
| + | local handler = cfg.id_handlers['BIORXIV']; |
| + | local err_cat = ''; -- presume that bioRxiv id is valid |
| + | |
| + | if nil == id:match("^%d%d%d%d%d%d$") then -- if bioRxiv id has anything but six digits |
| + | err_cat = ' ' .. set_error( 'bad_biorxiv'); -- set an error message |
| + | end |
| + | |
| + | return external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| + | prefix=handler.prefix,id=id,separator=handler.separator, |
| + | encode=handler.encode, access=handler.access}) .. err_cat; |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< C I T E S E E R X >------------------------------------------------------------ |
| + | |
| + | CiteSeerX use their own notion of "doi" (not to be confused with the identifiers resolved via doi.org). |
| + | |
| + | The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure |
| + | ]] |
| + | |
| + | local function citeseerx (id) |
| + | local handler = cfg.id_handlers['CITESEERX']; |
| + | local matched; |
| + | |
| + | local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, |
| + | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, |
| + | access=handler.access}); |
| + | |
| + | matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); |
| + | if not matched then |
| + | text = text .. ' ' .. set_error( 'bad_citeseerx' ); |
| + | end |
| + | return text; |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< D O I >------------------------------------------------------------------------ |
| + | |
| + | Formats a DOI and checks for DOI errors. |
| + | |
| + | DOI names contain two parts: prefix and suffix separated by a forward slash. |
| + | Prefix: directory indicator '10.' followed by a registrant code |
| + | Suffix: character string of any length chosen by the registrant |
| + | |
| + | This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes, or, if it ends |
| + | with a period or a comma, this function will emit a bad_doi error message. |
| + | |
| + | DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash, |
| + | and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely |
| + | if ever used in doi names. |
| + | |
| + | ]] |
| + | |
| + | local function doi(id, inactive, access) |
| + | local cat = "" |
| + | local handler = cfg.id_handlers['DOI']; |
| + | |
| + | local text; |
| + | if is_set(inactive) then |
| + | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date |
| + | if is_set(inactive_year) then |
| + | table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year ); |
| + | else |
| + | table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year |
| + | end |
| + | inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" |
| + | end |
| + | text = external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') |
| + | |
| + | if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma |
| + | cat = ' ' .. set_error( 'bad_doi' ); |
| + | end |
| + | |
| + | return text .. cat |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< H D L >------------------------------------------------------------------------ |
| + | |
| + | Formats an HDL with minor error checking. |
| + | |
| + | HDL names contain two parts: prefix and suffix separated by a forward slash. |
| + | Prefix: character string using any character in the UCS-2 character set except '/' |
| + | Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant |
| + | |
| + | This function checks a HDL name for: prefix/suffix. If the HDL name contains spaces, endashes, or, if it ends |
| + | with a period or a comma, this function will emit a bad_hdl error message. |
| + | |
| + | HDL names are case-insensitive and can incorporate any printable Unicode characters so the test for endashes and |
| + | terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely |
| + | if ever used in HDLs. |
| + | |
| + | ]] |
| + | |
| + | local function hdl(id, access) |
| + | local handler = cfg.id_handlers['HDL']; |
| + | |
| + | local text = external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) |
| + | |
| + | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma |
| + | text = text .. ' ' .. set_error( 'bad_hdl' ); |
| + | end |
| + | return text; |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< I S B N >---------------------------------------------------------------------- |
| | | |
| Determines whether an ISBN string is valid | | Determines whether an ISBN string is valid |
Line 107: |
Line 449: |
| ]] | | ]] |
| | | |
− | local function check_isbn( isbn_str ) | + | local function isbn( isbn_str ) |
| if nil ~= isbn_str:match("[^%s-0-9X]") then | | if nil ~= isbn_str:match("[^%s-0-9X]") then |
| return false, 'invalid character'; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X | | return false, 'invalid character'; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X |
Line 132: |
Line 474: |
| return is_valid_isxn_13 (isbn_str), 'checksum'; | | return is_valid_isxn_13 (isbn_str), 'checksum'; |
| end | | end |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< A M A Z O N >------------------------------------------------------------------ |
| + | |
| + | Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha |
| + | characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit |
| + | isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=. |
| + | Error message if not 10 characters, if not isbn10, if mixed and first character is a digit. |
| + | |
| + | This function is positioned here because it calls isbn() |
| + | |
| + | ]] |
| + | |
| + | local function asin(id, domain) |
| + | local err_cat = "" |
| + | |
| + | if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then |
| + | err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters |
| + | else |
| + | if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) |
| + | if isbn( id ) then -- see if asin value is isbn10 |
| + | add_maint_cat ('ASIN'); |
| + | elseif not is_set (err_cat) then |
| + | err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10 |
| + | end |
| + | elseif not id:match("^%u[%d%u]+$") then |
| + | err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha |
| + | end |
| + | end |
| + | if not is_set(domain) then |
| + | domain = "com"; |
| + | elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom |
| + | domain = "co." .. domain; |
| + | elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico |
| + | domain = "com." .. domain; |
| + | end |
| + | local handler = cfg.id_handlers['ASIN']; |
| + | return external_link_id({link=handler.link, |
| + | label=handler.label, q = handler.q, prefix=handler.prefix .. domain .. "/dp/", |
| + | id=id, encode=handler.encode, separator = handler.separator}) .. err_cat; |
| end | | end |
| | | |
Line 147: |
Line 530: |
| local text; | | local text; |
| local valid_ismn = true; | | local valid_ismn = true; |
| + | local id_copy; |
| | | |
| + | id_copy = id; -- save a copy because this testing is destructive |
| id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn | | id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn |
| | | |
Line 159: |
Line 544: |
| -- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | | -- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) |
| | | |
− | text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id; -- because no place to link to yet | + | text = table.concat ( |
| + | { |
| + | make_wikilink (handler.link, handler.label), |
| + | handler.separator, |
| + | id_copy |
| + | }); -- because no place to link to yet |
| | | |
| if false == valid_ismn then | | if false == valid_ismn then |
Line 175: |
Line 565: |
| like this: | | like this: |
| | | |
− | |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link | + | |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link |
| | | |
| This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length | | This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length |
Line 185: |
Line 575: |
| | | |
| local function issn(id, e) | | local function issn(id, e) |
− | local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate | + | local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate |
| local handler; | | local handler; |
| local text; | | local text; |
Line 196: |
Line 586: |
| end | | end |
| | | |
− | id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn | + | id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn |
| | | |
− | if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position | + | if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position |
− | valid_issn=false; -- wrong length or improper character | + | valid_issn=false; -- wrong length or improper character |
| else | | else |
− | valid_issn=is_valid_isxn(id, 8); -- validate issn | + | valid_issn=is_valid_isxn(id, 8); -- validate issn |
| end | | end |
| | | |
| if true == valid_issn then | | if true == valid_issn then |
− | id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version | + | id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version |
| else | | else |
− | id = issn_copy; -- if not valid, use the show the invalid issn with error message | + | id = issn_copy; -- if not valid, use the show the invalid issn with error message |
| end | | end |
| | | |
− | text = external_link_id({link = handler.link, label = handler.label, | + | text = external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) |
| | | |
Line 221: |
Line 611: |
| | | |
| | | |
− | --[[--------------------------< A M A Z O N >------------------------------------------------------------------ | + | --[[--------------------------< J F M >----------------------------------------------------------------------- |
| | | |
− | Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
| + | A numerical identifier in the form nn.nnnn.nn |
− | characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
| |
− | isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
| |
− | Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
| |
| | | |
| ]] | | ]] |
| | | |
− | local function amazon(id, domain) | + | local function jfm (id) |
− | local err_cat = "" | + | local handler = cfg.id_handlers['JFM']; |
− | | + | local id_num; |
− | if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
| + | local err_cat = ''; |
− | err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
| |
− | else
| |
− | if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
| |
− | if check_isbn( id ) then -- see if asin value is isbn10
| |
− | add_maint_cat ('ASIN');
| |
− | elseif not is_set (err_cat) then
| |
− | err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10
| |
− | end
| |
− | elseif not id:match("^%u[%d%u]+$") then
| |
− | err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha
| |
− | end
| |
− | end | |
− | if not is_set(domain) then
| |
− | domain = "com";
| |
− | elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
| |
− | domain = "co." .. domain;
| |
− | elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
| |
− | domain = "com." .. domain;
| |
− | end
| |
− | local handler = cfg.id_handlers['ASIN']; | |
− | return external_link_id({link=handler.link,
| |
− | label=handler.label, prefix=handler.prefix .. domain .. "/dp/",
| |
− | id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
| |
− | end
| |
− | | |
− | | |
− | --[[--------------------------< A R X I V >--------------------------------------------------------------------
| |
− | | |
− | See: http://arxiv.org/help/arxiv_identifier
| |
− | | |
− | format and error check arXiv identifier. There are three valid forms of the identifier:
| |
− | the first form, valid only between date codes 9108 and 0703 is:
| |
− | arXiv:<archive>.<class>/<date code><number><version>
| |
− | where:
| |
− | <archive> is a string of alpha characters - may be hyphenated; no other punctuation
| |
− | <class> is a string of alpha characters - may be hyphenated; no other punctuation
| |
− | <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
| |
− | first digit of YY for this form can only 9 and 0
| |
− | <number> is a three-digit number
| |
− | <version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)
| |
| | | |
− | the second form, valid from April 2007 through December 2014 is:
| + | id_num = id:match ('^[Jj][Ff][Mm](.*)$'); -- identifier with jfm prefix; extract identifier |
− | arXiv:<date code>.<number><version> | |
− | where:
| |
− | <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
| |
− | <number> is a four-digit number
| |
− | <version> is a 1 or more digit number preceded with a lowercase v; no spaces
| |
| | | |
− | the third form, valid from January 2015 is:
| + | if is_set (id_num) then |
− | arXiv:<date code>.<number><version>
| + | add_maint_cat ('jfm_format'); |
− | where:
| + | else -- plain number without jfm prefix |
− | <date code> and <version> are as defined for 0704-1412
| + | id_num = id; -- if here id does not have prefix |
− | <number> is a five-digit number
| |
− | ]]
| |
− | | |
− | local function arxiv (id, class)
| |
− | local handler = cfg.id_handlers['ARXIV'];
| |
− | local year, month, version;
| |
− | local err_cat = '';
| |
− | local text;
| |
− |
| |
− | if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version | |
− | year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); | |
− | year = tonumber(year);
| |
− | month = tonumber(month);
| |
− | if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month
| |
− | ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok?
| |
− | err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
| |
− | end
| |
− | elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version | |
− | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); | |
− | year = tonumber(year);
| |
− | month = tonumber(month);
| |
− | if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
| |
− | ((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)?
| |
− | err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
| |
− | end
| |
− | elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version
| |
− | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
| |
− | year = tonumber(year);
| |
− | month = tonumber(month);
| |
− | if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years)
| |
− | err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
| |
− | end
| |
− | else
| |
− | err_cat = ' ' .. set_error( 'bad_arxiv' ); -- arXiv id doesn't match any format
| |
| end | | end |
| | | |
− | text = external_link_id({link = handler.link, label = handler.label, | + | if id_num and id_num:match('^%d%d%.%d%d%d%d%.%d%d$') then |
− | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, free=handler.free}) .. err_cat;
| + | id = id_num; -- jfm matches pattern |
− | | |
− | if is_set (class) then
| |
− | class = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]'; -- external link within square brackets, not wikilink | |
| else | | else |
− | class = ''; -- empty string for concatenation | + | err_cat = ' ' .. set_error( 'bad_jfm' ); -- set an error message |
| end | | end |
| | | |
− | return text .. class; | + | return external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; |
| end | | end |
− |
| |
− |
| |
− | --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
| |
− |
| |
− | lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
| |
− | 1. Remove all blanks.
| |
− | 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
| |
− | 3. If there is a hyphen in the string:
| |
− | a. Remove it.
| |
− | b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
| |
− | 1. All these characters should be digits, and there should be six or less. (not done in this function)
| |
− | 2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
| |
− |
| |
− | Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.
| |
− | ]]
| |
− |
| |
− | local function normalize_lccn (lccn)
| |
− | lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace
| |
− |
| |
− | if nil ~= string.find (lccn,'/') then
| |
− | lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all characters to the right of it
| |
− | end
| |
− |
| |
− | local prefix
| |
− | local suffix
| |
− | prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix
| |
− |
| |
− | if nil ~= suffix then -- if there was a hyphen
| |
− | suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
| |
− | lccn=prefix..suffix; -- reassemble the lccn
| |
− | end
| |
− |
| |
− | return lccn;
| |
− | end
| |
| | | |
| | | |
Line 385: |
Line 657: |
| local function lccn(lccn) | | local function lccn(lccn) |
| local handler = cfg.id_handlers['LCCN']; | | local handler = cfg.id_handlers['LCCN']; |
− | local err_cat = ''; -- presume that LCCN is valid | + | local err_cat = ''; -- presume that LCCN is valid |
− | local id = lccn; -- local copy of the lccn | + | local id = lccn; -- local copy of the lccn |
| | | |
− | id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes) | + | id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes) |
− | local len = id:len(); -- get the length of the lccn | + | local len = id:len(); -- get the length of the lccn |
| | | |
| if 8 == len then | | if 8 == len then |
− | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) | + | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) |
− | err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message | + | err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message |
| end | | end |
− | elseif 9 == len then -- LCCN should be adddddddd | + | elseif 9 == len then -- LCCN should be adddddddd |
− | if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern? | + | if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern? |
− | err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message | + | err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message |
| end | | end |
− | elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd | + | elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd |
− | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ... | + | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ... |
− | if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern | + | if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern |
− | err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message | + | err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message |
| end | | end |
| end | | end |
− | elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd | + | elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd |
| if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns | | if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns |
− | err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message | + | err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message |
| end | | end |
− | elseif 12 == len then -- LCCN should be aadddddddddd | + | elseif 12 == len then -- LCCN should be aadddddddddd |
− | if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern | + | if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern |
− | err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message | + | err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message |
| end | | end |
| else | | else |
− | err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message | + | err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message |
| end | | end |
| | | |
| if not is_set (err_cat) and nil ~= lccn:find ('%s') then | | if not is_set (err_cat) and nil ~= lccn:find ('%s') then |
− | err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message | + | err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message |
| end | | end |
| | | |
− | return external_link_id({link = handler.link, label = handler.label, | + | return external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; | | prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; |
| end | | end |
| | | |
| | | |
− | --[[--------------------------< P M I D >---------------------------------------------------------------------- | + | --[[--------------------------< M R >-------------------------------------------------------------------------- |
| | | |
− | Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This
| + | A seven digit number; if not seven digits, zero-fill leading digits to make seven digits. |
− | code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable
| |
− | test_limit will need to be updated periodically as more PMIDs are issued.
| |
| | | |
| ]] | | ]] |
| | | |
− | local function pmid(id) | + | local function mr (id) |
− | local test_limit = 30000000; -- update this value as PMIDs approach
| + | local handler = cfg.id_handlers['MR']; |
− | local handler = cfg.id_handlers['PMID']; | + | local id_num; |
− | local err_cat = ''; -- presume that PMID is valid | + | local id_len; |
| + | local err_cat = ''; |
| | | |
− | if id:match("[^%d]") then -- if PMID has anything but digits | + | id_num = id:match ('^[Mm][Rr](%d+)$'); -- identifier with mr prefix |
− | err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message | + | |
− | else -- PMID is only digits | + | if is_set (id_num) then |
− | local id_num = tonumber(id); -- convert id to a number for range testing | + | add_maint_cat ('mr_format'); |
− | if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries
| + | else -- plain number without mr prefix |
− | err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
| + | id_num = id:match ('^%d+$'); -- if here id is all digits |
− | end
| + | end |
| + | |
| + | id_len = id_num and id_num:len() or 0; |
| + | if (7 >= id_len) and (0 ~= id_len) then |
| + | id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits |
| + | else |
| + | err_cat = ' ' .. set_error( 'bad_mr' ); -- set an error message |
| end | | end |
| | | |
− | return external_link_id({link = handler.link, label = handler.label, | + | return external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; |
| end | | end |
| | | |
| | | |
− | --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | + | --[[--------------------------< O C L C >---------------------------------------------------------------------- |
| | | |
− | Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is
| + | Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html |
− | in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because
| |
− | |embargo= was not set in this cite.
| |
| | | |
| ]] | | ]] |
| | | |
− | local function is_embargoed (embargo) | + | local function oclc (id) |
− | if is_set (embargo) then | + | local handler = cfg.id_handlers['OCLC']; |
− | local lang = mw.getContentLanguage();
| + | local number; |
− | local good1, embargo_date, good2, todays_date;
| + | local err_msg = ''; -- empty string for concatenation |
− | good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
| |
− | good2, todays_date = pcall( lang.formatDate, lang, 'U' );
| |
| | | |
− | if good1 and good2 then -- if embargo date and today's date are good dates
| + | if id:match('^ocm%d%d%d%d%d%d%d%d$') then -- ocm prefix and 8 digits; 001 field (12 characters) |
− | if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date in the future?
| + | number = id:match('ocm(%d+)'); -- get the number |
− | return embargo; -- still embargoed
| + | elseif id:match('^ocn%d%d%d%d%d%d%d%d%d$') then -- ocn prefix and 9 digits; 001 field (12 characters) |
− | else
| + | number = id:match('ocn(%d+)'); -- get the number |
− | add_maint_cat ('embargo')
| + | elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then -- on prefix and 10 or more digits; 001 field (12 characters) |
− | return ''; -- unset because embargo has expired
| + | number = id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$'); -- get the number |
− | end | + | elseif id:match('^%(OCoLC%)[1-9]%d*$') then -- (OCoLC) prefix and variable number digits; no leading zeros; 035 field |
| + | number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number |
| + | if 9 < number:len() then |
| + | number = nil; -- constrain to 1 to 9 digits; change this when oclc issues 10-digit numbers |
| + | end |
| + | elseif id:match('^%d+$') then -- no prefix |
| + | number = id; -- get the number |
| + | if 10 < number:len() then |
| + | number = nil; -- constrain to 1 to 10 digits; change this when oclc issues 11-digit numbers |
| end | | end |
| end | | end |
− | return ''; -- |embargo= not set return empty string | + | |
| + | if number then -- proper format |
| + | id = number; -- exclude prefix, if any, from external link |
| + | else |
| + | err_msg = ' ' .. set_error( 'bad_oclc' ) -- add an error message if the id is malformed |
| + | end |
| + | |
| + | local text = external_link_id({link=handler.link, label=handler.label, q = handler.q, |
| + | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; |
| + | |
| + | return text; |
| + | end |
| + | |
| + | |
| + | --[[--------------------------< O P E N L I B R A R Y >-------------------------------------------------------- |
| + | |
| + | Formats an OpenLibrary link, and checks for associated errors. |
| + | |
| + | ]] |
| + | |
| + | local function openlibrary(id, access) |
| + | local code; |
| + | local handler = cfg.id_handlers['OL']; |
| + | local ident; |
| + | |
| + | ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; remove OL prefix |
| + | |
| + | if not is_set (ident) then -- if malformed return an error |
| + | return external_link_id({link=handler.link, label=handler.label, q = handler.q, |
| + | prefix=handler.prefix .. 'OL', |
| + | id=id, separator=handler.separator, encode = handler.encode, |
| + | access = access}) .. ' ' .. set_error( 'bad_ol' ); |
| + | end |
| + | |
| + | id = ident; -- use ident without the optional OL prefix (it has been removed) |
| + | |
| + | if ( code == "A" ) then |
| + | return external_link_id({link=handler.link, label=handler.label, q = handler.q, |
| + | prefix=handler.prefix .. 'authors/OL', |
| + | id=id, separator=handler.separator, encode = handler.encode, |
| + | access = access}) |
| + | end |
| + | |
| + | if ( code == "M" ) then |
| + | return external_link_id({link=handler.link, label=handler.label, q = handler.q, |
| + | prefix=handler.prefix .. 'books/OL', |
| + | id=id, separator=handler.separator, encode = handler.encode, |
| + | access = access}) |
| + | end |
| + | |
| + | if ( code == "W" ) then |
| + | return external_link_id({link=handler.link, label=handler.label, q = handler.q, |
| + | prefix=handler.prefix .. 'works/OL', |
| + | id=id, separator=handler.separator, encode = handler.encode, |
| + | access = access}) |
| + | end |
| end | | end |
| | | |
Line 499: |
Line 835: |
| | | |
| local function pmc(id, embargo) | | local function pmc(id, embargo) |
− | local test_limit = 5000000; -- update this value as PMCs approach | + | local test_limit = 6500000; -- update this value as PMCs approach |
| local handler = cfg.id_handlers['PMC']; | | local handler = cfg.id_handlers['PMC']; |
− | local err_cat = ''; -- presume that PMC is valid | + | local err_cat = ''; -- presume that PMC is valid |
| + | local id_num; |
| + | local text; |
| | | |
− | local text; | + | id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix |
| + | |
| + | if is_set (id_num) then |
| + | add_maint_cat ('pmc_format'); |
| + | else -- plain number without pmc prefix |
| + | id_num = id:match ('^%d+$'); -- if here id is all digits |
| + | end |
| | | |
− | if id:match("[^%d]") then -- if PMC has anything but digits | + | if is_set (id_num) then -- id_num has a value so test it |
− | err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
| + | id_num = tonumber(id_num); -- convert id_num to a number for range testing |
− | else -- PMC is only digits
| + | if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries |
− | local id_num = tonumber(id); -- convert id to a number for range testing | + | err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message |
− | if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries | + | else |
− | err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message | + | id = tostring (id_num); -- make sure id is a string |
| end | | end |
| + | else -- when id format incorrect |
| + | err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message |
| end | | end |
| | | |
| if is_set (embargo) then -- is PMC still embargoed? | | if is_set (embargo) then -- is PMC still embargoed? |
− | text="[[" .. handler.link .. "|" .. handler.label .. "]]:" .. handler.separator .. id .. err_cat; -- still embargoed so no external link | + | text = table.concat ( -- still embargoed so no external link |
| + | { |
| + | make_wikilink (handler.link, handler.label), |
| + | handler.separator, |
| + | id, |
| + | err_cat |
| + | }); |
| else | | else |
− | text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article | + | text = external_link_id({link = handler.link, label = handler.label, q = handler.q, -- no embargo date or embargo has expired, ok to link to article |
− | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, free=handler.free}) .. err_cat; | + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
| end | | end |
| return text; | | return text; |
Line 524: |
Line 876: |
| | | |
| | | |
− | --[[--------------------------< D O I >------------------------------------------------------------------------ | + | --[[--------------------------< P M I D >---------------------------------------------------------------------- |
| | | |
− | Formats a DOI and checks for DOI errors.
| + | Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This |
− | | + | code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable |
− | DOI names contain two parts: prefix and suffix separated by a forward slash.
| + | test_limit will need to be updated periodically as more PMIDs are issued. |
− | Prefix: directory indicator '10.' followed by a registrant code
| |
− | Suffix: character string of any length chosen by the registrant
| |
− | | |
− | This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes, or, if it ends
| |
− | with a period or a comma, this function will emit a bad_doi error message.
| |
− | | |
− | DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
| |
− | and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
| |
− | if ever used in doi names.
| |
| | | |
| ]] | | ]] |
| | | |
− | local function doi(id, inactive) | + | local function pmid(id) |
− | local cat = "" | + | local test_limit = 32000000; -- update this value as PMIDs approach |
− | local handler = cfg.id_handlers['DOI']; | + | local handler = cfg.id_handlers['PMID']; |
| + | local err_cat = ''; -- presume that PMID is valid |
| | | |
− | local text;
| + | if id:match("[^%d]") then -- if PMID has anything but digits |
− | if is_set(inactive) then | + | err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message |
− | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
| + | else -- PMID is only digits |
− | text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id; | + | local id_num = tonumber(id); -- convert id to a number for range testing |
− | if is_set(inactive_year) then | + | if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries |
− | table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year ); | + | err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message |
− | else
| |
− | table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
| |
| end | | end |
− | inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
| |
− | else
| |
− | text = external_link_id({link = handler.link, label = handler.label,
| |
− | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
| |
− | inactive = ""
| |
| end | | end |
− | | + | |
− | if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma | + | return external_link_id({link = handler.link, label = handler.label, q = handler.q, |
− | cat = ' ' .. set_error( 'bad_doi' );
| + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; |
− | end
| |
− | return text .. inactive .. cat
| |
| end | | end |
| | | |
| | | |
− | --[[--------------------------< H D L >------------------------------------------------------------------------ | + | --[[--------------------------< S S R N >---------------------------------------------------------------------- |
| | | |
− | Formats an HDL with minor error checking.
| + | Format an ssrn, do simple error checking |
| | | |
− | HDL names contain two parts: prefix and suffix separated by a forward slash.
| + | SSRNs are sequential numbers beginning at 100? and counting up. This code checks the ssrn to see that it is |
− | Prefix: character string using any character in the UCS-2 character set except '/'
| + | only digits and is greater than 99 and less than test_limit; the value in local variable test_limit will need |
− | Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant
| + | to be updated periodically as more SSRNs are issued. |
− | | |
− | This function checks a HDL name for: prefix/suffix. If the HDL name contains spaces, endashes, or, if it ends | |
− | with a period or a comma, this function will emit a bad_hdl error message.
| |
− | | |
− | HDL names are case-insensitive and can incorporate any printable Unicode characters so the test for endashes and
| |
− | terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
| |
− | if ever used in HDLs.
| |
| | | |
| ]] | | ]] |
| | | |
− | local function hdl(id) | + | local function ssrn (id) |
− | local handler = cfg.id_handlers['HDL']; | + | local test_limit = 3500000; -- update this value as SSRNs approach |
| + | local handler = cfg.id_handlers['SSRN']; |
| + | local err_cat = ''; -- presume that SSRN is valid |
| + | local id_num; |
| + | local text; |
| | | |
− | local text = external_link_id({link = handler.link, label = handler.label, | + | id_num = id:match ('^%d+$'); -- id must be all digits |
− | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
| |
| | | |
− | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma | + | if is_set (id_num) then -- id_num has a value so test it |
− | text = text .. ' ' .. set_error( 'bad_hdl' ); | + | id_num = tonumber(id_num); -- convert id_num to a number for range testing |
| + | if 100 > id_num or test_limit < id_num then -- if SSRN is outside test limit boundaries |
| + | err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message |
| + | end |
| + | else -- when id format incorrect |
| + | err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message |
| end | | end |
| + | |
| + | text = external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
| + | |
| return text; | | return text; |
| end | | end |
| | | |
| | | |
− | --[[--------------------------< O P E N L I B R A R Y >-------------------------------------------------------- | + | --[[--------------------------< U S E N E T _ I D >------------------------------------------------------------ |
− | | |
− | Formats an OpenLibrary link, and checks for associated errors.
| |
− | | |
− | ]]
| |
− | | |
− | local function openlibrary(id)
| |
− | local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'
| |
− | local handler = cfg.id_handlers['OL'];
| |
− | | |
− | if ( code == "A" ) then
| |
− | return external_link_id({link=handler.link, label=handler.label,
| |
− | prefix=handler.prefix .. 'authors/OL',
| |
− | id=id, separator=handler.separator, encode = handler.encode})
| |
− | elseif ( code == "M" ) then
| |
− | return external_link_id({link=handler.link, label=handler.label,
| |
− | prefix=handler.prefix .. 'books/OL',
| |
− | id=id, separator=handler.separator, encode = handler.encode})
| |
− | elseif ( code == "W" ) then
| |
− | return external_link_id({link=handler.link, label=handler.label,
| |
− | prefix=handler.prefix .. 'works/OL',
| |
− | id=id, separator=handler.separator, encode = handler.encode})
| |
− | else
| |
− | return external_link_id({link=handler.link, label=handler.label,
| |
− | prefix=handler.prefix .. 'OL',
| |
− | id=id, separator=handler.separator, encode = handler.encode}) .. ' ' .. set_error( 'bad_ol' );
| |
− | end
| |
− | end
| |
− | | |
− | | |
− | --[[--------------------------< M E S S A G E _ I D >----------------------------------------------------------
| |
| | | |
| Validate and format a usenet message id. Simple error checking, looks for 'id-left@id-right' not enclosed in | | Validate and format a usenet message id. Simple error checking, looks for 'id-left@id-right' not enclosed in |
Line 635: |
Line 945: |
| ]] | | ]] |
| | | |
− | local function message_id (id) | + | local function usenet_id (id) |
| local handler = cfg.id_handlers['USENETID']; | | local handler = cfg.id_handlers['USENETID']; |
| | | |
− | local text = external_link_id({link = handler.link, label = handler.label, | + | local text = external_link_id({link = handler.link, label = handler.label, q = handler.q, |
| prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) |
| | | |
| if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- doesn't have '@' with text on both sides, or first character is '<' or last character is '>' | | if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- doesn't have '@' with text on both sides, or first character is '<' or last character is '>' |
− | text = text .. ' ' .. set_error( 'bad_message_id' ) -- add an error message if the message id is invalid | + | text = text .. ' ' .. set_error( 'bad_usenet_id' ) -- add an error message if the message id is invalid |
| end | | end |
| | | |
Line 649: |
Line 959: |
| | | |
| | | |
− | --[[--------------------------< O C L C >---------------------------------------------------------------------- | + | --[[--------------------------< Z B L >----------------------------------------------------------------------- |
| | | |
− | Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html
| + | A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional |
| | | |
| ]] | | ]] |
| | | |
− | local function oclc (id) | + | local function zbl (id) |
− | local handler = cfg.id_handlers['OCLC']; | + | local handler = cfg.id_handlers['ZBL']; |
− | local number; | + | local id_num; |
− | local err_msg = ''; -- empty string for concatenation | + | local err_cat = ''; |
| | | |
− | if id:match('^ocm%d%d%d%d%d%d%d%d$') then -- ocm prefix and 8 digits; 001 field (12 characters) | + | id_num = id:match ('^[Zz][Bb][Ll](.*)$'); -- identifier with zbl prefix; extract identifier |
− | number = id:match('ocm(%d+)'); -- get the number
| + | |
− | elseif id:match('^ocn%d%d%d%d%d%d%d%d%d$') then -- ocn prefix and 9 digits; 001 field (12 characters) | + | if is_set (id_num) then |
− | number = id:match('ocn(%d+)'); -- get the number | + | add_maint_cat ('zbl_format'); |
− | elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then -- on prefix and 10 or more digits; 001 field (12 characters) | + | else -- plain number without zbl prefix |
− | number = id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$'); -- get the number
| + | id_num = id; -- if here id does not have prefix |
− | elseif id:match('^%(OCoLC%)[1-9]%d*$') then -- (OCoLC) prefix and variable number digits; no leading zeros; 035 field
| |
− | number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number | |
− | if 9 < number:len() then
| |
− | number = nil; -- constrain to 1 to 9 digits; change this when oclc issues 10-digit numbers
| |
− | end
| |
− | elseif id:match('^%d+$') then -- no prefix
| |
− | number = id; -- get the number
| |
− | if 10 < number:len() then
| |
− | number = nil; -- constrain to 1 to 10 digits; change this when oclc issues 11-digit numbers
| |
− | end
| |
| end | | end |
| | | |
− | if number then -- proper format | + | if id_num:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then |
− | id = number; -- exclude prefix, if any, from external link | + | id = id_num; -- id matches pattern |
| else | | else |
− | err_msg = ' ' .. set_error( 'bad_oclc' ) -- add an error message if the id is malformed | + | err_cat = ' ' .. set_error( 'bad_zbl' ); -- set an error message |
| end | | end |
| | | |
− | local text = external_link_id({link=handler.link, label=handler.label, | + | return external_link_id({link = handler.link, label = handler.label, q = handler.q, |
− | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;
| + | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; |
− | | |
− | return text;
| |
| end | | end |
| | | |
| | | |
− | --[[--------------------------< B I B C O D E >-------------------------------------------------------------------- | + | --============================<< I N T E R F A C E F U N C T I O N S >>========================================== |
− | | |
− | Validates (sort of) and formats a bibcode id.
| |
− | | |
− | Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes
| |
− | | |
− | But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters
| |
− | and first four digits must be a year. This function makes these tests:
| |
− | length must be 19 characters
| |
− | characters in position
| |
− | 1–4 must be digits and must represent a year in the range of 1000 – next year
| |
− | 5 must be a letter
| |
− | 6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )
| |
− | 7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
| |
− | 9–18 must be letter, digit, or dot
| |
− | 19 must be a letter or dot
| |
− | | |
− | ]]
| |
− | | |
− | local function bibcode (id)
| |
− | local handler = cfg.id_handlers['BIBCODE'];
| |
− | local err_type;
| |
− | local year;
| |
− | | |
− | local text = external_link_id({link=handler.link, label=handler.label,
| |
− | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode});
| |
− |
| |
− | if 19 ~= id:len() then
| |
− | err_type = 'length';
| |
− | else
| |
− | year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") --
| |
− | if not year then -- if nil then no pattern match
| |
− | err_type = 'value'; -- so value error
| |
− | else
| |
− | local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year
| |
− | year = tonumber (year); -- convert year portion of bibcode to a number
| |
− | if (1000 > year) or (year > next_year) then
| |
− | err_type = 'year'; -- year out of bounds
| |
− | end
| |
− | if id:find('&%.') then
| |
− | err_type = 'journal'; -- journal abbreviation must not have '&.' (if it does its missing a letter)
| |
− | end
| |
− | end
| |
− | end
| |
− | | |
− | if is_set (err_type) then -- if there was an error detected
| |
− | text = text .. ' ' .. set_error( 'bad_bibcode', {err_type});
| |
− | end
| |
− | return text;
| |
− | end
| |
− | | |
| | | |
| --[[--------------------------< B U I L D _ I D _ L I S T >-------------------------------------------------------- | | --[[--------------------------< B U I L D _ I D _ L I S T >-------------------------------------------------------- |
Line 760: |
Line 1,008: |
| for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table | | for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table |
| -- fallback to read-only cfg | | -- fallback to read-only cfg |
− | handler = setmetatable( { ['id'] = v }, fallback(k) ); | + | handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) ); |
| | | |
| if handler.mode == 'external' then | | if handler.mode == 'external' then |
Line 768: |
Line 1,016: |
| elseif handler.mode ~= 'manual' then | | elseif handler.mode ~= 'manual' then |
| error( cfg.messages['unknown_ID_mode'] ); | | error( cfg.messages['unknown_ID_mode'] ); |
− | elseif k == 'BIBCODE' then
| |
− | table.insert( new_list, {handler.label, bibcode( v ) } );
| |
− | elseif k == 'DOI' then
| |
− | table.insert( new_list, {handler.label, doi( v, options.DoiBroken ) } );
| |
− | elseif k == 'HDL' then
| |
− | table.insert( new_list, {handler.label, hdl( v ) } );
| |
| elseif k == 'ARXIV' then | | elseif k == 'ARXIV' then |
| table.insert( new_list, {handler.label, arxiv( v, options.Class ) } ); | | table.insert( new_list, {handler.label, arxiv( v, options.Class ) } ); |
| elseif k == 'ASIN' then | | elseif k == 'ASIN' then |
− | table.insert( new_list, {handler.label, amazon( v, options.ASINTLD ) } ); | + | table.insert( new_list, {handler.label, asin( v, options.ASINTLD ) } ); |
− | elseif k == 'LCCN' then | + | elseif k == 'BIBCODE' then |
− | table.insert( new_list, {handler.label, lccn( v ) } ); | + | table.insert( new_list, {handler.label, bibcode( v, handler.access ) } ); |
− | elseif k == 'OL' or k == 'OLA' then
| + | elseif k == 'BIORXIV' then |
− | table.insert( new_list, {handler.label, openlibrary( v ) } );
| + | table.insert( new_list, {handler.label, biorxiv( v ) } ); |
− | elseif k == 'PMC' then | + | elseif k == 'CITESEERX' then |
− | table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } ); | + | table.insert( new_list, {handler.label, citeseerx( v ) } ); |
− | elseif k == 'PMID' then | + | elseif k == 'DOI' then |
− | table.insert( new_list, {handler.label, pmid( v ) } ); | + | table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } ); |
− | elseif k == 'OCLC' then | |
− | table.insert( new_list, {handler.label, oclc( v ) } ); | |
− | elseif k == 'ISMN' then
| |
− | table.insert( new_list, {handler.label, ismn( v ) } );
| |
− | elseif k == 'ISSN' then
| |
− | table.insert( new_list, {handler.label, issn( v ) } );
| |
| elseif k == 'EISSN' then | | elseif k == 'EISSN' then |
| table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn | | table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn |
| + | elseif k == 'HDL' then |
| + | table.insert( new_list, {handler.label, hdl( v, handler.access ) } ); |
| elseif k == 'ISBN' then | | elseif k == 'ISBN' then |
| local ISBN = internal_link_id( handler ); | | local ISBN = internal_link_id( handler ); |
| local check; | | local check; |
| local err_type = ''; | | local err_type = ''; |
− | -- if not check_isbn( v ) and not is_set(options.IgnoreISBN) then
| + | check, err_type = isbn( v ); |
− | -- ISBN = ISBN .. set_error( 'bad_isbn', {}, false, " ", "" );
| |
− | -- end
| |
− | check, err_type = check_isbn( v ); | |
| if not check then | | if not check then |
| if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set | | if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set |
Line 809: |
Line 1,044: |
| end | | end |
| elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set | | elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set |
− | add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary
| + | add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary |
| end | | end |
| table.insert( new_list, {handler.label, ISBN } ); | | table.insert( new_list, {handler.label, ISBN } ); |
| + | elseif k == 'ISMN' then |
| + | table.insert( new_list, {handler.label, ismn( v ) } ); |
| + | elseif k == 'ISSN' then |
| + | table.insert( new_list, {handler.label, issn( v ) } ); |
| + | elseif k == 'JFM' then |
| + | table.insert( new_list, {handler.label, jfm( v ) } ); |
| + | elseif k == 'LCCN' then |
| + | table.insert( new_list, {handler.label, lccn( v ) } ); |
| + | elseif k == 'MR' then |
| + | table.insert( new_list, {handler.label, mr( v ) } ); |
| + | elseif k == 'OCLC' then |
| + | table.insert( new_list, {handler.label, oclc( v ) } ); |
| + | elseif k == 'OL' or k == 'OLA' then |
| + | table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } ); |
| + | elseif k == 'PMC' then |
| + | table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } ); |
| + | elseif k == 'PMID' then |
| + | table.insert( new_list, {handler.label, pmid( v ) } ); |
| + | elseif k == 'SSRN' then |
| + | table.insert( new_list, {handler.label, ssrn( v ) } ); |
| elseif k == 'USENETID' then | | elseif k == 'USENETID' then |
− | table.insert( new_list, {handler.label, message_id( v ) } ); | + | table.insert( new_list, {handler.label, usenet_id( v ) } ); |
| + | elseif k == 'ZBL' then |
| + | table.insert( new_list, {handler.label, zbl( v ) } ); |
| else | | else |
| error( cfg.messages['unknown_manual_ID'] ); | | error( cfg.messages['unknown_manual_ID'] ); |
Line 820: |
Line 1,077: |
| | | |
local function comp( a, b )														-- ordering predicate for the table.sort() call that follows
	-- each entry is a sequence whose first element is the label text; compare labels without regard to letter case
	local label_a = a[1]:lower();
	local label_b = b[1]:lower();
	return label_a < label_b;
end
| | | |
Line 847: |
Line 1,104: |
| end | | end |
| return id_list; | | return id_list; |
| + | end |
| + | |
| + | |
--[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >--------------------------------------

Builds a table of access levels for identifiers, keyed by the identifier's name in cfg.id_handlers.  Only handlers
that name a custom access parameter (handler.custom_access) are considered; for each of those the parameter's value
is read from args.

A value that is not one of the keywords in cfg.keywords['id-access'] (compared in lower case) adds an
'invalid_param_val' message to z.message_tail and is discarded.  An access parameter supplied without its matching
identifier in id_list adds a 'param_access_requires_param' message.  Accepted values are stored in lower case.

]]

local function extract_id_access_levels( args, id_list )
	local levels = {};															-- result: identifier name -> lower-case access keyword

	for id_name, handler in pairs( cfg.id_handlers ) do
		local param_name = handler.custom_access;								-- name of the access parameter for this identifier, if any
		local id_name_lc = string.lower (id_name);								-- lower-case identifier name, used in the error message

		if is_set (param_name) then
			local raw_level = args[param_name];									-- value exactly as written in the template call

			if is_set (raw_level) then
				local normalized = raw_level:lower();
				local accepted = in_array (normalized, cfg.keywords['id-access']);

				if not accepted then											-- not a recognized access keyword
					table.insert( z.message_tail, { set_error( 'invalid_param_val', {param_name, raw_level}, true ) } );
				end

				if not is_set (id_list[id_name]) then							-- access level given for an identifier that is absent
					table.insert( z.message_tail, { set_error( 'param_access_requires_param', {id_name_lc}, true ) } );
				end

				if accepted then
					levels[id_name] = normalized;								-- rejected values leave no entry
				end
			end
		end
	end

	return levels;
end
| | | |
Line 865: |
Line 1,156: |
| add_maint_cat = utilities_page_ptr.add_maint_cat; | | add_maint_cat = utilities_page_ptr.add_maint_cat; |
| substitute = utilities_page_ptr.substitute; | | substitute = utilities_page_ptr.substitute; |
| + | make_wikilink = utilities_page_ptr.make_wikilink; |
| | | |
| z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities | | z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities |
| end | | end |
− |
| |
| | | |
| | | |
Line 874: |
Line 1,165: |
| build_id_list = build_id_list, | | build_id_list = build_id_list, |
| extract_ids = extract_ids, | | extract_ids = extract_ids, |
| + | extract_id_access_levels = extract_id_access_levels, |
| is_embargoed = is_embargoed; | | is_embargoed = is_embargoed; |
| set_selected_modules = set_selected_modules; | | set_selected_modules = set_selected_modules; |
| } | | } |