Changes

Jump to navigation Jump to search
synch from sandbox;
Line 6: Line 6:  
]]
 
]]
   −
local is_set, in_array, set_error, select_one, add_maint_cat, substitute; -- functions in Module:Citation/CS1/Utilities
+
local is_set, in_array, set_error, select_one, add_maint_cat, substitute, make_wikilink; -- functions in Module:Citation/CS1/Utilities
    
local z; -- table of tables defined in Module:Citation/CS1/Utilities
 
local z; -- table of tables defined in Module:Citation/CS1/Utilities
Line 12: Line 12:  
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
 
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
    +
local wd_int_lang = (mw.site.server:match ('wikidata') and mw.getCurrentFrame():preprocess('{{int:lang}}')) or '';
 +
 +
 +
--============================<< H E L P E R  F U N C T I O N S >>============================================
    
--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
 
--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
Line 22: Line 26:  
local url_string = options.id;
 
local url_string = options.id;
 
local ext_link;
 
local ext_link;
 +
local this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org
 +
local wd_article; -- article title from wikidata
 
 
 
if options.encode == true or options.encode == nil then
 
if options.encode == true or options.encode == nil then
 
url_string = mw.uri.encode( url_string );
 
url_string = mw.uri.encode( url_string );
 
end
 
end
+
 
 
ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id));
 
ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id));
 
if is_set(options.access) then
 
if is_set(options.access) then
ext_link = substitute (cfg.presentation['access-signal'], {ext_link, cfg.presentation[options.access]}); -- add the free-to-read / paywall lock
+
ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock
 
end
 
end
 +
 +
this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain)
 
 
return mw.ustring.format( '[[%s|%s]]%s%s', options.link, options.label, options.separator or "&nbsp;", ext_link);
+
if string.match (mw.site.server, 'wikidata') then
 +
this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on wikidata so use interface language setting instead
 +
end
 +
 +
if is_set (options.q) then
 +
wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd
 +
if wd_article then
 +
wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- make interwiki link if taken from wd; leading colon required
 +
end
 +
end
 +
 
 +
return table.concat ({
 +
make_wikilink (wd_article or options.link, options.label), -- wikidata link or locally specified
 +
options.separator or '&nbsp;',
 +
ext_link
 +
});
 
end
 
end
   Line 43: Line 66:     
local function internal_link_id(options)
 
local function internal_link_id(options)
return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]',
+
 
options.link, options.label, options.separator or "&nbsp;",
+
return table.concat (
options.prefix, options.id, options.suffix or "",
+
{
mw.text.nowiki(options.id)
+
make_wikilink (options.link, options.label),
);
+
options.separator or '&nbsp;',
 +
make_wikilink (
 +
table.concat (
 +
{
 +
options.prefix,
 +
options.id,
 +
options.suffix or ''
 +
}),
 +
mw.text.nowiki (options.id)
 +
);
 +
});
 +
end
 +
 
 +
 
 +
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------
 +
 
 +
Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date.  If embargo date is
 +
in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because
 +
|embargo= was not set in this cite.
 +
 
 +
]]
 +
 
 +
local function is_embargoed (embargo)
 +
if is_set (embargo) then
 +
local lang = mw.getContentLanguage();
 +
local good1, embargo_date, good2, todays_date;
 +
good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
 +
good2, todays_date = pcall( lang.formatDate, lang, 'U' );
 +
 +
if good1 and good2 then -- if embargo date and today's date are good dates
 +
if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future?
 +
return embargo; -- still embargoed
 +
else
 +
add_maint_cat ('embargo')
 +
return ''; -- unset because embargo has expired
 +
end
 +
end
 +
end
 +
return ''; -- |embargo= not set return empty string
 
end
 
end
   Line 54: Line 115:     
ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit.
 
ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit.
ISBN-13 is checked in check_isbn().
+
ISBN-13 is checked in isbn().
    
If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length
 
If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length
Line 63: Line 124:  
local function is_valid_isxn (isxn_str, len)
 
local function is_valid_isxn (isxn_str, len)
 
local temp = 0;
 
local temp = 0;
isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
+
isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
 
len = len+1; -- adjust to be a loop counter
 
len = len+1; -- adjust to be a loop counter
 
for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum
 
for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum
Line 76: Line 137:       −
--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------
+
--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >-----------------------------------------------
    
ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit.
 
ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit.
Line 87: Line 148:  
local temp=0;
 
local temp=0;
 
 
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39
+
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39
 
for i, v in ipairs( isxn_str ) do
 
for i, v in ipairs( isxn_str ) do
 
temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
 
temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
Line 95: Line 156:       −
--[[--------------------------< C H E C K _ I S B N >------------------------------------------------------------
+
--[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
   −
Determines whether an ISBN string is valid
+
lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
 +
1. Remove all blanks.
 +
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
 +
3. If there is a hyphen in the string:
 +
a. Remove it.
 +
b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
 +
1. All these characters should be digits, and there should be six or less. (not done in this function)
 +
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
    +
Returns a normalized lccn for lccn() to validate.  There is no error checking (step 3.b.1) performed in this function.
 
]]
 
]]
   −
local function check_isbn( isbn_str )
+
local function normalize_lccn (lccn)
if nil ~= isbn_str:match("[^%s-0-9X]") then
+
lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace
return false, 'invalid character'; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
  −
end
  −
isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces
  −
local len = isbn_str:len();
  −
  −
if len ~= 10 and len ~= 13 then
  −
return false, 'length'; -- fail if incorrect length
  −
end
     −
if len == 10 then
+
if nil ~= string.find (lccn,'/') then
if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position
+
lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it
return false, 'invalid form';
  −
end
  −
return is_valid_isxn(isbn_str, 10), 'checksum';
  −
else
  −
if isbn_str:match( "^%d+$" ) == nil then
  −
return false, 'invalid character'; -- fail if isbn13 is not all digits
  −
end
  −
if isbn_str:match( "^97[89]%d*$" ) == nil then
  −
return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979
  −
end
  −
return is_valid_isxn_13 (isbn_str), 'checksum';
   
end
 
end
end
      +
local prefix
 +
local suffix
 +
prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix
   −
--[[--------------------------< I S M N >----------------------------------------------------------------------
+
if nil ~= suffix then -- if there was a hyphen
 
+
suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
Determines whether an ISMN string is valid.  Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the
+
lccn=prefix..suffix; -- reassemble the lccn
same check digit calculations.  See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
  −
section 2, pages 9–12.
  −
 
  −
]]
  −
 
  −
local function ismn (id)
  −
local handler = cfg.id_handlers['ISMN'];
  −
local text;
  −
local valid_ismn = true;
  −
local id_copy;
  −
 
  −
id_copy = id; -- save a copy because this testing is destructive
  −
id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn
  −
 
  −
if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ismn must be 13 digits and begin 9790
  −
valid_ismn = false;
  −
else
  −
valid_ismn=is_valid_isxn_13 (id); -- validate ismn
   
end
 
end
  −
-- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to
  −
-- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
  −
  −
text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id_copy; -- because no place to link to yet
  −
  −
if false == valid_ismn then
  −
text = text .. ' ' .. set_error( 'bad_ismn' ) -- add an error message if the ismn is invalid
  −
end
   
 
return text;
+
return lccn;
end
  −
 
  −
 
  −
--[[--------------------------< I S S N >----------------------------------------------------------------------
  −
 
  −
Validate and format an issn.  This code fixes the case where an editor has included an ISSN in the citation but
  −
has separated the two groups of four digits with a space.  When that condition occurred, the resulting link looked
  −
like this:
  −
 
  −
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327]  -- can't have spaces in an external link
  −
  −
This code now prevents that by inserting a hyphen at the issn midpoint.  It also validates the issn for length
  −
and makes sure that the checkdigit agrees with the calculated value.  Incorrect length (8 digits), characters
  −
other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message.  The
  −
issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
  −
 
  −
]]
  −
 
  −
local function issn(id, e)
  −
local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate
  −
local handler;
  −
local text;
  −
local valid_issn = true;
  −
  −
if e then
  −
handler = cfg.id_handlers['EISSN'];
  −
else
  −
handler = cfg.id_handlers['ISSN'];
  −
end
  −
 
  −
id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn
  −
 
  −
if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position
  −
valid_issn=false; -- wrong length or improper character
  −
else
  −
valid_issn=is_valid_isxn(id, 8); -- validate issn
  −
end
  −
 
  −
if true == valid_issn then
  −
id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version
  −
else
  −
id = issn_copy; -- if not valid, use the show the invalid issn with error message
  −
end
  −
  −
text = external_link_id({link = handler.link, label = handler.label,
  −
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
  −
  −
if false == valid_issn then
  −
text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid
  −
end
  −
  −
return text
  −
end
  −
 
  −
 
  −
--[[--------------------------< A M A Z O N >------------------------------------------------------------------
  −
 
  −
Formats a link to Amazon.  Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
  −
characters.  If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
  −
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
  −
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
  −
 
  −
]]
  −
 
  −
local function amazon(id, domain)
  −
local err_cat = ""
  −
 
  −
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
  −
err_cat =  ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
  −
else
  −
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
  −
if check_isbn( id ) then -- see if asin value is isbn10
  −
add_maint_cat ('ASIN');
  −
elseif not is_set (err_cat) then
  −
err_cat =  ' ' .. set_error ('bad_asin'); -- asin is not isbn10
  −
end
  −
elseif not id:match("^%u[%d%u]+$") then
  −
err_cat =  ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha
  −
end
   
end
 
end
if not is_set(domain) then
  −
domain = "com";
  −
elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
  −
domain = "co." .. domain;
  −
elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
  −
domain = "com." .. domain;
  −
end
  −
local handler = cfg.id_handlers['ASIN'];
  −
return external_link_id({link=handler.link,
  −
label=handler.label, prefix=handler.prefix .. domain .. "/dp/",
  −
id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
  −
end
      +
--============================<< I D E N T I F I E R  F U N C T I O N S >>====================================
    
--[[--------------------------< A R X I V >--------------------------------------------------------------------
 
--[[--------------------------< A R X I V >--------------------------------------------------------------------
Line 265: Line 200:  
where:
 
where:
 
<archive> is a string of alpha characters - may be hyphenated; no other punctuation
 
<archive> is a string of alpha characters - may be hyphenated; no other punctuation
<class> is a string of alpha characters - may be hyphenated; no other punctuation
+
<class> is a string of alpha characters - may be hyphenated; no other punctuation; not the same as |class= parameter which is not supported in this form
 
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
 
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
 
first digit of YY for this form can only be 9 or 0
 
first digit of YY for this form can only be 9 or 0
Line 288: Line 223:  
local handler = cfg.id_handlers['ARXIV'];
 
local handler = cfg.id_handlers['ARXIV'];
 
local year, month, version;
 
local year, month, version;
local err_cat = '';
+
local err_cat = false; -- assume no error message
local text;
+
local text; -- output text
 
 
 
if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version
 
if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version
Line 297: Line 232:  
if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month
 
if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month
 
((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok?
 
((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok?
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
+
err_cat = true; -- flag for error message
 
end
 
end
 +
 
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version
 
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version
 
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
 
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
Line 305: Line 241:  
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
 
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
 
((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)?
 
((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)?
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
+
err_cat = true; -- flag for error message
 
end
 
end
 +
 
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version
 
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version
 
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
 
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
Line 312: Line 249:  
month = tonumber(month);
 
month = tonumber(month);
 
if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years)
 
if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years)
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
+
err_cat = true; -- flag for error message
 
end
 
end
 +
 
else
 
else
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- arXiv id doesn't match any format
+
err_cat = true; -- not a recognized format; flag for error message
 
end
 
end
   −
text = external_link_id({link = handler.link, label = handler.label,
+
err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true
 +
 +
text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
    
if is_set (class) then
 
if is_set (class) then
class = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]'; -- external link within square brackets, not wikilink
+
if id:match ('^%d+') then
else
+
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
class = ''; -- empty string for concatenation
+
else
 +
text = table.concat ({text, ' ', set_error ('class_ignored')});
 +
end
 
end
 
end
+
 
return text .. class;
+
return text;
 
end
 
end
      −
--[[--------------------------< B I O R X I V >-----------------------------------------------------------------
+
--[[--------------------------< B I B C O D E >--------------------------------------------------------------------
   −
Format bioRxiv id and do simple error checking.  bioRxiv ids are exactly 6 digits.
+
Validates (sort of) and formats a bibcode id.
The bioRxiv id is the number following the last slash in the bioRxiv-issued DOI:
  −
https://doi.org/10.1101/078733 -> 078733
     −
]]
+
Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes
   −
local function biorxiv(id)
+
But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters
local handler = cfg.id_handlers['BIORXIV'];
+
and first four digits must be a year. This function makes these tests:
local err_cat =  ''; -- presume that bioRxiv id is valid
+
length must be 19 characters
+
characters in position
if nil == id:match("^%d%d%d%d%d%d$") then -- if bioRxiv id has anything but six digits
+
1–4 must be digits and must represent a year in the range of 1000 – next year
err_cat = ' ' .. set_error( 'bad_biorxiv'); -- set an error message
+
5 must be a letter
    end
+
6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )
+
7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
return external_link_id({link = handler.link, label = handler.label,
+
9–18 must be letter, digit, or dot
prefix=handler.prefix,id=id,separator=handler.separator,
+
19 must be a letter or dot
encode=handler.encode, access=handler.access}) .. err_cat;
  −
end
  −
 
  −
 
  −
--[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
  −
 
  −
lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
  −
1. Remove all blanks.
  −
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
  −
3. If there is a hyphen in the string:
  −
a. Remove it.
  −
b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
  −
1. All these characters should be digits, and there should be six or less. (not done in this function)
  −
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
     −
Returns a normalized lccn for lccn() to validate.  There is no error checking (step 3.b.1) performed in this function.
   
]]
 
]]
   −
local function normalize_lccn (lccn)
+
local function bibcode (id, access)
lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace
+
local handler = cfg.id_handlers['BIBCODE'];
 +
local err_type;
 +
local year;
   −
if nil ~= string.find (lccn,'/') then
+
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q,
lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it
+
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
end
+
access=access});
 
  −
local prefix
  −
local suffix
  −
prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix
  −
 
  −
if nil ~= suffix then -- if there was a hyphen
  −
suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
  −
lccn=prefix..suffix; -- reassemble the lccn
  −
end
   
 
return lccn;
+
if 19 ~= id:len() then
end
+
err_type = 'length';
 
+
else
 
+
year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") --
--[[--------------------------< L C C N >----------------------------------------------------------------------
+
if not year then -- if nil then no pattern match
 
+
err_type = 'value'; -- so value error
Format LCCN link and do simple error checking.  LCCN is a character string 8-12 characters long. The length of
+
else
the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits.
+
local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year
http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
+
year = tonumber (year); -- convert year portion of bibcode to a number
 
+
if (1000 > year) or (year > next_year) then
length = 8 then all digits
+
err_type = 'year'; -- year out of bounds
length = 9 then lccn[1] is lower case alpha
+
end
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
+
if id:find('&%.') then
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
+
err_type = 'journal'; -- journal abbreviation must not have '&.' (if it does its missing a letter)
length = 12 then lccn[1] and lccn[2] are both lower case alpha
  −
 
  −
]]
  −
 
  −
local function lccn(lccn)
  −
local handler = cfg.id_handlers['LCCN'];
  −
local err_cat =  ''; -- presume that LCCN is valid
  −
local id = lccn; -- local copy of the lccn
  −
 
  −
id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)
  −
local len = id:len(); -- get the length of the lccn
  −
 
  −
if 8 == len then
  −
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)
  −
err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
  −
end
  −
elseif 9 == len then -- LCCN should be adddddddd
  −
if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?
  −
err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
  −
end
  −
elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd
  −
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...
  −
if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern
  −
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
   
end
 
end
 
end
 
end
elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd
  −
if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns
  −
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
  −
end
  −
elseif 12 == len then -- LCCN should be aadddddddddd
  −
if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern
  −
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
  −
end
  −
else
  −
err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message
   
end
 
end
   −
if not is_set (err_cat) and nil ~= lccn:find ('%s') then
+
if is_set (err_type) then -- if there was an error detected
err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message
+
text = text .. ' ' .. set_error( 'bad_bibcode', {err_type});
 
end
 
end
 
+
return text;
return external_link_id({link = handler.link, label = handler.label,
  −
prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
   
end
 
end
      −
--[[--------------------------< P M I D >----------------------------------------------------------------------
+
--[[--------------------------< B I O R X I V >-----------------------------------------------------------------
   −
Format PMID and do simple error checking.  PMIDs are sequential numbers beginning at 1 and counting up. This
+
Format bioRxiv id and do simple error checking.  bioRxiv ids are exactly 6 digits.
code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable
+
The bioRxiv id is the number following the last slash in the bioRxiv-issued DOI:
test_limit will need to be updated periodically as more PMIDs are issued.
+
https://doi.org/10.1101/078733 -> 078733
    
]]
 
]]
   −
local function pmid(id)
+
local function biorxiv(id)
local test_limit = 30000000; -- update this value as PMIDs approach
+
local handler = cfg.id_handlers['BIORXIV'];
local handler = cfg.id_handlers['PMID'];
+
local err_cat = ''; -- presume that bioRxiv id is valid
local err_cat = ''; -- presume that PMID is valid
   
 
if id:match("[^%d]") then -- if PMID has anything but digits
+
if nil == id:match("^%d%d%d%d%d%d$") then -- if bioRxiv id has anything but six digits
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
+
err_cat = ' ' .. set_error( 'bad_biorxiv'); -- set an error message
else -- PMID is only digits
  −
local id_num = tonumber(id); -- convert id to a number for range testing
  −
if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries
  −
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
  −
end
   
end
 
end
 
 
return external_link_id({link = handler.link, label = handler.label,
+
return external_link_id({link = handler.link, label = handler.label, q = handler.q,
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
+
prefix=handler.prefix,id=id,separator=handler.separator,
 +
encode=handler.encode, access=handler.access}) .. err_cat;
 
end
 
end
      −
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------
+
--[[--------------------------< C I T E S E E R X >------------------------------------------------------------
   −
Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is
+
CiteSeerX uses its own notion of "doi" (not to be confused with the identifiers resolved via doi.org).
in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because
  −
|embargo= was not set in this cite.
      +
The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure
 
]]
 
]]
   −
local function is_embargoed (embargo)
+
local function citeseerx (id)
if is_set (embargo) then
+
local handler = cfg.id_handlers['CITESEERX'];
local lang = mw.getContentLanguage();
+
local matched;
local good1, embargo_date, good2, todays_date;
  −
good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
  −
good2, todays_date = pcall( lang.formatDate, lang, 'U' );
   
 
if good1 and good2 then -- if embargo date and today's date are good dates
+
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q,
if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future?
+
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
return embargo; -- still embargoed
+
access=handler.access});
else
  −
add_maint_cat ('embargo')
  −
return ''; -- unset because embargo has expired
  −
end
  −
end
  −
end
  −
return ''; -- |embargo= not set return empty string
  −
end
  −
 
  −
 
  −
--[[--------------------------< P M C >------------------------------------------------------------------------
  −
 
  −
Format a PMC, do simple error checking, and check for embargoed articles.
  −
 
  −
The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
  −
be linked to the article.  If the embargo date is today or in the past, or if it is empty or omitted, then the
  −
PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.
  −
 
  −
PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
  −
has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed ()
  −
returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.
  −
 
  −
PMCs are sequential numbers beginning at 1 and counting up.  This code checks the PMC to see that it contains only digits and is less
  −
than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.
  −
 
  −
]]
  −
 
  −
local function pmc(id, embargo)
  −
local test_limit = 6000000; -- update this value as PMCs approach
  −
local handler = cfg.id_handlers['PMC'];
  −
local err_cat =  ''; -- presume that PMC is valid
  −
local id_num;
  −
local text;
   
 
id_num = id:match ('^[Pp][Mm][Cc](%d+)$');                                  -- identifier with pmc prefix
+
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$");
 
+
if not matched then
if is_set (id_num) then
+
text = text .. ' ' .. set_error( 'bad_citeseerx' );
add_maint_cat ('pmc_format');
  −
else         -- plain number without pmc prefix
  −
id_num = id:match ('^%d+$'); -- if here id is all digits
  −
end
  −
 
  −
if is_set (id_num) then -- id_num has a value so test it
  −
id_num = tonumber(id_num); -- convert id_num to a number for range testing
  −
if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries
  −
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
  −
else
  −
id = tostring (id_num); -- make sure id is a string
  −
end
  −
else -- when id format incorrect
  −
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
  −
end
  −
  −
if is_set (embargo) then -- is PMC is still embargoed?
  −
text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id .. err_cat; -- still embargoed so no external link
  −
else
  −
text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article
  −
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
   
end
 
end
 
return text;
 
return text;
Line 575: Line 394:  
local text;
 
local text;
 
if is_set(inactive) then
 
if is_set(inactive) then
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
+
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id;
   
if is_set(inactive_year) then
 
if is_set(inactive_year) then
 
table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
 
table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
 
else
 
else
table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
+
table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
 
end
 
end
 
inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"  
 
inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"  
else
  −
text = external_link_id({link = handler.link, label = handler.label,
  −
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access})
  −
inactive = ""
   
end
 
end
 +
text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
 +
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')
   −
if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
+
if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
 
cat = ' ' .. set_error( 'bad_doi' );
 
cat = ' ' .. set_error( 'bad_doi' );
 
end
 
end
return text .. inactive .. cat  
+
 
 +
return text .. cat  
 
end
 
end
   Line 616: Line 433:  
local handler = cfg.id_handlers['HDL'];
 
local handler = cfg.id_handlers['HDL'];
 
 
local text = external_link_id({link = handler.link, label = handler.label,
+
local text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access})
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access})
   Line 626: Line 443:       −
--[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------
+
--[[--------------------------< I S B N >----------------------------------------------------------------------
 +
 
 +
Determines whether an ISBN string is valid
 +
 
 +
]]
 +
 
 +
local function isbn( isbn_str )
 +
if nil ~= isbn_str:match("[^%s-0-9X]") then
 +
return false, 'invalid character'; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
 +
end
 +
isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces
 +
local len = isbn_str:len();
 +
 +
if len ~= 10 and len ~= 13 then
 +
return false, 'length'; -- fail if incorrect length
 +
end
 +
 
 +
if len == 10 then
 +
if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position
 +
return false, 'invalid form';
 +
end
 +
return is_valid_isxn(isbn_str, 10), 'checksum';
 +
else
 +
if isbn_str:match( "^%d+$" ) == nil then
 +
return false, 'invalid character'; -- fail if isbn13 is not all digits
 +
end
 +
if isbn_str:match( "^97[89]%d*$" ) == nil then
 +
return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979
 +
end
 +
return is_valid_isxn_13 (isbn_str), 'checksum';
 +
end
 +
end
 +
 
 +
 
 +
--[[--------------------------< A M A Z O N >------------------------------------------------------------------
 +
 
 +
Formats a link to Amazon.  Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
 +
characters.  If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
 +
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
 +
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
 +
 
 +
This function is positioned here because it calls isbn()
 +
 
 +
]]
 +
 
 +
local function asin(id, domain)
 +
local err_cat = ""
 +
 
 +
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
 +
err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
 +
else
 +
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
 +
if isbn( id ) then -- see if asin value is isbn10
 +
add_maint_cat ('ASIN');
 +
elseif not is_set (err_cat) then
 +
err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10
 +
end
 +
elseif not id:match("^%u[%d%u]+$") then
 +
err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha
 +
end
 +
end
 +
if not is_set(domain) then
 +
domain = "com";
 +
elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
 +
domain = "co." .. domain;
 +
elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
 +
domain = "com." .. domain;
 +
end
 +
local handler = cfg.id_handlers['ASIN'];
 +
return external_link_id({link=handler.link,
 +
label=handler.label, q = handler.q, prefix=handler.prefix .. domain .. "/dp/",
 +
id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
 +
end
 +
 
 +
 
 +
--[[--------------------------< I S M N >----------------------------------------------------------------------
   −
Formats an OpenLibrary link, and checks for associated errors.
+
Determines whether an ISMN string is valid.  Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the
 +
same check digit calculations.  See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
 +
section 2, pages 9–12.
    
]]
 
]]
   −
local function openlibrary(id, access)
+
local function ismn (id)
local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'
+
local handler = cfg.id_handlers['ISMN'];
local handler = cfg.id_handlers['OL'];
+
local text;
 +
local valid_ismn = true;
 +
local id_copy;
 +
 
 +
id_copy = id; -- save a copy because this testing is destructive
 +
id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn
   −
if ( code == "A" ) then
+
if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ismn must be 13 digits and begin 9790
return external_link_id({link=handler.link, label=handler.label,
+
valid_ismn = false;
prefix=handler.prefix .. 'authors/OL',
  −
id=id, separator=handler.separator, encode = handler.encode,
  −
access = access})
  −
elseif ( code == "M" ) then
  −
return external_link_id({link=handler.link, label=handler.label,
  −
prefix=handler.prefix .. 'books/OL',
  −
id=id, separator=handler.separator, encode = handler.encode,
  −
access = access})
  −
elseif ( code == "W" ) then
  −
return external_link_id({link=handler.link, label=handler.label,
  −
prefix=handler.prefix .. 'works/OL',
  −
id=id, separator=handler.separator, encode = handler.encode,
  −
access = access})
   
else
 
else
return external_link_id({link=handler.link, label=handler.label,
+
valid_ismn=is_valid_isxn_13 (id); -- validate ismn
prefix=handler.prefix .. 'OL',
  −
id=id, separator=handler.separator, encode = handler.encode,
  −
access = access}) .. ' ' .. set_error( 'bad_ol' );
   
end
 
end
 +
 +
-- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to
 +
-- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
 +
 +
text = table.concat (
 +
{
 +
make_wikilink (handler.link, handler.label),
 +
handler.separator,
 +
id_copy
 +
}); -- because no place to link to yet
 +
 +
if false == valid_ismn then
 +
text = text .. ' ' .. set_error( 'bad_ismn' ) -- add an error message if the ismn is invalid
 +
end
 +
 +
return text;
 
end
 
end
      −
--[[--------------------------< M E S S A G E _ I D >----------------------------------------------------------
+
--[[--------------------------< I S S N >----------------------------------------------------------------------
   −
Validate and format a usenet message id.  Simple error checking, looks for 'id-left@id-right' not enclosed in
+
Validate and format an issn.  This code fixes the case where an editor has included an ISSN in the citation but
'<' and/or '>' angle brackets.
+
has separated the two groups of four digits with a space.  When that condition occurred, the resulting link looked
 +
like this:
 +
 
 +
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link
 +
 +
This code now prevents that by inserting a hyphen at the issn midpoint.  It also validates the issn for length
 +
and makes sure that the checkdigit agrees with the calculated value.  Incorrect length (8 digits), characters
 +
other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message.  The
 +
issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
    
]]
 
]]
   −
local function message_id (id)
+
local function issn(id, e)
local handler = cfg.id_handlers['USENETID'];
+
local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate
 +
local handler;
 +
local text;
 +
local valid_issn = true;
 +
 +
if e then
 +
handler = cfg.id_handlers['EISSN'];
 +
else
 +
handler = cfg.id_handlers['ISSN'];
 +
end
 +
 
 +
id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn
 +
 
 +
if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position
 +
valid_issn=false; -- wrong length or improper character
 +
else
 +
valid_issn=is_valid_isxn(id, 8); -- validate issn
 +
end
   −
local text = external_link_id({link = handler.link, label = handler.label,
+
if true == valid_issn then
 +
id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version
 +
else
 +
id = issn_copy; -- if not valid, show the invalid issn with error message
 +
end
 +
 +
text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
 
   
 
   
if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- doesn't have '@' or has one or first or last character is '< or '>'
+
if false == valid_issn then
text = text .. ' ' .. set_error( 'bad_message_id' ) -- add an error message if the message id is invalid
+
text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid
 
end  
 
end  
 
 
 
return text
 
return text
 +
end
 +
 +
 +
--[[--------------------------< J F M >-----------------------------------------------------------------------
 +
 +
A numerical identifier in the form nn.nnnn.nn
 +
 +
]]
 +
 +
local function jfm (id)
 +
local handler = cfg.id_handlers['JFM'];
 +
local id_num;
 +
local err_cat = '';
 +
 +
id_num = id:match ('^[Jj][Ff][Mm](.*)$'); -- identifier with jfm prefix; extract identifier
 +
 +
if is_set (id_num) then
 +
add_maint_cat ('jfm_format');
 +
else -- plain number without jfm prefix
 +
id_num = id; -- if here id does not have prefix
 +
end
 +
 +
if id_num and id_num:match('^%d%d%.%d%d%d%d%.%d%d$') then
 +
id = id_num; -- jfm matches pattern
 +
else
 +
err_cat = ' ' .. set_error( 'bad_jfm' ); -- set an error message
 +
end
 +
 +
return external_link_id({link = handler.link, label = handler.label, q = handler.q,
 +
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
 +
end
 +
 +
 +
--[[--------------------------< L C C N >----------------------------------------------------------------------
 +
 +
Format LCCN link and do simple error checking.  LCCN is a character string 8-12 characters long. The length of
 +
the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits.
 +
http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
 +
 +
length = 8 then all digits
 +
length = 9 then lccn[1] is lower case alpha
 +
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
 +
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
 +
length = 12 then lccn[1] and lccn[2] are both lower case alpha
 +
 +
]]
 +
 +
local function lccn(lccn)
 +
local handler = cfg.id_handlers['LCCN'];
 +
local err_cat = ''; -- presume that LCCN is valid
 +
local id = lccn; -- local copy of the lccn
 +
 +
id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)
 +
local len = id:len(); -- get the length of the lccn
 +
 +
if 8 == len then
 +
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)
 +
err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
 +
end
 +
elseif 9 == len then -- LCCN should be adddddddd
 +
if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?
 +
err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
 +
end
 +
elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd
 +
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...
 +
if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern
 +
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
 +
end
 +
end
 +
elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd
 +
if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns
 +
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
 +
end
 +
elseif 12 == len then -- LCCN should be aadddddddddd
 +
if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern
 +
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
 +
end
 +
else
 +
err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message
 +
end
 +
 +
if not is_set (err_cat) and nil ~= lccn:find ('%s') then
 +
err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message
 +
end
 +
 +
return external_link_id({link = handler.link, label = handler.label, q = handler.q,
 +
prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
 +
end
 +
 +
 +
--[[--------------------------< M R >--------------------------------------------------------------------------
 +
 +
A seven digit number; if not seven digits, zero-fill leading digits to make seven digits.
 +
 +
]]
 +
 +
local function mr (id)
 +
local handler = cfg.id_handlers['MR'];
 +
local id_num;
 +
local id_len;
 +
local err_cat = '';
 +
 +
id_num = id:match ('^[Mm][Rr](%d+)$'); -- identifier with mr prefix
 +
 +
if is_set (id_num) then
 +
add_maint_cat ('mr_format');
 +
else -- plain number without mr prefix
 +
id_num = id:match ('^%d+$'); -- if here id is all digits
 +
end
 +
 +
id_len = id_num and id_num:len() or 0;
 +
if (7 >= id_len) and (0 ~= id_len) then
 +
id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits
 +
else
 +
err_cat = ' ' .. set_error( 'bad_mr' ); -- set an error message
 +
end
 +
 +
return external_link_id({link = handler.link, label = handler.label, q = handler.q,
 +
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
 
end
 
end
   Line 716: Line 765:  
end
 
end
 
 
local text = external_link_id({link=handler.link, label=handler.label,
+
local text = external_link_id({link=handler.link, label=handler.label, q = handler.q,
 
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;
 
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;
   Line 723: Line 772:       −
--[[--------------------------< B I B C O D E >--------------------------------------------------------------------
+
--[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------
 +
 
 +
Formats an OpenLibrary link, and checks for associated errors.
 +
 
 +
]]
 +
 
 +
local function openlibrary(id, access)
 +
local code;
 +
local handler = cfg.id_handlers['OL'];
 +
local ident;
 +
 +
ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; remove OL prefix
 +
 
 +
if not is_set (ident) then -- if malformed return an error
 +
return external_link_id({link=handler.link, label=handler.label, q = handler.q,
 +
prefix=handler.prefix .. 'OL',
 +
id=id, separator=handler.separator, encode = handler.encode,
 +
access = access}) .. ' ' .. set_error( 'bad_ol' );
 +
end
 +
 +
id = ident; -- use ident without the optional OL prefix (it has been removed)
 +
 +
if ( code == "A" ) then
 +
return external_link_id({link=handler.link, label=handler.label, q = handler.q,
 +
prefix=handler.prefix .. 'authors/OL',
 +
id=id, separator=handler.separator, encode = handler.encode,
 +
access = access})
 +
end
 +
 +
if ( code == "M" ) then
 +
return external_link_id({link=handler.link, label=handler.label, q = handler.q,
 +
prefix=handler.prefix .. 'books/OL',
 +
id=id, separator=handler.separator, encode = handler.encode,
 +
access = access})
 +
end
 +
 
 +
if ( code == "W" ) then
 +
return external_link_id({link=handler.link, label=handler.label, q = handler.q,
 +
prefix=handler.prefix .. 'works/OL',
 +
id=id, separator=handler.separator, encode = handler.encode,
 +
access = access})
 +
end
 +
end
 +
 
 +
 
 +
--[[--------------------------< P M C >------------------------------------------------------------------------
 +
 
 +
Format a PMC, do simple error checking, and check for embargoed articles.
   −
Validates (sort of) and formats a bibcode id.
+
The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
 +
be linked to the article.  If the embargo date is today or in the past, or if it is empty or omitted, then the
 +
PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.
   −
Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes
+
PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
 +
has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed ()
 +
returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.
   −
But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters
+
PMCs are sequential numbers beginning at 1 and counting up.  This code checks the PMC to see that it contains only digits and is less
and first four digits must be a year.  This function makes these tests:
+
than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.
length must be 19 characters
  −
characters in position
  −
1–4 must be digits and must represent a year in the range of 1000 – next year
  −
5 must be a letter
  −
6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )
  −
7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
  −
9–18 must be letter, digit, or dot
  −
19 must be a letter or dot
      
]]
 
]]
   −
local function bibcode (id, access)
+
local function pmc(id, embargo)
local handler = cfg.id_handlers['BIBCODE'];
+
local test_limit = 6500000; -- update this value as PMCs approach
local err_type;
+
local handler = cfg.id_handlers['PMC'];
local year;
+
local err_cat = ''; -- presume that PMC is valid
 +
local id_num;
 +
local text;
 +
 +
id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix
 +
 
 +
if is_set (id_num) then
 +
add_maint_cat ('pmc_format');
 +
else -- plain number without pmc prefix
 +
id_num = id:match ('^%d+$'); -- if here id is all digits
 +
end
   −
local text = external_link_id({link=handler.link, label=handler.label,
+
if is_set (id_num) then -- id_num has a value so test it
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
+
id_num = tonumber(id_num); -- convert id_num to a number for range testing
access=access});
+
if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries
+
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
if 19 ~= id:len() then
  −
err_type = 'length';
  −
else
  −
year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") --  
  −
if not year then -- if nil then no pattern match
  −
err_type = 'value'; -- so value error
   
else
 
else
local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year
+
id = tostring (id_num); -- make sure id is a string
year = tonumber (year); -- convert year portion of bibcode to a number
  −
if (1000 > year) or (year > next_year) then
  −
err_type = 'year'; -- year out of bounds
  −
end
  −
if id:find('&%.') then
  −
err_type = 'journal'; -- journal abbreviation must not have '&.' (if it does its missing a letter)
  −
end
   
end
 
end
 +
else -- when id format incorrect
 +
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
 
end
 
end
 
+
if is_set (err_type) then -- if there was an error detected
+
if is_set (embargo) then -- is PMC still embargoed?
text = text .. ' ' .. set_error( 'bad_bibcode', {err_type});
+
text = table.concat ( -- still embargoed so no external link
 +
{
 +
make_wikilink (handler.link, handler.label),
 +
handler.separator,
 +
id,
 +
err_cat
 +
});
 +
else
 +
text = external_link_id({link = handler.link, label = handler.label, q = handler.q, -- no embargo date or embargo has expired, ok to link to article
 +
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
 
end
 
end
 
return text;
 
return text;
Line 776: Line 876:       −
--[[--------------------------< C I T E S E E R X >------------------------------------------------------------
+
--[[--------------------------< P M I D >----------------------------------------------------------------------
   −
CiteSeerX use their own notion of "doi" (not to be confused with the identifiers resolved via doi.org).
+
Format PMID and do simple error checking.  PMIDs are sequential numbers beginning at 1 and counting up.  This
 +
code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable
 +
test_limit will need to be updated periodically as more PMIDs are issued.
   −
The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure
   
]]
 
]]
   −
local function citeseerx (id)
+
local function pmid(id)
local handler = cfg.id_handlers['CITESEERX'];
+
local test_limit = 32000000; -- update this value as PMIDs approach
local matched;
+
local handler = cfg.id_handlers['PMID'];
 +
local err_cat = ''; -- presume that PMID is valid
 
 
local text = external_link_id({link=handler.link, label=handler.label,
+
if id:match("[^%d]") then -- if PMID has anything but digits
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
+
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
access=handler.access});
+
else -- PMID is only digits
 +
local id_num = tonumber(id); -- convert id to a number for range testing
 +
if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries
 +
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
 +
end
 +
end
 
 
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$");
+
return external_link_id({link = handler.link, label = handler.label, q = handler.q,
if not matched then
+
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
text = text .. ' ' .. set_error( 'bad_citeseerx' );
  −
end
  −
return text;
   
end
 
end
   Line 812: Line 916:  
local test_limit = 3500000; -- update this value as SSRNs approach
 
local test_limit = 3500000; -- update this value as SSRNs approach
 
local handler = cfg.id_handlers['SSRN'];
 
local handler = cfg.id_handlers['SSRN'];
local err_cat = ''; -- presume that SSRN is valid
+
local err_cat = ''; -- presume that SSRN is valid
 
local id_num;
 
local id_num;
 
local text;
 
local text;
Line 827: Line 931:  
end
 
end
 
 
text = external_link_id({link = handler.link, label = handler.label,
+
text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
 
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;
   Line 833: Line 937:  
end
 
end
    +
 +
--[[--------------------------< U S E N E T _ I D >------------------------------------------------------------
 +
 +
Validate and format a usenet message id.  Simple error checking, looks for 'id-left@id-right' not enclosed in
 +
'<' and/or '>' angle brackets.
 +
 +
]]
 +
 +
local function usenet_id (id)
 +
local handler = cfg.id_handlers['USENETID'];
 +
 +
local text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
 +
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
 +
 +
if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- no '@' with text on both sides, or first character is '<', or last character is '>'
 +
text = text .. ' ' .. set_error( 'bad_usenet_id' ) -- add an error message if the message id is invalid
 +
end
 +
 +
return text
 +
end
 +
 +
 +
--[[--------------------------< Z B L >-----------------------------------------------------------------------
 +
 +
A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
 +
 +
]]
 +
 +
local function zbl (id)
 +
local handler = cfg.id_handlers['ZBL'];
 +
local id_num;
 +
local err_cat = '';
 +
 +
id_num = id:match ('^[Zz][Bb][Ll](.*)$'); -- identifier with zbl prefix; extract identifier
 +
 +
if is_set (id_num) then
 +
add_maint_cat ('zbl_format');
 +
else -- plain number without zbl prefix
 +
id_num = id; -- if here id does not have prefix
 +
end
 +
 +
if id_num:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then
 +
id = id_num; -- id matches pattern
 +
else
 +
err_cat = ' ' .. set_error( 'bad_zbl' ); -- set an error message
 +
end
 +
 +
return external_link_id({link = handler.link, label = handler.label, q = handler.q,
 +
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
 +
end
 +
 +
 +
--============================<< I N T E R F A C E  F U N C T I O N S >>==========================================
    
--[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------
 
--[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------
Line 850: Line 1,007:  
 
 
for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
 
for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
   
-- fallback to read-only cfg
 
-- fallback to read-only cfg
 
handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );
 
handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );
Line 860: Line 1,016:  
elseif handler.mode ~= 'manual' then
 
elseif handler.mode ~= 'manual' then
 
error( cfg.messages['unknown_ID_mode'] );
 
error( cfg.messages['unknown_ID_mode'] );
 +
elseif k == 'ARXIV' then
 +
table.insert( new_list, {handler.label, arxiv( v, options.Class ) } );
 +
elseif k == 'ASIN' then
 +
table.insert( new_list, {handler.label, asin( v, options.ASINTLD ) } );
 
elseif k == 'BIBCODE' then
 
elseif k == 'BIBCODE' then
 
table.insert( new_list, {handler.label, bibcode( v, handler.access ) } );
 
table.insert( new_list, {handler.label, bibcode( v, handler.access ) } );
Line 868: Line 1,028:  
elseif k == 'DOI' then
 
elseif k == 'DOI' then
 
table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } );
 
table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } );
 +
elseif k == 'EISSN' then
 +
table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn
 
elseif k == 'HDL' then
 
elseif k == 'HDL' then
 
table.insert( new_list, {handler.label, hdl( v, handler.access ) } );
 
table.insert( new_list, {handler.label, hdl( v, handler.access ) } );
elseif k == 'ARXIV' then
  −
table.insert( new_list, {handler.label, arxiv( v, options.Class ) } );
  −
elseif k == 'ASIN' then
  −
table.insert( new_list, {handler.label, amazon( v, options.ASINTLD ) } );
  −
elseif k == 'LCCN' then
  −
table.insert( new_list, {handler.label, lccn( v ) } );
  −
elseif k == 'OL' or k == 'OLA' then
  −
table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } );
  −
elseif k == 'PMC' then
  −
table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
  −
elseif k == 'PMID' then
  −
table.insert( new_list, {handler.label, pmid( v ) } );
  −
elseif k == 'OCLC' then
  −
table.insert( new_list, {handler.label, oclc( v ) } );
  −
elseif k == 'SSRN' then
  −
table.insert( new_list, {handler.label, ssrn( v ) } );
  −
elseif k == 'ISMN' then
  −
table.insert( new_list, {handler.label, ismn( v ) } );
  −
elseif k == 'ISSN' then
  −
table.insert( new_list, {handler.label, issn( v ) } );
  −
elseif k == 'EISSN' then
  −
table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn
   
elseif k == 'ISBN' then
 
elseif k == 'ISBN' then
 
local ISBN = internal_link_id( handler );
 
local ISBN = internal_link_id( handler );
 
local check;
 
local check;
 
local err_type = '';
 
local err_type = '';
check, err_type = check_isbn( v );
+
check, err_type = isbn( v );
 
if not check then
 
if not check then
 
if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set
 
if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set
Line 904: Line 1,044:  
end
 
end
 
elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set
 
elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set
add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary
+
add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary
 
end
 
end
 
table.insert( new_list, {handler.label, ISBN } );
 
table.insert( new_list, {handler.label, ISBN } );
 +
elseif k == 'ISMN' then
 +
table.insert( new_list, {handler.label, ismn( v ) } );
 +
elseif k == 'ISSN' then
 +
table.insert( new_list, {handler.label, issn( v ) } );
 +
elseif k == 'JFM' then
 +
table.insert( new_list, {handler.label, jfm( v ) } );
 +
elseif k == 'LCCN' then
 +
table.insert( new_list, {handler.label, lccn( v ) } );
 +
elseif k == 'MR' then
 +
table.insert( new_list, {handler.label, mr( v ) } );
 +
elseif k == 'OCLC' then
 +
table.insert( new_list, {handler.label, oclc( v ) } );
 +
elseif k == 'OL' or k == 'OLA' then
 +
table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } );
 +
elseif k == 'PMC' then
 +
table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
 +
elseif k == 'PMID' then
 +
table.insert( new_list, {handler.label, pmid( v ) } );
 +
elseif k == 'SSRN' then
 +
table.insert( new_list, {handler.label, ssrn( v ) } );
 
elseif k == 'USENETID' then
 
elseif k == 'USENETID' then
table.insert( new_list, {handler.label, message_id( v ) } );
+
table.insert( new_list, {handler.label, usenet_id( v ) } );
 +
elseif k == 'ZBL' then
 +
table.insert( new_list, {handler.label, zbl( v ) } );
 
else
 
else
 
error( cfg.messages['unknown_manual_ID'] );
 
error( cfg.messages['unknown_manual_ID'] );
Line 915: Line 1,077:  
 
 
local function comp( a, b ) -- used in following table.sort()
 
local function comp( a, b ) -- used in following table.sort()
return a[1] < b[1];
+
return a[1]:lower() < b[1]:lower();
 
end
 
end
 
 
Line 994: Line 1,156:  
add_maint_cat = utilities_page_ptr.add_maint_cat;
 
add_maint_cat = utilities_page_ptr.add_maint_cat;
 
substitute = utilities_page_ptr.substitute;
 
substitute = utilities_page_ptr.substitute;
 +
make_wikilink = utilities_page_ptr.make_wikilink;
    
z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities
 
z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities

Navigation menu