Changes

10,729 bytes added , 15:57, 31 December 2018

m

1 revision imported

Line 6: Line 6:

]]

−

local is_set, in_array, set_error, select_one, add_maint_cat, substitute; -- functions in Module:Citation/CS1/Utilities

+

local is_set, in_array, set_error, select_one, add_maint_cat, substitute, make_wikilink; -- functions in Module:Citation/CS1/Utilities

local z; -- table of tables defined in Module:Citation/CS1/Utilities

Line 12: Line 12:

local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration

+

local wd_int_lang = (mw.site.server:match ('wikidata') and mw.getCurrentFrame():preprocess('{{int:lang}}')) or '';

+

--============================<< H E L P E R F U N C T I O N S >>============================================

--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------

Line 22: Line 26:

local url_string = options.id;

local ext_link;

+

local this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org

+

local wd_article; -- article title from wikidata

if options.encode == true or options.encode == nil then

url_string = mw.uri.encode( url_string );

end

+

ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id));

+

if is_set(options.access) then

+

ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock

+

end

+

this_wiki_code = mw.language.getContentLanguage():getCode(); -- get this wikipedia's language code (subdomain)

−

~~ext_link =~~ mw.~~ustring~~.~~format (~~'~~[%s%s%s %s]~~'~~, options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id)~~);

+

if string.match (mw.site.server, 'wikidata') then

−

~~if options.free~~ then

+

this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on wikidata so use interface language setting instead

−

~~ext_link~~ = ~~substitute~~ (~~cfg.presentation[~~'~~free to read~~'~~], ext_link~~); -- ~~add the free-to-read lock~~

end

−

~~return~~ mw.~~ustring~~.~~format~~( '~~[[%s|%s]]%s%s~~'~~, options~~.~~link~~, ~~options.label~~, ~~options.separator or " "~~, ~~ext_link~~);

+

if is_set (options.q) then

+

wd_article = mw.wikibase.getEntity (options.q):getSitelink (this_wiki_code .. 'wiki'); -- fetch article title from wd

+

if wd_article then

+

wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- make interwiki link if taken from wd; leading colon required

+

end

+

end

−

-- return ~~mw.ustring~~.~~format~~( ~~'[[%s|%s]]%s[%s%s%s %s]',~~

+

return table.concat ({

−

-- options.link, options.label, options.separator or " ",

+

make_wikilink (wd_article or options.link, options.label), -- wikidata link or locally specified

−

~~-- options.prefix, url_string, options.suffix or "",~~

+

options.separator or ' ',

−

~~-- mw.text.nowiki(options.id)~~

+

ext_link

−

-- );

+

});

end

Line 49: Line 66:

local function internal_link_id(options)
	-- Renders an identifier whose target is an internal (wiki) link:
	--   <wikilink to options.link labelled options.label><separator><wikilink to prefix..id..suffix labelled id>
	-- options.separator defaults to a single space; options.suffix defaults to the empty string.
	local label_link = make_wikilink (options.link, options.label);			-- link to the article about the identifier
	local id_target = table.concat ({options.prefix, options.id, options.suffix or ''});	-- page title that the identifier value links to

	return table.concat ({
		label_link,
		options.separator or ' ',
		make_wikilink (id_target, mw.text.nowiki (options.id))			-- display text is the nowiki'd identifier value
	});
end

+

--[[--------------------------------------------------------------------------------

+

Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is

+

in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because

+

|embargo= was not set in this cite.

+

]]

+

local function is_embargoed (embargo)
	-- Returns embargo unchanged while its date is today or later; returns '' when |embargo= is not set,
	-- when either date cannot be formatted, or when the embargo has expired (expired embargoes also get
	-- the 'embargo' maintenance category).
	if not is_set (embargo) then
		return '';															-- |embargo= not set
	end

	local content_lang = mw.getContentLanguage();
	local embargo_ok, embargo_ts = pcall (content_lang.formatDate, content_lang, 'U', embargo);	-- 'U' format: unix timestamp of the embargo date
	local today_ok, today_ts = pcall (content_lang.formatDate, content_lang, 'U');				-- unix timestamp for now

	if not (embargo_ok and today_ok) then
		return '';															-- one of the dates could not be formatted
	end

	if tonumber (embargo_ts) < tonumber (today_ts) then						-- embargo date is in the past
		add_maint_cat ('embargo')
		return '';															-- unset because embargo has expired
	end

	return embargo;															-- still embargoed
end

Line 60: Line 115:

ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit.

−

ISBN-13 is checked in ~~check_isbn~~().

+

ISBN-13 is checked in isbn().

If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length

Line 69: Line 124:

local function is_valid_isxn (isxn_str, len)

local temp = 0;

−

isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58

+

isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58

len = len+1; -- adjust to be a loop counter

for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum

Line 82: Line 137: −

--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------

+

--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >-----------------------------------------------

ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit.

Line 93: Line 148:

local temp=0;

−

isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39

+

isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39

for i, v in ipairs( isxn_str ) do

temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit

Line 101: Line 156: −

--[[--------------------------< C H E C ~~K _~~ I S B N >------------------------------------------------------------

+

--[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------

+

lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)

+

1. Remove all blanks.

+

2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.

+

3. If there is a hyphen in the string:

+

a. Remove it.

+

b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):

+

1. All these characters should be digits, and there should be six or less. (not done in this function)

+

2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.

+

Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.

+

]]

+

local function normalize_lccn (lccn)
	-- Returns lccn in canonical form: whitespace removed, everything from the first forward slash
	-- onward dropped, and, when a hyphen is present, the hyphen removed with the part after it
	-- left-filled with zeroes to six characters. No validation is done here.
	local normalized = (lccn:gsub ('%s', ''));									-- 1. strip whitespace; parentheses discard gsub's replacement count

	if normalized:find ('/') then
		normalized = normalized:match ('(.-)/');								-- 2. keep only what precedes the first forward slash
	end

	local head, tail = normalized:match ('(.+)%-(.+)');							-- 3.a split around the (last) hyphen

	if tail then																-- there was a hyphen
		return head .. string.rep ('0', 6 - #tail) .. tail;						-- 3.b.2 left-fill to six; a negative repeat count yields ''
	end

	return normalized;
end

+

--============================<< I D E N T I F I E R F U N C T I O N S >>====================================

+

--[[--------------------------< A R X I V >--------------------------------------------------------------------

+

See: http://arxiv.org/help/arxiv_identifier

+

format and error check arXiv identifier. There are three valid forms of the identifier:

+

the first form, valid only between date codes 9108 and 0703 is:

+

arXiv:<archive>.<class>/<date code><number><version>

+

where:

+

<archive> is a string of alpha characters - may be hyphenated; no other punctuation

+

<class> is a string of alpha characters - may be hyphenated; no other punctuation; not the same as |class= parameter which is not supported in this form

+

<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01

+

first digit of YY for this form can only be 9 or 0

+

<number> is a three-digit number

+

<version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)

+

the second form, valid from April 2007 through December 2014 is:

+

arXiv:<date code>.<number><version>

+

where:

+

<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01

+

<number> is a four-digit number

+

<version> is a 1 or more digit number preceded with a lowercase v; no spaces

+

the third form, valid from January 2015 is:

+

arXiv:<date code>.<number><version>

+

where:

+

<date code> and <version> are as defined for 0704-1412

+

<number> is a five-digit number

+

]]

+

local function arxiv (id, class)
	-- Formats an arXiv identifier and appends a 'bad_arxiv' error message when id matches none of the
	-- three identifier forms or when its YYMM date code is outside the range accepted for the form
	-- it matches. When class is set it is rendered as an archive link for the numeric forms;
	-- otherwise a 'class_ignored' error message is appended.
	local handler = cfg.id_handlers['ARXIV'];
	local yy, mm;																-- two-digit year and month from the date code
	local is_bad = false;														-- assume no error message
	local rendered;																-- output text

	local function date_code (pattern)											-- captures YY and MM from id and returns them as numbers
		local y, m = id:match (pattern);
		return tonumber (y), tonumber (m);
	end

	if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then	-- 9108-0703 format w/ & w/o version
		yy, mm = date_code ("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
		is_bad = (8 <= yy and yy <= 90)											-- year is neither 91-99 nor 00-07
			or mm < 1 or mm > 12												-- not a month
			or (91 == yy and mm < 7)											-- too early in the first year
			or (7 == yy and mm > 3);											-- too late in the last year
	elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then	-- 0704-1412 format w/ & w/o version
		yy, mm = date_code ("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
		is_bad = yy < 7 or yy > 14												-- year out of range for this form
			or mm < 1 or mm > 12												-- not a month
			or (7 == yy and mm < 4);											-- in 2007 this form begins in April
	elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then	-- 1501- format w/ & w/o version
		yy, mm = date_code ("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
		is_bad = yy < 15 or mm < 1 or mm > 12;									-- future years are not tested
	else
		is_bad = true;															-- not a recognized format
	end

	local err_cat = is_bad and table.concat ({' ', set_error ('bad_arxiv')}) or '';	-- error message text only when flagged

	rendered = external_link_id({link = handler.link, label = handler.label, q = handler.q,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;

	if is_set (class) then
		if id:match ('^%d+') then												-- class applies only to identifiers that begin with digits
			rendered = table.concat ({rendered, ' [[//arxiv.org/archive/', class, ' ', class, ']]'});	-- external link within square brackets, not wikilink
		else
			rendered = table.concat ({rendered, ' ', set_error ('class_ignored')});
		end
	end

	return rendered;
end

+

--[[----------------------------------------------------------------------------------------------

+

Validates (sort of) and formats a bibcode id.

+

Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes

+

But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters

+

and first four digits must be a year. This function makes these tests:

+

length must be 19 characters

+

characters in position

+

1–4 must be digits and must represent a year in the range of 1000 – next year

+

5 must be a letter

+

6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )

+

7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )

+

9–18 must be letter, digit, or dot

+

19 must be a letter or dot

+

]]

+

local function bibcode (id, access)
	-- Formats a bibcode identifier and appends a 'bad_bibcode' error message naming the first kind of
	-- problem found: 'length', 'value' (pattern mismatch), 'journal' ('&.' present) or 'year'.
	local handler = cfg.id_handlers['BIBCODE'];
	local rendered = external_link_id({link=handler.link, label=handler.label, q = handler.q,
			prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
			access=access});

	local function find_problem ()												-- returns the error type, or nil when id passes all tests
		if 19 ~= #id then
			return 'length';
		end

		local yyyy = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$");
		if not yyyy then
			return 'value';														-- no pattern match
		end

		local latest = tonumber(os.date ('%Y'))+1;								-- next year is the latest acceptable year
		yyyy = tonumber (yyyy);

		if id:find('&%.') then
			return 'journal';													-- '&.' in the journal abbreviation; takes precedence over a year error
		end
		if (1000 > yyyy) or (yyyy > latest) then
			return 'year';														-- year out of bounds
		end
		return nil;
	end

	local problem = find_problem ();
	if is_set (problem) then
		rendered = rendered .. ' ' .. set_error( 'bad_bibcode', {problem});
	end
	return rendered;
end

+

--[[-------------------------------------------------------------------------------------------

+

Format bioRxiv id and do simple error checking. BiorXiv ids are exactly 6 digits.

+

The bioRxiv id is the number following the last slash in the bioRxiv-issued DOI:

+

https://doi.org/10.1101/078733 -> 078733

+

]]

+

local function biorxiv(id)
	-- Formats a bioRxiv identifier; appends a 'bad_biorxiv' error message unless id is exactly six digits.
	local handler = cfg.id_handlers['BIORXIV'];
	local six_digits = nil ~= id:match("^%d%d%d%d%d%d$");
	local err_cat = six_digits and '' or (' ' .. set_error( 'bad_biorxiv'));	-- '' is truthy in Lua so the and/or idiom is safe here

	return external_link_id({link = handler.link, label = handler.label, q = handler.q,
			prefix=handler.prefix,id=id,separator=handler.separator,
			encode=handler.encode, access=handler.access}) .. err_cat;
end

+

--[[--------------------------< C I T E S E E R X >------------------------------------------------------------

+

CiteSeerX uses its own notion of "doi" (not to be confused with the identifiers resolved via doi.org).

+

The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure

+

]]

+

local function citeseerx (id)
	-- Formats a CiteSeerX identifier; appends a 'bad_citeseerx' error message unless id has the form
	-- 10.1.1.<1-4 digits, no leading zero>.<1-4 digits, no leading zero>.
	local handler = cfg.id_handlers['CITESEERX'];
	local rendered = external_link_id({link=handler.link, label=handler.label, q = handler.q,
			prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
			access=handler.access});

	if id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$") then
		return rendered;														-- well formed
	end

	return rendered .. ' ' .. set_error( 'bad_citeseerx' );
end

+

--[[--------------------------< D O I >------------------------------------------------------------------------

+

Formats a DOI and checks for DOI errors.

+

DOI names contain two parts: prefix and suffix separated by a forward slash.

+

Prefix: directory indicator '10.' followed by a registrant code

+

Suffix: character string of any length chosen by the registrant

+

This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes, or, if it ends

+

with a period or a comma, this function will emit a bad_doi error message.

+

DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,

+

and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely

+

if ever used in doi names.

+

]]

+

local function doi(id, inactive, access)
	-- Formats a DOI and appends a 'bad_doi' error message when id does not begin with '10.', has no
	-- forward slash, contains whitespace or an en dash, or ends with a period or comma.
	-- When inactive is set, an error category is recorded in z.error_categories (dated when inactive
	-- contains four consecutive digits) and the inactive annotation is appended to the rendered link.
	local err_cat = '';
	local handler = cfg.id_handlers['DOI'];
	local text;

	if is_set(inactive) then
		local inactive_year = inactive:match("%d%d%d%d") or '';				-- try to get the year portion from the inactive date
		if is_set(inactive_year) then
			table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
		else
			table.insert( z.error_categories, "Pages with inactive DOIs" );	-- when inactive doesn't contain a recognizable year
		end
		inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
	end

	text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '')

	-- The en dash is a multi-byte character. The byte-oriented string library would treat it inside a
	-- character class as three independent bytes and so reject unrelated characters that share one of
	-- those bytes; mw.ustring.match compares whole code points.
	if nil == mw.ustring.match (id, "^10%.[^%s–]-/[^%s–]-[^%.,]$") then		-- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
		err_cat = ' ' .. set_error( 'bad_doi' );
	end

	return text .. err_cat
end

+

--[[--------------------------< H D L >------------------------------------------------------------------------

+

Formats an HDL with minor error checking.

+

HDL names contain two parts: prefix and suffix separated by a forward slash.

+

Prefix: character string using any character in the UCS-2 character set except '/'

+

Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant

+

This function checks a HDL name for: prefix/suffix. If the HDL name contains spaces, endashes, or, if it ends

+

with a period or a comma, this function will emit a bad_hdl error message.

+

HDL names are case-insensitive and can incorporate any printable Unicode characters so the test for endashes and

+

terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely

+

if ever used in HDLs.

+

]]

+

local function hdl(id, access)
	-- Formats an HDL and appends a 'bad_hdl' error message when id has no forward slash, contains
	-- whitespace or an en dash, or ends with a period or comma.
	local handler = cfg.id_handlers['HDL'];
	local text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access})

	-- The en dash is a multi-byte character. The byte-oriented string library would treat it inside a
	-- character class as three independent bytes and so reject unrelated characters that share one of
	-- those bytes; mw.ustring.match compares whole code points.
	if nil == mw.ustring.match (id, "^[^%s–]-/[^%s–]-[^%.,]$") then			-- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma
		text = text .. ' ' .. set_error( 'bad_hdl' );
	end

	return text;
end

+

--[[------------------------------------------------------------------------------------------------

Determines whether an ISBN string is valid

Line 107: Line 449:

]]

−

local function ~~check_isbn~~( isbn_str )

+

local function isbn( isbn_str )

if nil ~= isbn_str:match("[^%s-0-9X]") then

return false, 'invalid character'; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X

Line 132: Line 474:

return is_valid_isxn_13 (isbn_str), 'checksum';

end

+

end

+

--[[--------------------------< A M A Z O N >------------------------------------------------------------------

+

Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha

+

characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit

+

isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.

+

Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.

+

This function is positioned here because it calls isbn()

+

]]

+

local function asin(id, domain)
	-- Formats a link to Amazon. An error message is appended when id is not ten digits / uppercase
	-- letters, when an all-numeric id (optionally ending in X) fails isbn(), or when a mixed id does
	-- not begin with an uppercase letter. An id that passes isbn() gets the 'ASIN' maintenance category.
	-- domain selects the Amazon site suffix; it defaults to 'com'.
	local err_cat = "";
	local ten_alnum = "^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$";

	if not id:match(ten_alnum) then
		err_cat = ' ' .. set_error ('bad_asin');								-- asin is not a mix of 10 uppercase alpha and numeric characters
	elseif id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then							-- 10-digit numeric (or 9 digits with terminal X)
		if isbn( id ) then														-- only the first return value of isbn() is tested
			add_maint_cat ('ASIN');
		elseif not is_set (err_cat) then
			err_cat = ' ' .. set_error ('bad_asin');							-- asin is not isbn10
		end
	elseif not id:match("^%u[%d%u]+$") then
		err_cat = ' ' .. set_error ('bad_asin');								-- asin doesn't begin with uppercase alpha
	end

	if not is_set(domain) then
		domain = "com";
	elseif in_array (domain, {'jp', 'uk'}) then									-- Japan, United Kingdom
		domain = "co." .. domain;
	elseif in_array (domain, {'au', 'br', 'mx'}) then							-- Australia, Brazil, Mexico
		domain = "com." .. domain;
	end																			-- any other value is used as given

	local handler = cfg.id_handlers['ASIN'];
	local url_prefix = handler.prefix .. domain .. "/dp/";

	return external_link_id({link=handler.link,
			label=handler.label, q = handler.q, prefix=url_prefix,
			id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
end

Line 147: Line 530:

local text;

local valid_ismn = true;

+

local id_copy;

+

id_copy = id; -- save a copy because this testing is destructive

id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn

Line 159: Line 544:

-- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

−

text=~~"[[" .~~. handler.link ~~.. "|" ..~~ handler.label ~~.. "]]" ..~~ handler.separator ~~.. id~~; -- because no place to link to yet

+

text = table.concat (

+

{

+

make_wikilink (handler.link, handler.label),

+

handler.separator,

+

id_copy

+

}); -- because no place to link to yet

if false == valid_ismn then

Line 175: Line 565:

like this:

−

|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link

+

|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link

This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length

Line 185: Line 575:

local function issn(id, e)

−

local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate

+

local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate

local handler;

local text;

Line 196: Line 586:

end

−

id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn

+

id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn

−

if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position

+

if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position

−

valid_issn=false; -- wrong length or improper character

+

valid_issn=false; -- wrong length or improper character

else

−

valid_issn=is_valid_isxn(id, 8); -- validate issn

+

valid_issn=is_valid_isxn(id, 8); -- validate issn

end

if true == valid_issn then

−

id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version

+

id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version

else

−

id = issn_copy; -- if not valid, use the show the invalid issn with error message

+

id = issn_copy; -- if not valid, show the invalid issn with error message

end

−

text = external_link_id({link = handler.link, label = handler.label,

+

text = external_link_id({link = handler.link, label = handler.label, q = handler.q,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

Line 221: Line 611: −

--[[--------------------------< A M ~~A Z O N~~ >------------------------------------------------------------------

+

--[[--------------------------< J F M >-----------------------------------------------------------------------

−

~~Formats a link to Amazon~~. ~~Do simple error checking: asin must be mix of 10 numeric or uppercase alpha~~

+

A numerical identifier in the form nn.nnnn.nn

−

~~characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit~~

−

~~isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.~~

−

~~Error message if not 10 characters, if not isbn10, if mixed and first character is a digit~~.

]]

−

local function ~~amazon~~(id~~, domain~~)

+

local function jfm (id)

−

local ~~err_cat~~ = ""

+

local handler = cfg.id_handlers['JFM'];

−

+

local id_num;

−

~~if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then~~

+

local err_cat = '';

−

~~err_cat = ' '~~ .~~. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters~~

−

~~else~~

−

~~if id:match("^%d%d%d%d%d%d%d%d%d~~[~~%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)~~

−

~~if check_isbn( id ) then -- see if asin value is isbn10~~

−

~~add_maint_cat ('ASIN');~~

−

~~elseif not is_set (err_cat) then~~

−

~~err_cat =~~ ' ' ~~.. set_error ('bad_asin'); -- asin is not isbn10~~

−

~~end~~

−

~~elseif not id:match("^%u[%d%u~~]~~+$") then~~

−

~~err_cat = ' ' .. set_error ('bad_asin')~~; ~~-- asin doesn't begin with uppercase alpha~~

−

~~end~~

−

~~end~~

−

~~if not is_set(domain) then~~

−

~~domain = "com"~~;

−

~~elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom~~

−

~~domain = "co." .. domain;~~

−

~~elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico~~

−

~~domain = "com." .. domain;~~

−

~~end~~

−

local ~~handler~~ = ~~cfg.id_handlers[~~'~~ASIN~~'];

−

~~return external_link_id({link=handler.link,~~

−

~~label=handler.label, prefix=handler.prefix .. domain .. "/dp/",~~

−

~~id=id, encode=handler.encode, separator = handler.separator}) .. err_cat~~;

−

~~end~~

−

~~--[[--------------------------< A R X I V >--------------------------------------------------------------------~~

−

~~See: http://arxiv.org/help/arxiv_identifier~~

−

~~format and error check arXiv identifier. There are three valid forms of the identifier:~~

−

~~the first form, valid only between date codes 9108 and 0703 is:~~

−

~~arXiv:<archive>.<class>/<date code><number><version>~~

−

~~where:~~

−

~~<archive> is a string of alpha characters - may be hyphenated; no other punctuation~~

−

~~<class> is a string of alpha characters - may be hyphenated; no other punctuation~~

−

~~<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01~~

−

~~first digit of YY for this form can only 9 and 0~~

−

~~<number> is a three-digit number~~

−

~~<version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)~~

−

~~the second form, valid from April 2007 through December 2014 is:~~

+

id_num = id:match ('^[Jj][Ff][Mm](.*)$'); -- identifier with jfm prefix; extract identifier

−

~~arXiv~~:~~<date code>~~.~~<number><version>~~

−

~~where:~~

−

~~<date code> is four digits in the form YYMM where YY is the last two digits of the four~~-~~digit year and MM is the month number January = 01~~

−

~~<number> is a four~~-~~digit number~~

−

~~<version> is a 1 or more digit number preceded~~ with ~~a lowercase v~~; ~~no spaces~~

−

~~the third form, valid from January 2015 is:~~

+

if is_set (id_num) then

−

~~arXiv:<date code>.<number><version>~~

+

add_maint_cat ('jfm_format');

−

~~where:~~

+

else -- plain number without jfm prefix

−

~~<date code> and <version> are as defined for 0704-1412~~

+

id_num = id; -- if here id does not have prefix

−

~~<number> is a five-digit number~~

−

]]

−

~~local function arxiv (id, class)~~

−

~~local handler = cfg.id_handlers['ARXIV'];~~

−

~~local year, month, version;~~

−

~~local err_cat = '';~~

−

~~local text;~~

−

if ~~id:match~~(~~"^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$"~~) then ~~-- test for the 9108-0703 format w/ & w/o version~~

−

~~year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");~~

−

~~year = tonumber(year);~~

−

~~month = tonumber(month);~~

−

~~if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month~~

−

~~((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok?~~

−

~~err_cat = ' ' .. set_error~~( '~~bad_arxiv~~' ); ~~-- set error message~~

−

~~end~~

−

~~elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then~~ -- ~~test for the 0704-1412 w/ & w/o version~~

−

~~year, month~~ = id~~:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$")~~;

−

~~year = tonumber(year);~~

−

~~month = tonumber(month);~~

−

~~if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -~~- ~~is year invalid or is month invalid? (doesn't test for future years)~~

−

~~((7 == year) and (4 > month)) then~~ -~~-or -- when year is 07, is month invalid (before April)?~~

−

~~err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message~~

−

~~end~~

−

~~elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version~~

−

~~year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");~~

−

~~year = tonumber(year);~~

−

~~month = tonumber(month);~~

−

if ~~((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years)~~

−

~~err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message~~

−

~~end~~

−

~~else~~

−

~~err_cat = ' ' .. set_error( 'bad_arxiv' ); -- arXiv~~ id ~~doesn't match any format~~

end

−

~~text = external_link_id~~(~~{link = handler~~.~~link, label = handler~~.~~label,~~

+

if id_num and id_num:match('^%d%d%.%d%d%d%d%.%d%d$') then

−

~~prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, free=handler.free}) .. err_cat;~~

+

id = id_num; -- jfm matches pattern

−

~~if is_set (class~~) then

−

~~class~~ = ~~' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]'~~; -- ~~external link within square brackets, not wikilink~~

else

−

~~class~~ = ''; -- ~~empty string for concatenation~~

+

err_cat = ' ' .. set_error( 'bad_jfm' ); -- set an error message

end

−

return ~~text~~ .. ~~class~~;

+

return external_link_id({link = handler.link, label = handler.label, q = handler.q,

+

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

end

−

~~--[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------~~

−

~~lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)~~

−

~~1. Remove all blanks.~~

−

~~2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.~~

−

~~3. If there is a hyphen in the string:~~

−

~~a. Remove it.~~

−

~~b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):~~

−

~~1. All these characters should be digits, and there should be six or less. (not done in this function)~~

−

~~2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.~~

−

~~Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.~~

−

]]

−

~~local function normalize_lccn (lccn)~~

−

~~lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace~~

−

~~if nil ~= string.find (lccn,'/') then~~

−

~~lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it~~

−

~~end~~

−

~~local prefix~~

−

~~local suffix~~

−

~~prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix~~

−

~~if nil ~= suffix then -- if there was a hyphen~~

−

~~suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6~~

−

~~lccn=prefix..suffix; -- reassemble the lccn~~

−

~~end~~

−

~~return lccn;~~

−

~~end~~

Line 385: Line 657:

local function lccn(lccn)

local handler = cfg.id_handlers['LCCN'];

−

local err_cat = ''; -- presume that LCCN is valid

+

local err_cat = ''; -- presume that LCCN is valid

−

local id = lccn; -- local copy of the lccn

+

local id = lccn; -- local copy of the lccn

−

id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)

+

id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)

−

local len = id:len(); -- get the length of the lccn

+

local len = id:len(); -- get the length of the lccn

if 8 == len then

−

if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)

+

if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)

−

err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message

+

err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message

end

−

elseif 9 == len then -- LCCN should be adddddddd

+

elseif 9 == len then -- LCCN should be adddddddd

−

if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?

+

if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?

−

err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message

+

err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message

end

−

elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd

+

elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd

−

if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...

+

if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...

−

if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern

+

if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern

−

err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message

+

err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message

end

−

elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd

+

elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd

if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns

−

err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message

+

err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message

end

−

elseif 12 == len then -- LCCN should be aadddddddddd

+

elseif 12 == len then -- LCCN should be aadddddddddd

−

if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern

+

if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern

−

err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message

+

err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message

end

else

−

err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message

+

err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message

end

if not is_set (err_cat) and nil ~= lccn:find ('%s') then

−

err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message

+

err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message

end

−

return external_link_id({link = handler.link, label = handler.label,

+

return external_link_id({link = handler.link, label = handler.label, q = handler.q,

prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;

end

−

--[[------------------------------------------------------------------------------------------------

+

--[[--------------------------< M R >--------------------------------------------------------------------------

−

~~Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This~~

+

A seven digit number; if not seven digits, zero-fill leading digits to make seven digits.

−

~~code checks the PMID~~ to ~~see that it contains only~~ digits ~~and is less than test_limit; the value in local variable~~

−

~~test_limit will need to be updated periodically as more PMIDs are issued~~.

]]

−

local function ~~pmid~~(id)

+

local function mr (id)

−

~~local test_limit = 30000000; -- update this value as PMIDs approach~~

+

local handler = cfg.id_handlers['MR'];

−

local handler = cfg.id_handlers['~~PMID~~'];

+

local id_num;

−

local err_cat = ''; ~~-- presume that PMID is valid~~

+

local id_len;

+

local err_cat = '';

−

if id:match("[^%d]") ~~then~~ -- if ~~PMID has anything but digits~~

+

id_num = id:match ('^[Mm][Rr](%d+)$'); -- identifier with mr prefix

−

~~err_cat = ' ' .. set_error~~( '~~bad_pmid~~' ); ~~-- set an error message~~

+

−

else -- ~~PMID is only digits~~

+

if is_set (id_num) then

−

~~local~~ id_num = ~~tonumber~~(id); -- ~~convert~~ id ~~to a number for range testing~~

+

add_maint_cat ('mr_format');

−

if 1 > id_num ~~or test_limit < id_num then~~ -- ~~if PMID is outside test limit boundaries~~

+

else -- plain number without mr prefix

−

err_cat = ' ' .. set_error( '~~bad_pmid~~' ); -- set an error message

+

id_num = id:match ('^%d+$'); -- if here id is all digits

−

~~end~~

+

end

+

id_len = id_num and id_num:len() or 0;

+

if (7 >= id_len) and (0 ~= id_len) then

+

id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits

+

else

+

err_cat = ' ' .. set_error( 'bad_mr' ); -- set an error message

end

−

return external_link_id({link = handler.link, label = handler.label,

+

return external_link_id({link = handler.link, label = handler.label, q = handler.q,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

end

−

--[[--------------------------< ~~I S _ E M B A R G~~ O ~~E D~~ >------------------------------------------------------

+

--[[--------------------------< O C L C >----------------------------------------------------------------------

−

~~Determines if a PMC identifier's online version is embargoed~~. ~~Compares the date in |embargo= against today's date~~. ~~If embargo date is~~

+

Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html

−

~~in the future, returns the content of |embargo=; otherwise, returns and empty string because the embargo has expired or because~~

−

~~|embargo= was not set in this cite~~.

]]

−

local function ~~is_embargoed~~ (~~embargo~~)

+

local function oclc (id)

−

~~if is_set (embargo) then~~

+

local handler = cfg.id_handlers['OCLC'];

−

local ~~lang~~ = mw.~~getContentLanguage()~~;

+

local number;

−

local ~~good1, embargo_date, good2, todays_date~~;

+

local err_msg = ''; -- empty string for concatenation

−

~~good1, embargo_date~~ = ~~pcall( lang.formatDate, lang, 'U', embargo );~~

−

~~good2, todays_date = pcall( lang.formatDate, lang,~~ 'U' );

−

if ~~good1~~ and ~~good2~~ then -- ~~if embargo date~~ and ~~today~~'~~s date are good dates~~

+

if id:match('^ocm%d%d%d%d%d%d%d%d$') then -- ocm prefix and 8 digits; 001 field (12 characters)

−

~~if tonumber~~( ~~embargo_date~~ ) >= ~~tonumber~~( ~~todays_date~~ ) then -- ~~is embargo date is in~~ the ~~future?~~

+

number = id:match('ocm(%d+)'); -- get the number

−

~~return embargo~~; -- ~~still embargoed~~

+

elseif id:match('^ocn%d%d%d%d%d%d%d%d%d$') then -- ocn prefix and 9 digits; 001 field (12 characters)

−

~~else~~

+

number = id:match('ocn(%d+)'); -- get the number

−

~~add_maint_cat~~ ('~~embargo~~')

+

elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then -- on prefix and 10 or more digits; 001 field (12 characters)

−

~~return ''~~; -- ~~unset because embargo has expired~~

+

number = id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$'); -- get the number

−

~~end~~

+

elseif id:match('^%(OCoLC%)[1-9]%d*$') then -- (OCoLC) prefix and variable number digits; no leading zeros; 035 field

+

number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number

+

if 9 < number:len() then

+

number = nil; -- constrain to 1 to 9 digits; change this when oclc issues 10-digit numbers

+

end

+

elseif id:match('^%d+$') then -- no prefix

+

number = id; -- get the number

+

if 10 < number:len() then

+

number = nil; -- constrain to 1 to 10 digits; change this when oclc issues 11-digit numbers

end

−

return ''; -- ~~|embargo~~= ~~not set~~ return ~~empty string~~

+

if number then -- proper format

+

id = number; -- exclude prefix, if any, from external link

+

else

+

err_msg = ' ' .. set_error( 'bad_oclc' ) -- add an error message if the id is malformed

+

end

+

local text = external_link_id({link=handler.link, label=handler.label, q = handler.q,

+

prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;

+

return text;

+

end

+

--[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------

+

Formats an OpenLibrary link, and checks for associated errors.

+

]]

+

local function openlibrary(id, access)

+

local code;

+

local handler = cfg.id_handlers['OL'];

+

local ident;

+

ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; remove OL prefix

+

if not is_set (ident) then -- if malformed return an error

+

return external_link_id({link=handler.link, label=handler.label, q = handler.q,

+

prefix=handler.prefix .. 'OL',

+

id=id, separator=handler.separator, encode = handler.encode,

+

access = access}) .. ' ' .. set_error( 'bad_ol' );

+

end

+

id = ident; -- use ident without the optional OL prefix (it has been removed)

+

if ( code == "A" ) then

+

return external_link_id({link=handler.link, label=handler.label, q = handler.q,

+

prefix=handler.prefix .. 'authors/OL',

+

id=id, separator=handler.separator, encode = handler.encode,

+

access = access})

+

end

+

if ( code == "M" ) then

+

return external_link_id({link=handler.link, label=handler.label, q = handler.q,

+

prefix=handler.prefix .. 'books/OL',

+

id=id, separator=handler.separator, encode = handler.encode,

+

access = access})

+

end

+

if ( code == "W" ) then

+

return external_link_id({link=handler.link, label=handler.label, q = handler.q,

+

prefix=handler.prefix .. 'works/OL',

+

id=id, separator=handler.separator, encode = handler.encode,

+

access = access})

+

end

Line 499: Line 835:

local function pmc(id, embargo)

−

local test_limit = ~~5000000~~; -- update this value as PMCs approach

+

local test_limit = 6500000; -- update this value as PMCs approach

local handler = cfg.id_handlers['PMC'];

−

local err_cat = ''; -- presume that PMC is valid

+

local err_cat = ''; -- presume that PMC is valid

+

local id_num;

+

local text;

−

~~local text~~;

+

id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix

+

if is_set (id_num) then

+

add_maint_cat ('pmc_format');

+

else -- plain number without pmc prefix

+

id_num = id:match ('^%d+$'); -- if here id is all digits

+

end

−

if ~~id:match~~(~~"[^%d]"~~) then -- ~~if PMC~~ has ~~anything but digits~~

+

if is_set (id_num) then -- id_num has a value so test it

−

~~err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message~~

+

id_num = tonumber(id_num); -- convert id_num to a number for range testing

−

~~else -- PMC is only digits~~

+

if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries

−

~~local~~ id_num = tonumber(id); -- convert id to a number for range testing

+

err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message

−

if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries

+

else

−

err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message

+

id = tostring (id_num); -- make sure id is a string

end

+

else -- when id format incorrect

+

err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message

end

if is_set (embargo) then -- is PMC still embargoed?

−

text=~~"[[" .~~. handler.link ~~.. "|" ..~~ handler.label ~~.. "]]:" ..~~ handler.separator .. id .. err_cat; ~~-- still embargoed so no external link~~

+

text = table.concat ( -- still embargoed so no external link

+

{

+

make_wikilink (handler.link, handler.label),

+

handler.separator,

+

id,

+

err_cat

+

});

else

−

text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article

+

text = external_link_id({link = handler.link, label = handler.label, q = handler.q, -- no embargo date or embargo has expired, ok to link to article

−

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, ~~free~~=handler.~~free~~}) .. err_cat;

+

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;

end

return text;

Line 524: Line 876: −

--[[--------------------------< D ~~O I~~ >------------------------------------------------------------------------

+

--[[------------------------------------------------------------------------------------------------

−

~~Formats a DOI~~ and ~~checks for DOI errors~~.

+

Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This

−

+

code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable

−

~~DOI names contain two parts: prefix~~ and ~~suffix separated by a forward slash~~.

+

test_limit will need to be updated periodically as more PMIDs are issued.

−

~~Prefix: directory indicator '10.' followed by a registrant~~ code

−

~~Suffix: character string of any length chosen by the registrant~~

−

~~This function~~ checks ~~a DOI name for: prefix/suffix. If~~ the ~~doi name~~ contains ~~spaces or endashes, or, if it ends~~

−

~~with a period or a comma, this function will emit a bad_doi error message.~~

−

~~DOI names are case-insensitive~~ and ~~can incorporate any printable Unicode characters so~~ the ~~test for spaces, endash,~~

−

~~and terminal punctuation may not~~ be ~~technically correct but it appears, that in practice these characters~~ are ~~rarely~~

−

~~if ever used in doi names~~.

]]

−

local function ~~doi~~(id~~, inactive~~)

+

local function pmid(id)

−

local ~~cat~~ = ""

+

local test_limit = 32000000; -- update this value as PMIDs approach

−

local handler = cfg.id_handlers['~~DOI~~'];

+

local handler = cfg.id_handlers['PMID'];

+

local err_cat = ''; -- presume that PMID is valid

−

~~local text;~~

+

if id:match("[^%d]") then -- if PMID has anything but digits

−

if ~~is_set(inactive) then~~

+

err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message

−

~~local inactive_year = inactive~~:match("%d~~%d%d%d~~") or ''; -- ~~try to get the year portion from the inactive date~~

+

else -- PMID is only digits

−

~~text~~ = ~~"[[" .. handler.link .. "|" .. handler.label .. "]]:" ..~~ id;

+

local id_num = tonumber(id); -- convert id to a number for range testing

−

if ~~is_set(inactive_year)~~ then

+

if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries

−

~~table.insert( z.error_categories, "Pages with DOIs inactive since " .~~. ~~inactive_year );~~

+

err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message

−

~~else~~

−

~~table~~.~~insert~~( ~~z.error_categories, "Pages with inactive DOIs"~~ ); -- ~~when inactive doesn't contain a recognizable year~~

end

−

~~inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"~~

−

~~else~~

−

~~text = external_link_id({link = handler.link, label = handler.label,~~

−

~~prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})~~

−

~~inactive = ""~~

end

−

+

−

~~if nil~~ == ~~id:match("^10%~~.~~[^%s–]-/[^%s–]-[^%~~.,~~]$") then -- doi must begin with '10~~.', ~~must contain a fwd slash~~, ~~must not contain spaces or endashes~~, ~~and must not end with period or comma~~

+

return external_link_id({link = handler.link, label = handler.label, q = handler.q,

−

~~cat~~ = ~~' '~~ .~~. set_error( 'bad_doi'~~ );

+

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

−

~~end~~

−

~~return text~~ .. ~~inactive .. cat~~

end

−

--[[--------------------------< ~~H D L~~ >------------------------------------------------------------------------

+

--[[--------------------------< S S R N >----------------------------------------------------------------------

−

~~Formats~~ an ~~HDL with minor~~ error checking.

+

Format an ssrn, do simple error checking

−

~~HDL names contain two parts: prefix~~ and ~~suffix separated by a forward slash~~.

+

SSRNs are sequential numbers beginning at 100? and counting up. This code checks the ssrn to see that it is

−

~~Prefix: character string using any character in the UCS-2 character set except '/'~~

+

only digits and is greater than 99 and less than test_limit; the value in local variable test_limit will need

−

~~Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant~~

+

to be updated periodically as more SSRNs are issued.

−

This ~~function~~ checks ~~a HDL name for: prefix/suffix. If~~ the ~~HDL name contains spaces, endashes, or, if~~ it ~~ends~~

−

~~with a period or a comma, this function will emit a bad_hdl error message.~~

−

~~HDL names are case-insensitive~~ and ~~can incorporate any printable Unicode characters so~~ the ~~test for endashes and~~

−

~~terminal punctuation may not~~ be ~~technically correct but it appears, that in practice these characters~~ are ~~rarely~~

−

~~if ever used in HDLs~~.

]]

−

local function ~~hdl~~(id)

+

local function ssrn (id)

−

local handler = cfg.id_handlers['~~HDL~~'];

+

local test_limit = 3500000; -- update this value as SSRNs approach

+

local handler = cfg.id_handlers['SSRN'];

+

local err_cat = ''; -- presume that SSRN is valid

+

local id_num;

+

local text;

−

~~local text~~ = ~~external_link_id~~(~~{link = handler.link, label = handler.label,~~

+

id_num = id:match ('^%d+$'); -- id must be all digits

−

~~prefix=handler.prefix,id=~~id~~,separator=handler.separator, encode=handler.encode})~~

−

if ~~nil~~ =~~= id:match~~(~~"^[^%s–]~~-~~/[^%s–]~~-~~[^%~~.~~,]$"~~) ~~then~~ -- ~~hdl must contain a fwd slash, must not contain spaces, endashes, and must not~~ end ~~with period or comma~~

+

if is_set (id_num) then -- id_num has a value so test it

−

~~text~~ = ~~text ..~~ ' ' .. set_error( '~~bad_hdl~~' );

+

id_num = tonumber(id_num); -- convert id_num to a number for range testing

+

if 100 > id_num or test_limit < id_num then -- if SSRN is outside test limit boundaries

+

err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message

+

end

+

else -- when id format incorrect

+

err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message

end

+

text = external_link_id({link = handler.link, label = handler.label, q = handler.q,

+

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat;

+

return text;

end

−

--[[--------------------------< ~~O P~~ E N ~~L I B R A R Y >--------------------------------------------------------~~

+

--[[--------------------------------------------------------------------------------------

−

~~Formats an OpenLibrary link, and checks for associated errors.~~

−

]]

−

~~local function openlibrary(id)~~

−

~~local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'~~

−

~~local handler = cfg.id_handlers['OL'];~~

−

~~if ( code == "A" ) then~~

−

~~return external_link_id({link=handler.link, label=handler.label,~~

−

~~prefix=handler.prefix .. 'authors/OL',~~

−

~~id=id, separator=handler.separator, encode = handler.encode})~~

−

~~elseif ( code == "M" ) then~~

−

~~return external_link_id({link=handler.link, label=handler.label,~~

−

~~prefix=handler.prefix .. 'books/OL',~~

−

~~id=id, separator=handler.separator, encode = handler.encode})~~

−

~~elseif ( code == "W" ) then~~

−

~~return external_link_id({link=handler.link, label=handler.label,~~

−

~~prefix=handler.prefix .. 'works/OL',~~

−

~~id=id, separator=handler.separator, encode = handler.encode})~~

−

~~else~~

−

~~return external_link_id({link=handler.link, label=handler.label,~~

−

~~prefix=handler.prefix .. 'OL',~~

−

~~id=id, separator=handler.separator, encode = handler.encode}) .. ' ' .. set_error( 'bad_ol' );~~

−

~~end~~

−

~~end~~

−

~~--[[--------------------------< M E S S A G~~ E _ I D >----------------------------------------------------------

Validate and format a usenet message id. Simple error checking, looks for 'id-left@id-right' not enclosed in

Line 635: Line 945:

]]

−

local function ~~message_id~~ (id)

+

local function usenet_id (id)

local handler = cfg.id_handlers['USENETID'];

−

local text = external_link_id({link = handler.link, label = handler.label,

+

local text = external_link_id({link = handler.link, label = handler.label, q = handler.q,

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- no '@' with text on both sides, or first character is '<', or last character is '>'

−

text = text .. ' ' .. set_error( '~~bad_message_id~~' ) -- add an error message if the message id is invalid

+

text = text .. ' ' .. set_error( 'bad_usenet_id' ) -- add an error message if the message id is invalid

end

Line 649: Line 959: −

--[[--------------------------< ~~O C~~ L C >----------------------------------------------------------------------

+

--[[--------------------------< Z B L >-----------------------------------------------------------------------

−

~~Validate and format an oclc id~~. ~~https://www.oclc.org/batchload/controlnumber.en.html~~

+

A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional

]]

−

local function ~~oclc~~ (id)

+

local function zbl (id)

−

local handler = cfg.id_handlers['~~OCLC~~'];

+

local handler = cfg.id_handlers['ZBL'];

−

local ~~number~~;

+

local id_num;

−

local ~~err_msg~~ = ''; ~~-- empty string for concatenation~~

+

local err_cat = '';

−

if id:match('^~~ocm%d%d%d%d%d%d%d%d~~$') ~~then~~ -- ~~ocm~~ prefix ~~and 8 digits~~; ~~001 field (12 characters)~~

+

id_num = id:match ('^[Zz][Bb][Ll](.*)$'); -- identifier with zbl prefix; extract identifier

−

~~number = id:match('ocm(%d+)'); -- get the number~~

+

−

~~elseif id:match~~(~~'^ocn%d%d%d%d%d%d%d%d%d$'~~) then ~~-- ocn prefix and 9 digits; 001 field (12 characters)~~

+

if is_set (id_num) then

−

~~number = id:match~~('~~ocn(%d+)~~'); ~~-- get the number~~

+

add_maint_cat ('zbl_format');

−

~~elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then~~ -- ~~on prefix and 10 or more digits; 001 field (12 characters)~~

+

else -- plain number without zbl prefix

−

number ~~= id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$'); -- get the number~~

+

id_num = id; -- if here id does not have prefix

−

~~elseif id:match('^%(OCoLC%)[1-9]%d*$') then -- (OCoLC)~~ prefix ~~and variable number digits; no leading zeros; 035 field~~

−

~~number~~ = id~~:match('%(OCoLC%)([1-9]%d*)')~~; -- ~~get the number~~

−

if ~~9 < number:len() then~~

−

~~number = nil; -- contrain to 1 to 9 digits; change this when oclc issues 10-digit numbers~~

−

~~end~~

−

~~elseif~~ id~~:match('^%d+$') then -- no~~ prefix

−

~~number = id; -- get the number~~

−

~~if 10 < number:len() then~~

−

~~number = nil; -- contrain to 1 to 10 digits; change this when oclc issues 11-digit numbers~~

−

~~end~~

end

−

if ~~number~~ then ~~-- proper format~~

+

if id_num:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then

−

id = ~~number~~; -- ~~exclude prefix, if any, from external link~~

+

id = id_num; -- id matches pattern

else

−

~~err_msg~~ = ' ' .. set_error( '~~bad_oclc~~' ) -- ~~add~~ an error message ~~if the id is malformed~~

+

err_cat = ' ' .. set_error( 'bad_zbl' ); -- set an error message

end

−

~~local text =~~ external_link_id({link=handler.link, label=handler.label,

+

return external_link_id({link = handler.link, label = handler.label, q = handler.q,

−

prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. ~~err_msg;~~

+

prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

−

~~return text~~;

end

−

--~~[[--------------------------~~~~--------------------------------------------------------------------~~

+

--============================<< I N T E R F A C E   F U N C T I O N S >>==========================================

−

~~Validates (sort of) and formats a bibcode id.~~

−

~~Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes~~

−

~~But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters~~

−

~~and first four digits must be a year. This function makes these tests:~~

−

~~length must be 19 characters~~

−

~~characters in position~~

−

~~1–4 must be digits and must represent a year in the range of 1000 – next year~~

−

~~5 must be a letter~~

−

~~6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )~~

−

~~7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )~~

−

~~9–18 must be letter, digit, or dot~~

−

~~19 must be a letter or dot~~

−

]]

−

~~local function bibcode (id)~~

−

~~local handler~~ = ~~cfg.id_handlers['BIBCODE'];~~

−

~~local err_type;~~

−

~~local year;~~

−

~~local text~~ = ~~external_link_id({link~~=~~handler.link, label~~=~~handler.label,~~

−

~~prefix~~=~~handler.prefix, id~~=~~id, separator~~=~~handler.separator, encode~~=~~handler.encode});~~

−

~~if 19 ~~~= ~~id:len() then~~

−

~~err_type~~ = ~~'length';~~

−

~~else~~

−

~~year~~ = ~~id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") --~~

−

~~if not year then -- if nil then no pattern match~~

−

~~err_type~~ = ~~'value'; -- so value error~~

−

~~else~~

−

~~local next_year~~ = ~~tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year~~

−

~~year~~ = ~~tonumber (year); -- convert year portion of bibcode to a number~~

−

~~if (1000 > year) or (year > next_year) then~~

−

~~err_type~~ = ~~'year'; -- year out of bounds~~

−

~~end~~

−

~~if id:find('&%.') then~~

−

~~err_type~~ = ~~'journal'; -- journal abbreviation must not have '&.' (if it does its missing a letter)~~

−

~~end~~

−

~~end~~

−

~~end~~

−

~~if is_set (err_type) then -- if there was an error detected~~

−

~~text~~ = ~~text .. ' ' .. set_error( 'bad_bibcode', {err_type});~~

−

~~end~~

−

~~return text;~~

−

~~end~~

−

--[[----------------------------------------------------------------------------------

Line 760: Line 1,008:

for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table

-- fallback to read-only cfg

−

handler = setmetatable( { ['id'] = v }, fallback(k) );

+

handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );

if handler.mode == 'external' then

Line 768: Line 1,016:

elseif handler.mode ~= 'manual' then

error( cfg.messages['unknown_ID_mode'] );

−

~~elseif k == 'BIBCODE' then~~

−

~~table.insert( new_list, {handler.label, bibcode( v ) } );~~

−

~~elseif k == 'DOI' then~~

−

~~table.insert( new_list, {handler.label, doi( v, options.DoiBroken ) } );~~

−

~~elseif k == 'HDL' then~~

−

~~table.insert( new_list, {handler.label, hdl( v ) } );~~

elseif k == 'ARXIV' then

table.insert( new_list, {handler.label, arxiv( v, options.Class ) } );

elseif k == 'ASIN' then

−

table.insert( new_list, {handler.label, ~~amazon~~( v, options.ASINTLD ) } );

+

table.insert( new_list, {handler.label, asin( v, options.ASINTLD ) } );

−

elseif k == '~~LCCN~~' then

+

elseif k == 'BIBCODE' then

−

table.insert( new_list, {handler.label, ~~lccn~~( v ~~) } );~~

+

table.insert( new_list, {handler.label, bibcode( v, handler.access ) } );

−

~~elseif k == 'OL' or k == 'OLA' then~~

+

elseif k == 'BIORXIV' then

−

~~table.insert( new_list~~, {handler.~~label, openlibrary( v~~ ) } );

+

table.insert( new_list, {handler.label, biorxiv( v ) } );

−

elseif k == '~~PMC~~' then

+

elseif k == 'CITESEERX' then

−

table.insert( new_list, {handler.label, ~~pmc~~( v~~, options.Embargo~~ ) } );

+

table.insert( new_list, {handler.label, citeseerx( v ) } );

−

elseif k == '~~PMID~~' then

+

elseif k == 'DOI' then

−

table.insert( new_list, {handler.label, ~~pmid~~( v ) } );

+

table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } );

−

elseif k == '~~OCLC~~' then

−

table.insert( new_list, {handler.label, ~~oclc~~( v ~~) } );~~

−

~~elseif k == 'ISMN' then~~

−

~~table.insert( new_list~~, ~~{handler.label, ismn( v ) } );~~

−

~~elseif k == 'ISSN' then~~

−

~~table~~.~~insert( new_list~~, {handler.~~label, issn( v~~ ) } );

elseif k == 'EISSN' then

table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn

+

elseif k == 'HDL' then

+

table.insert( new_list, {handler.label, hdl( v, handler.access ) } );

elseif k == 'ISBN' then

local ISBN = internal_link_id( handler );

local check;

local err_type = '';

−

~~-- if not check_isbn( v ) and not is_set(options.IgnoreISBN) then~~

+

check, err_type = isbn( v );

−

~~-- ISBN = ISBN .. set_error( 'bad_isbn', {}, false, " ", "" );~~

−

~~-- end~~

−

check, err_type = ~~check_isbn~~( v );

if not check then

if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set

Line 809: Line 1,044:

end

elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set

−

add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary

+

add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary

end

table.insert( new_list, {handler.label, ISBN } );

+

elseif k == 'ISMN' then

+

table.insert( new_list, {handler.label, ismn( v ) } );

+

elseif k == 'ISSN' then

+

table.insert( new_list, {handler.label, issn( v ) } );

+

elseif k == 'JFM' then

+

table.insert( new_list, {handler.label, jfm( v ) } );

+

elseif k == 'LCCN' then

+

table.insert( new_list, {handler.label, lccn( v ) } );

+

elseif k == 'MR' then

+

table.insert( new_list, {handler.label, mr( v ) } );

+

elseif k == 'OCLC' then

+

table.insert( new_list, {handler.label, oclc( v ) } );

+

elseif k == 'OL' or k == 'OLA' then

+

table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } );

+

elseif k == 'PMC' then

+

table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );

+

elseif k == 'PMID' then

+

table.insert( new_list, {handler.label, pmid( v ) } );

+

elseif k == 'SSRN' then

+

table.insert( new_list, {handler.label, ssrn( v ) } );

elseif k == 'USENETID' then

−

table.insert( new_list, {handler.label, ~~message_id~~( v ) } );

+

table.insert( new_list, {handler.label, usenet_id( v ) } );

+

elseif k == 'ZBL' then

+

table.insert( new_list, {handler.label, zbl( v ) } );

else

error( cfg.messages['unknown_manual_ID'] );

Line 820: Line 1,077:

local function comp( a, b ) -- used in following table.sort()

−

return a[1] < b[1];

+

return a[1]:lower() < b[1]:lower();

end

Line 847: Line 1,104:

end

return id_list;

+

end

+

--[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >--------------------------------------

+

Fetches custom id access levels from arguments using configuration settings.

+

Parameters which have a predefined access level (e.g. arxiv) do not use this

+

function as they are directly rendered as free without using an additional parameter.

+

]]

+

local function extract_id_access_levels( args, id_list )

+

local id_accesses_list = {};

+

for k, v in pairs( cfg.id_handlers ) do

+

local access_param = v.custom_access;

+

local k_lower = string.lower(k);

+

if is_set(access_param) then

+

local access_level = args[access_param];

+

if is_set(access_level) then

+

if not in_array (access_level:lower(), cfg.keywords['id-access']) then

+

table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );

+

access_level = nil;

+

end

+

if not is_set(id_list[k]) then

+

table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k_lower}, true ) } );

+

end

+

if is_set(access_level) then

+

access_level = access_level:lower();

+

end

+

id_accesses_list[k] = access_level;

+

end

+

end

+

end

+

return id_accesses_list;

end

Line 865: Line 1,156:

add_maint_cat = utilities_page_ptr.add_maint_cat;

substitute = utilities_page_ptr.substitute;

+

make_wikilink = utilities_page_ptr.make_wikilink;

z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities

end

−

Line 874: Line 1,165:

build_id_list = build_id_list,

extract_ids = extract_ids,

+

extract_id_access_levels = extract_id_access_levels,

is_embargoed = is_embargoed;

set_selected_modules = set_selected_modules;

}

WikiSysop

Bureaucrats, private-view, public-view, Administrators

97,692

edits

Changes

Module:Citation/CS1/Identifiers (view source)

Revision as of 15:57, 31 December 2018

Navigation menu

Search