Module:Citation/CS1/COinS: Difference between revisions

From wiki
Jump to navigation Jump to search
1>Antigng
m (已保护“Module:Citation/CS1/COinS”:​高风险模块([编辑=仅允许管理员](无限期)[移动=仅允许管理员](无限期)))
 
m (1 revision imported)
 
(One intermediate revision by one other user not shown)
Line 1: Line 1:
--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
]]
]]


local is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities and Module:Citation/CS1/Links
local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities


local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
Line 18: Line 17:


local function make_coins_title (title, script)
local function make_coins_title (title, script)
title = has_accept_as_written (title);
if is_set (title) then
if is_set (title) then
title = strip_apostrophe_markup (title); -- strip any apostrophe markup
title = strip_apostrophe_markup (title); -- strip any apostrophe markup
Line 27: Line 27:
script = strip_apostrophe_markup (script); -- strip any apostrophe markup
script = strip_apostrophe_markup (script); -- strip any apostrophe markup
else
else
script = ''; -- if not set, make sure script is an empty string
script = ''; -- if not set, make sure script is an empty string
end
end
if is_set (title) and is_set (script) then
if is_set (title) and is_set (script) then
Line 65: Line 65:
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
end
end
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
pages = pages:gsub ('%b<>', ''); -- remove html-like tags; spans are added to <Pages> by utilities.hyphen_to_dash() which should not appear in COinS metadata
return pages;
return pages;
end
end
Line 137: Line 139:
end
end


value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
Line 144: Line 146:
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script
value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
end
end
value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space
value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space
return value;
return value;
end
end
Line 158: Line 160:
]]
]]


local function COinS (data, class)
local function COinS(data, class)
if 'table' ~= type (data) or nil == next (data) then
if 'table' ~= type(data) or nil == next(data) then
return '';
return '';
end
end
Line 172: Line 174:
-- treat table strictly as an array with only set values.
-- treat table strictly as an array with only set values.
local OCinSoutput = setmetatable ({}, {
local OCinSoutput = setmetatable( {}, {
__newindex = function(self, key, value)
__newindex = function(self, key, value)
if is_set (value) then
if is_set(value) then
rawset (self, #self+1, table.concat {key, '=', mw.uri.encode (remove_wiki_link (value))});
rawset( self, #self+1, table.concat{ key, '=', mw.uri.encode( remove_wiki_link( value ) ) } );
end
end
end
end
});
});
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn', 'journal', 'news', 'magazine'}) or
(in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then -- set genre according to the type of citation template we are rendering
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn'}) then -- set genre according to the type of citation template we are rendering
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite medrxiv, cite ssrn
elseif 'conference' == class then
elseif 'conference' == class then
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
Line 193: Line 196:
end
end
OCinSoutput["rft.jtitle"] = data.Periodical; -- journal only
OCinSoutput["rft.jtitle"] = data.Periodical; -- journal only
if is_set (data.Map) then
OCinSoutput["rft.atitle"] = data.Title; -- 'periodical' article titles
OCinSoutput["rft.atitle"] = data.Map; -- for a map in a periodical
 
else
-- these used only for periodicals
OCinSoutput["rft.atitle"] = make_coins_title (data.Title, data.ScriptTitle);
-- all other 'periodical' article titles
end
-- these used onlu for periodicals
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
OCinSoutput["rft.issue"] = data.Issue;
OCinSoutput["rft.issue"] = data.Issue;
OCinSoutput["rft.pages"] = get_coins_pages (data.Pages); -- also used in book metadata
OCinSoutput['rft.artnum'] = data.ArticleNumber; -- {{cite journal}} only
OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata


elseif 'thesis' ~= class then -- all others except cite thesis are treated as 'book' metadata; genre distinguishes
elseif 'thesis' ~= class then -- all others except cite thesis are treated as 'book' metadata; genre distinguishes
Line 212: Line 213:
elseif 'conference' == class then -- cite conference when Periodical not set
elseif 'conference' == class then -- cite conference when Periodical not set
OCinSoutput["rft.genre"] = "conference";
OCinSoutput["rft.genre"] = "conference";
OCinSoutput["rft.atitle"] = data.Chapter; -- conference paper as chapter in proceedings (book)
elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
if is_set (data.Chapter) or is_set (data.ScriptChapter) then
if is_set (data.Chapter) then
OCinSoutput["rft.genre"] = "bookitem";
OCinSoutput["rft.genre"] = "bookitem";
OCinSoutput["rft.atitle"] = make_coins_title (data.Chapter, data.ScriptChapter);
OCinSoutput["rft.atitle"] = data.Chapter; -- book chapter, encyclopedia article, interview in a book, or map title
-- book chapter, encyclopedia article, interview in a book, or map title
else
else
if 'map' == class or 'interview' == class then
if 'map' == class or 'interview' == class then
Line 224: Line 225:
end
end
end
end
else --{'AV media', 'AV media notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
OCinSoutput["rft.genre"] = "unknown";
OCinSoutput["rft.genre"] = "unknown";
end
end
OCinSoutput["rft.btitle"] = make_coins_title (data.Title, data.ScriptTitle);
OCinSoutput["rft.btitle"] = data.Title; -- book only
-- book only
OCinSoutput["rft.place"] = data.PublicationPlace; -- book only
OCinSoutput["rft.place"] = data.PublicationPlace; -- book only
OCinSoutput["rft.series"] = data.Series; -- book only
OCinSoutput["rft.series"] = data.Series; -- book only
OCinSoutput["rft.pages"] = get_coins_pages (data.Pages); -- book, journal
OCinSoutput["rft.pages"] = data.Pages; -- book, journal
OCinSoutput["rft.edition"] = data.Edition; -- book only
OCinSoutput["rft.edition"] = data.Edition; -- book only
OCinSoutput["rft.pub"] = data.PublisherName; -- book and dissertation
OCinSoutput["rft.pub"] = data.PublisherName; -- book and dissertation
Line 237: Line 237:
else -- cite thesis
else -- cite thesis
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation"; -- dissertation metadata identifier
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation"; -- dissertation metadata identifier
OCinSoutput["rft.title"] = make_coins_title (data.Title, data.ScriptTitle);
OCinSoutput["rft.title"] = data.Title; -- dissertation (also patent but that is not yet supported)
-- dissertation (also patent but that is not yet supported)
OCinSoutput["rft.degree"] = data.Degree; -- dissertation only
OCinSoutput["rft.degree"] = data.Degree; -- dissertation only
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
end
end
-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"
-- and now common parameters (as much as possible)
-- and now common parameters (as much as possible)
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
 
for k, v in pairs (data.ID_list) do -- what to do about these? For now assume that they are common to all?
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
if k == 'ISBN' then v = v:gsub ("[^-0-9X]", ""); end
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
local id = cfg.id_handlers[k].COinS;
local id = cfg.id_handlers[k].COinS;
if string.sub (id or "", 1, 4) == 'info' then -- for ids that are in the info:registry
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
OCinSoutput["rft_id"] = table.concat {id, "/", v};
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
elseif string.sub (id or "", 1, 3) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords
OCinSoutput[id] = v;
OCinSoutput[ id ] = v;
elseif id then -- when cfg.id_handlers[k].COinS is not nil
elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol=
OCinSoutput["rft_id"] = table.concat {cfg.id_handlers[k].prefix, v};-- others; provide a url
OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label});
elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)
end
end
end
end


local last, first;
local last, first;
for k, v in ipairs (data.Authors) do
for k, v in ipairs( data.Authors ) do
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characers
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters
if k == 1 then -- for the first author name only
if k == 1 then -- for the first author name only
if is_set (last) and is_set (first) then -- set these COinS values if |first= and |last= specify the first author name
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
Line 269: Line 271:
else -- for all other authors
else -- for all other authors
if is_set(last) and is_set(first) then
if is_set(last) and is_set(first) then
OCinSoutput["rft.au"] = table.concat {last, ", ", first}; -- book, journal, dissertation
OCinSoutput["rft.au"] = table.concat{ last, ", ", first }; -- book, journal, dissertation
elseif is_set(last) then
elseif is_set(last) then
OCinSoutput["rft.au"] = last; -- book, journal, dissertation
OCinSoutput["rft.au"] = last; -- book, journal, dissertation
end
end
-- TODO: At present we do not report "et al.". Add anything special if this condition applies?
end
end
end
end


OCinSoutput.rft_id = data.URL;
OCinSoutput.rft_id = data.URL;
OCinSoutput.rfr_id = table.concat {"info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage};
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
OCinSoutput = setmetatable (OCinSoutput, nil);
 
-- TODO: Add optional extra info:
-- rfr_dat=#REVISION<version> (referrer private data)
-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)
OCinSoutput = setmetatable( OCinSoutput, nil );
-- sort with version string always first, and combine.
-- sort with version string always first, and combine.
table.sort (OCinSoutput);
-- table.sort( OCinSoutput );
table.insert (OCinSoutput, 1, "ctx_ver=" .. ctx_ver); -- such as "Z39.88-2004"
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
return table.concat (OCinSoutput, "&");
return table.concat(OCinSoutput, "&");
end
end


Line 293: Line 303:
]]
]]


local function set_selected_modules (cfg_table_ptr, utilities_page_ptr, links_page_ptr)
local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
cfg = cfg_table_ptr;
cfg = cfg_table_ptr;


is_set = utilities_page_ptr.is_set; -- import functions from selected Module:Citation/CS1/Utilities module
has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from selected Module:Citation/CS1/Utilities module
is_set = utilities_page_ptr.is_set;
in_array = utilities_page_ptr.in_array;
in_array = utilities_page_ptr.in_array;
remove_wiki_link = utilities_page_ptr.remove_wiki_link;
strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;
strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;
remove_wiki_link = links_page_ptr.remove_wiki_link;
end
end


Line 308: Line 318:


return {
return {
make_coins_title = make_coins_title,
get_coins_pages = get_coins_pages,
COinS = COinS,
COinS = COinS,
set_selected_modules = set_selected_modules,
set_selected_modules = set_selected_modules,
}
}

Latest revision as of 12:17, 23 April 2025

--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- ]]

local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities

local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration


--[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------

Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs)

Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with strings of %27%27...

]]

local function make_coins_title (title, script) title = has_accept_as_written (title); if is_set (title) then title = strip_apostrophe_markup (title); -- strip any apostrophe markup else title = ; -- if not set, make sure title is an empty string end if is_set (script) then script = script:gsub ('^%l%l%s*:%s*', ); -- remove language prefix if present (script value may now be empty string) script = strip_apostrophe_markup (script); -- strip any apostrophe markup else script = ; -- if not set, make sure script is an empty string end if is_set (title) and is_set (script) then script = ' ' .. script; -- add a space before we concatenate end return title .. script; -- return the concatenation end


--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a string where all of Lua's magic characters have been escaped. This is important because functions like string.gsub() treat their pattern and replace strings as patterns, not literal strings. ]]

local function escape_lua_magic_chars (argument) argument = argument:gsub("%%", "%%%%"); -- replace % with %% argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other Lua magic pattern characters return argument; end


--[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------

Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.

]]

local function get_coins_pages (pages) local pattern; if not is_set (pages) then return pages; end -- if no page numbers then we're done

while true do pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url " if nil == pattern then break; end -- no more URLs pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible end

pages = pages:gsub("[%[%]]", ""); -- remove the brackets pages = pages:gsub("–", "-" ); -- replace endashes with hyphens pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (– etc.) with hyphens; do we need to replace numerical entities like and the like? pages = pages:gsub ('%b<>', ); -- remove html-like tags; spans are added to <Pages> by utilities.hyphen_to_dash() which should not appear in COinS metadata return pages; end


--[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------

There are three options for math markup rendering that depend on the editor's math preference settings. These settings are at Special:Preferences#mw-prefsection-rendering and are PNG images TeX source MathML with SVG or PNG fallback

All three are heavy with HTML and CSS which doesn't belong in the metadata.

Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings of the last editor to save the page.

This function gets the rendered form of an equation according to the editor's preference before the page is saved. It then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.

When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial value. To replace multipe equations it is necessary to call this function from within a loop.

]=]

local function coins_replace_math_stripmarker (value) local stripmarker = cfg.stripmarkers['math']; local rendering = value:match (stripmarker); -- is there a math stripmarker

if not rendering then -- when value doesn't have a math stripmarker, abandon this test return false, value; end

rendering = mw.text.unstripNoWiki (rendering); -- convert stripmarker into rendered value (or nil? ? when math render error)

if rendering:match ('alt="[^"]+"') then -- if PNG math option rendering = rendering:match ('alt="([^"]+)"'); -- extract just the math text elseif rendering:match ('$%s+.+%s+%$') then -- if TeX math option; $ is legit character that is escapes as \$ rendering = rendering:match ('$%s+(.+)%s+%$') -- extract just the math text elseif rendering:match ('<annotation[^>]+>.+</annotation>') then -- if MathML math option rendering = rendering:match ('<annotation[^>]+>(.+)</annotation>') -- extract just the math text else return false, value; -- had math stripmarker but not one of the three defined forms end

return true, value:gsub (stripmarker, rendering, 1); end


--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------

Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.

2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content when it shouldn't. See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29

TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible characters table?

]]

local function coins_cleanup (value) local replaced = true; -- default state to get the do loop running

while replaced do -- loop until all math stripmarkers replaced replaced, value = coins_replace_math_stripmarker (value); -- replace math stripmarker with text representation of the equation end

value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message

value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content value = value:gsub (''(s?)', "'%1"); -- replace Template:' or Template:'s with simple apostrophe or apostrophe-s value = value:gsub (' ', ' '); -- replace   entity with plain space value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script value = value:gsub ('‍', ); -- remove ‍ entities value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ); -- remove zero-width joiner, zero-width space, soft hyphen end value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space return value; end


--[[--------------------------< C O I N S >--------------------------------------------------------------------

COinS metadata (see <http://ocoins.info/>) allows automated tools to parse the citation information.

]]

local function COinS(data, class) if 'table' ~= type(data) or nil == next(data) then return ; end

for k, v in pairs (data) do -- spin through all of the metadata parameter values if 'ID_list' ~= k and 'Authors' ~= k then -- except the ID_list and Author tables (author nowiki stripmarker done when Author table processed) data[k] = coins_cleanup (v); end end

local ctx_ver = "Z39.88-2004";

-- treat table strictly as an array with only set values. local OCinSoutput = setmetatable( {}, { __newindex = function(self, key, value) if is_set(value) then rawset( self, #self+1, table.concat{ key, '=', mw.uri.encode( remove_wiki_link( value ) ) } ); end end });

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn'}) then -- set genre according to the type of citation template we are rendering OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite medrxiv, cite ssrn elseif 'conference' == class then OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set) elseif 'web' == class then OCinSoutput["rft.genre"] = "unknown"; -- cite web (when Periodical set) else OCinSoutput["rft.genre"] = "article"; -- journal and other 'periodical' articles end OCinSoutput["rft.jtitle"] = data.Periodical; -- journal only OCinSoutput["rft.atitle"] = data.Title; -- 'periodical' article titles

-- these used only for periodicals OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc. OCinSoutput["rft.chron"] = data.Chron; -- free-form date components OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books OCinSoutput["rft.issue"] = data.Issue; OCinSoutput['rft.artnum'] = data.ArticleNumber; -- Template:Cite journal only OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata

elseif 'thesis' ~= class then -- all others except cite thesis are treated as 'book' metadata; genre distinguishes OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:book"; -- book metadata identifier if 'report' == class or 'techreport' == class then -- cite report and cite techreport OCinSoutput["rft.genre"] = "report"; elseif 'conference' == class then -- cite conference when Periodical not set OCinSoutput["rft.genre"] = "conference"; OCinSoutput["rft.atitle"] = data.Chapter; -- conference paper as chapter in proceedings (book) elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then if is_set (data.Chapter) then OCinSoutput["rft.genre"] = "bookitem"; OCinSoutput["rft.atitle"] = data.Chapter; -- book chapter, encyclopedia article, interview in a book, or map title else if 'map' == class or 'interview' == class then OCinSoutput["rft.genre"] = 'unknown'; -- standalone map or interview else OCinSoutput["rft.genre"] = 'book'; -- book and encyclopedia end end else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} OCinSoutput["rft.genre"] = "unknown"; end OCinSoutput["rft.btitle"] = data.Title; -- book only OCinSoutput["rft.place"] = data.PublicationPlace; -- book only OCinSoutput["rft.series"] = data.Series; -- book only OCinSoutput["rft.pages"] = data.Pages; -- book, journal OCinSoutput["rft.edition"] = data.Edition; -- book only OCinSoutput["rft.pub"] = data.PublisherName; -- book and dissertation

else -- cite thesis OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation"; -- dissertation metadata identifier OCinSoutput["rft.title"] = data.Title; -- dissertation (also patent but that is not yet supported) OCinSoutput["rft.degree"] = data.Degree; -- dissertation only OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation end -- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx" -- and now common parameters (as much as possible) OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation

for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end local id = cfg.id_handlers[k].COinS; if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry OCinSoutput["rft_id"] = table.concat{ id, "/", v }; elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords OCinSoutput[ id ] = v; elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol= OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label}); elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or , "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers) end end

local last, first; for k, v in ipairs( data.Authors ) do last, first = coins_cleanup (v.last), coins_cleanup (v.first or ); -- replace any nowiki stripmarkers, non-printing or invisible characters if k == 1 then -- for the first author name only if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation elseif is_set(last) then OCinSoutput["rft.au"] = last; -- book, journal, dissertation -- otherwise use this form for the first name end else -- for all other authors if is_set(last) and is_set(first) then OCinSoutput["rft.au"] = table.concat{ last, ", ", first }; -- book, journal, dissertation elseif is_set(last) then OCinSoutput["rft.au"] = last; -- book, journal, dissertation end -- TODO: At present we do not report "et al.". Add anything special if this condition applies? end end

OCinSoutput.rft_id = data.URL; OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };

-- TODO: Add optional extra info: -- rfr_dat=#REVISION<version> (referrer private data) -- ctx_id=<data.RawPage>#<ref> (identifier for the context object) -- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd) -- ctx_enc=info:ofi/enc:UTF-8 (character encoding)

OCinSoutput = setmetatable( OCinSoutput, nil );

-- sort with version string always first, and combine. -- table.sort( OCinSoutput ); table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" return table.concat(OCinSoutput, "&"); end


--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Sets local cfg table and imported functions table to same (live or sandbox) as that used by the other modules.

]]

local function set_selected_modules (cfg_table_ptr, utilities_page_ptr) cfg = cfg_table_ptr;

has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from selected Module:Citation/CS1/Utilities module is_set = utilities_page_ptr.is_set; in_array = utilities_page_ptr.in_array; remove_wiki_link = utilities_page_ptr.remove_wiki_link; strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup; end


--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------ ]]

return { make_coins_title = make_coins_title, get_coins_pages = get_coins_pages, COinS = COinS, set_selected_modules = set_selected_modules, }