Module:Webarchive/data

From Wooly Moon Books
Revision as of 09:09, 20 May 2022 by w>GreenC (add ghost archive)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:Webarchive/data/doc

--[[--------------------------< C O N F I G U R A T I O N >----------------------------------------------------

global configuration settings

]]

local config = {
	maxurls = 10,																-- Max number of URLs allowed. 
	tname = 'Webarchive',														-- name of calling template. Change if template rename.
	verifydates = true,															-- See documentation. Set false to disable.
	}


--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------

List of namespaces that should not be included in citation error categories.

Note: Namespace names should use underscores instead of spaces.

]]

local uncategorized_namespaces = {												-- same list as specified at [[Module:Citation/CS1/Configuration]]
	['User']=true, ['Talk']=true, ['User_talk']=true, ['Wikipedia_talk']=true, ['File_talk']=true,
	['Template_talk']=true, ['Help_talk']=true, ['Category_talk']=true, ['Portal_talk']=true,
	['Book_talk']=true, ['Draft_talk']=true, ['Module_talk']=true,
	['MediaWiki_talk']=true,
	}

local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases'};				-- list of Lua patterns found in page names of pages we should not categorize

local excepted_pages = {														-- these pages will be categorized if set true; set to nil to disable
	['Module talk:Webarchive/testcases'] = true,								-- test cases pages used during development
	['Template:Webarchive/testcases/Production'] = true,
	}


--[[--------------------------< C A T E G O R I E S >----------------------------------------------------------

this is a table of all categories supported by Module:Webarchive

]]

local categories = {
	archiveis = 'Category:Webarchive template archiveis links',
	error = 'Category:Webarchive template errors',
	other = 'Category:Webarchive template other archives',
	unknown = 'Category:Webarchive template unknown archives',
	warning = 'Category:Webarchive template warnings',
	wayback = 'Category:Webarchive template wayback links',
	webcite = 'Category:Webarchive template webcite links',
	}


--[[--------------------------< P R E F I X E S >--------------------------------------------------------------

used only with serviceName(), this table holds the two generic tail-text prefixes specified by services['<service name>'][1]

]]

local prefixes = {
	at = 'at',
	atthe = 'at the',
	}


--[=[-------------------------< S E R V I C E S >--------------------------------------------------------------

this is a table of tables for archive services.  Each service table has:
	[1]=prefix; may be boolean true or false, or text string where:
		true indicates that the prefix is taken from prefixes.atthe
		false indicates that the prefix is taken from prefixes.at
		'text string' is used in lieu of the typical 'at' or 'at the' prefix
	[2]=wikilink target article that describes the service; set to nil if not used
	[3]=wikilink label; the label in [[target|label]]; set to nil if not used; when there is not article ([2] is nil) use this to name the service; see wikiwix in the table
	[4]=service ID; set to nil if not used
	[5]=tracking category key from the categories table; set to nil if not used
	[6]=postfix; text string to be appended at the end of the tail string - see webarchive.loc.gov in the table

]=]

local services = {
	['archive.ec'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive.fo'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive.is'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive.li'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive.md'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive.org'] = {true, 'Wayback Machine', nil, 'wayback', categories.wayback},
	['archive.ph'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive.today'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive.vn'] = {false, 'archive.today', nil, 'archiveis', categories.archiveis},
	['archive-it.org'] = {false, 'Archive-It', nil, 'archiveit'},
	['arquivo.pt'] = {true, nil, 'Portuguese Web Archive'},
	['bibalex.org'] = {false, 'Bibliotheca Alexandrina#Internet Archive partnership', 'Bibliotheca Alexandrina'},
	['collectionscanada'] = {true, 'Canadian Government Web Archive'},
	['europarchive.org'] = {true, 'National Library of Ireland'},
	['freezepage.com'] = {false, nil, 'Freezepage'},
	['ghostarchive.org'] = {false, nil, 'Ghost Archive'},
	['haw.nsk'] = {true, 'Croatian Web Archive (HAW)'},
	['langzeitarchivierung.bib-bvb.de'] = {false, 'Bavarian State Library'},
	['loc.gov'] = {true, 'Library of Congress'},
	['nationalarchives.gov.uk'] = {true, 'UK Government Web Archive', nil, 'ukgwa'},
	['nlb.gov.sg'] = {false, 'Web Archive Singapore'},
	['parliament.uk'] = {true, 'UK Parliament\'s Web Archive'},
	['perma.cc'] = {false, 'Perma.cc'},
	['perma-archives.cc'] = {false, 'Perma.cc'},
	['proni.gov'] = {true, 'Public Record Office of Northern Ireland'},
	['screenshots.com'] = {false, nil, 'Screenshots'},
	['stanford.edu'] = {true, 'Stanford University Libraries', 'Stanford Web Archive'},
	['timetravel.mementoweb.org'] = {false, 'Memento Project'},
	['uni-lj.si'] = {true, nil, 'Slovenian Web Archive'},
	['veebiarhiiv.digar.ee'] = {true, nil, 'Estonian Web Archive'},
	['vefsafn.is'] = {true, 'National and University Library of Iceland'},
	['webarchive.bac-lac.gc.ca'] = {false, 'Library and Archives Canada'},
	['webarchive.loc.gov'] = {true, 'Library of Congress', nil, 'locwebarchives', nil, 'Web Archives'},
	['webarchive.nla.gov.au'] = {true, 'Australian Web Archive'},
	['webarchive.org.uk'] = {true, 'UK Web Archive'},
	['webcache.googleusercontent.com'] = {false, nil, 'Google Cache'},
	['webcitation.org'] = {false, 'WebCite', nil, 'webcite', categories.webcite},
	['webharvest.gov'] = {true, 'National Archives and Records Administration'},
	['webrecorder.io'] = {false, 'webrecorder.io'},
	['wikiwix.com'] = {false, nil, 'Wikiwix'},
	['yorku.ca'] = {false, 'York University Libraries', 'York University Digital Library'},
	}


--[[--------------------------< S T A T I C   T E X T >--------------------------------------------------------

for internationalzation

]]

local s_text = {
	addlarchives = 'Additional archives',
	addlpages = 'Additional pages archived&nbsp;on',							-- TODO why the &nbsp; there? replace with regular space?
	Archive_index = 'Archive index',
	Archived = 'Archived',
	archived = 'archived',
	archive = 'archive',
	Page = 'Page',
	}


--[[--------------------------< E R R _ W A R N _ M S G S >----------------------------------------------------

these tables hold error and warning message text

]]

local err_warn_msgs = {
	date_err = '(Date error)',													-- decodeWebciteDate, decodeWaybackDate, decodeArchiveisDate
	date_miss = '(Date missing)',												-- parseExtraArgs
	ts_short = '(Timestamp date length)',										-- decodeWaybackDate timestamp less than 8 digits
	ts_date = '(Timestamp date invalid)',										-- decodeWaybackDate timestamp not a valid date
	unknown_url = '(Error: unknown archive URL)',								-- serviceName
	unnamed_params = '(Positional parameters ignored)',

--warnings
	mismatch = '<sup>(Date mismatch)</sup>',									-- webarchive
	ts_len = '<sup>(Timestamp length)</sup>',									-- decodeWaybackDate, decodeArchiveisDate timestamp not 14 digits
	ts_cal = '<sup>(Calendar)</sup>',											-- decodeWaybackDate timestamp has trailing splat
	}


local crit_err_msgs = {															-- critical error messages
	conflicting = 'Conflicting |$1= and |$2=',
	empty = 'Empty url',
--	iabot1 = 'https://web.http',												-- TODO: these iabot bugs perportedly fixed; removing these causes lua script error
--	iabot2 = 'Invalid URL',														-- at Template:Webarchive/testcases/Production; resolve that before deleting these messages
	invalid_url = 'Invalid URL',
	ts_nan = 'Timestamp not a number',
	unknown = 'Unknown problem. Please report on template talk page',
	}



--[[--------------------------< D A T E   I N T E R N A T I O N A L I Z A T I O N >----------------------------

these tables hold data that is used when converting date formats from non-English languages (because mw.language.getContentLanguage:formatDate()
doesn't understand non-English month names)

]]

local month_num = {																-- retain English language names even though they may not be strictly required on the local wiki
	['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['May'] = 5, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12,
	['Jan'] = 1, ['Feb'] = 2, ['Mar'] = 3, ['Apr'] = 4, ['May'] = 5, ['Jun'] = 6, ['Jul'] = 7, ['Aug'] = 8, ['Sep'] = 9, ['Oct'] = 10, ['Nov'] = 11, ['Dec'] = 12,
-- add local wiki month-names to number translation here
--	[''] = 1, [''] = 2, [''] = 3, [''] = 4, [''] = 5, [''] = 6, [''] = 7, [''] = 8, [''] = 9, [''] = 10, [''] = 11, [''] = 12,
	};

																				-- when the local wiki uses non-western digits in dates, local wiki digits must be
																				-- translated to western digits; lua only understands western digits
local digits = {																-- use this table to aid translation
--	[''] = 0, [''] = 1, [''] = 2, [''] = 3, [''] = 4, [''] = 5, [''] = 6, [''] = 7, [''] = 8, [''] = 9,	-- fill these table indexes with local digits
	enable = false																-- set to true to enable local-digit to western-digit translation
	};


--[[--------------------------< P A R A M E T E R   I N T E R N A T I O N A L I Z A T I O N >------------------

this table holds tables of parameter names and their non-English aliases.  In the enum_params table '#' is a single
character placeholder for 1 or more digit characters

parameter names in this table shall be lowercase
]]

local params = {
	['url'] = {'url'},
	['date'] = {'date', 'datum'},
	['title'] = {'title', 'titel'},
	['nolink'] = {'nolink'},
	['format'] = {'format'}
	}

local enum_params = {
	['url#'] = {'url#'},
	['date#'] = {'date#', 'datum#'},
	['title#'] = {'title#', 'titel#'},
	}

local format_vals = {															-- |format= accepts two values; add local language variants here
	['addlpages'] = {'addlpages'},
	['addlarchives'] = {'addlarchives'},
	}


--[[--------------------------< E X P O R T E D   T A B L E S >------------------------------------------------
]]

return {
	categories = categories,
	config = config,
	crit_err_msgs = crit_err_msgs,
	digits = digits,
	enum_params = enum_params,
	err_warn_msgs = err_warn_msgs,
	excepted_pages = excepted_pages,
	format_vals = format_vals,
	month_num = month_num,
	params = params,
	prefixes = prefixes,
	services = services,
	s_text = s_text,
	uncategorized_namespaces = uncategorized_namespaces,
	uncategorized_subpages = uncategorized_subpages,
	}