Module:Webarchive

Documentation for this module may be created at Module:Webarchive/doc

--[[ ----------------------------------
  Mòdulu Lua chi implementat su template {{Webarchive}} e permitet de
  detzifrare sa data in sos URL de carchi archìviu fintzas in àteros mòdulos.
	]]

-- NOTE(review): presumably turns any use of an undeclared global into an error — confirm against Module:No globals
require('Module:No globals')
local getArgs = require('Module:Arguments').getArgs
-- Read-only configuration; this file reads cfg.month_localized and cfg.servizi from it
local cfg = mw.loadData('Module:Webarchive/Configuratzione')

local p = {}
local track = {}   -- associative array collecting the tracking categories to emit (category name -> 1)
local maxurls = 10 -- maximum number of URLs allowed
-- Module-level flag shared by serviceName() and the date decoders: reset to nil by
-- serviceName(), set to true when the matched service has a service id, and set to
-- false by a decoder when the URL is in a form that carries no date
local encoded_date

--[[--------------------------< inlineError >-----------------------
	Critical error. Renders the whole message as red error text and
	registers the error tracking category in track.
	arg: name of the template parameter to report
	msg: short explanation, shown in parentheses
	Returns the HTML span as a string.
 ]]
local function inlineError(arg, msg)
	local category = 'Categoria:Errores de compilatzione de su template Webarchive'
	track[category] = 1

	local opening = '<span style="font-size:100%" class="error citation-comment">Errore de compilatzione de su template : controllare su valore de <code style="color:inherit; border:inherit; padding:inherit;">&#124;'
	local closing = ').</span>'
	return opening .. arg .. '=</code> (' .. msg .. closing
end

--[[--------------------------< inlineRed >-----------------------
	Wraps a text fragment in red error styling, e.g. a warning to be
	embedded in the final output.
	msg: text to wrap
	trackmsg: 'warning' or 'error' selects the tracking category to
		register in track; any other value registers nothing
	Returns the HTML span as a string.
 ]]
local function inlineRed(msg, trackmsg)
	local category
	if trackmsg == 'warning' then
		category = 'Categoria:Errores de compilatzione de su template Webarchive - Avvisi'
	elseif trackmsg == 'error' then
		category = 'Categoria:Errores de compilatzione de su template Webarchive'
	end
	if category then
		track[category] = 1
	end

	local opening = '<span style="font-size:100%" class="error citation-comment">'
	return opening .. msg .. '</span>'
end


--[[--------------------------< base62 >-----------------------
	Converts a base-62 string to a base-10 number.
	Digit values: '0'-'9' are 0-9, 'A'-'Z' are 10-35, 'a'-'z' are 36-61.
	Returns 1 when value is not made only of alphanumeric characters
	(this includes the empty string).
	Credits: https://de.wikipedia.org/wiki/Modul:Expr
	]]
local function base62( value )
	if not value:match('^%w+$') then
		return 1
	end

	local total = 0
	local weight = 1 -- 62^0 for the rightmost character, multiplied by 62 at each step
	for pos = #value, 1, -1 do
		local code = value:byte( pos )
		local digit
		if code >= 48 and code <= 57 then       -- '0'..'9'
			digit = code - 48
		elseif code >= 65 and code <= 90 then   -- 'A'..'Z'
			digit = code - 55
		elseif code >= 97 and code <= 122 then  -- 'a'..'z'
			digit = code - 61
		else
			-- matched %w but is outside the three ASCII ranges
			return 1
		end
		total = total + digit * weight
		weight = weight * 62
	end
	return total
end

--[[--------------------------< tableLength >-----------------------
	Returns the number of entries in a table, counting every key
	(not only the array part).
	]]
local function tableLength(t)
	local total = 0
	local key = next(t)
	while key ~= nil do
		total = total + 1
		key = next(t, key)
	end
	return total
end

--[[--------------------------< formatDate >-----------------------
	Checks the format of a date (dmy or iso). When the format is
	recognised the date is returned reformatted as dmy, otherwise the
	value is returned unchanged.
	iso: three parts separated by '-', the first being a year between
		1901 and 2199; the month name comes from cfg.month_localized
	dmy: three parts separated by ' ', the last being such a year and
		the first being '1º' or a number
	A numeric day equal to 1 is rendered as '1º'.
	]]
local function formatDate(date)
	local function plausibleYear(text)
		local year = tonumber(text)
		return year and year > 1900 and year < 2200
	end

	local day, month, year
	local parts = mw.text.split(date, '-')
	if tableLength(parts) == 3 then
		-- three dash-separated parts: only the iso reading is attempted
		if plausibleYear(parts[1]) then
			year, month, day = parts[1], cfg.month_localized[tonumber(parts[2])], parts[3]
		end
	else
		parts = mw.text.split(date, ' ')
		if tableLength(parts) == 3 and plausibleYear(parts[3]) and
				(parts[1] == '1º' or tonumber(parts[1])) then
			day, month, year = parts[1], parts[2], parts[3]
		end
	end

	if not month then
		-- format not recognised (or month number unknown): hand the input back
		return date
	end
	day = tonumber(day) or day
	if day == 1 then day = '1º' end
	return mw.ustring.format('%s %s %s', day, month, year)
end

--[[--------------------------< formatUrlDate >-----------------------
	Validates a date extracted automatically from an archive URL.
	y, m, d: year, month and day, as numbers or numeric strings
	Returns the date formatted as dmy (day 1 is rendered as '1º', the
	month name is looked up in cfg.month_localized), or nil when a
	component is missing, not numeric or out of range (day 1-31,
	month 1-12, year from 1900 to the current year), or when no month
	name is configured for the month number.
 ]]
local function formatUrlDate(y, m, d)
	y, m, d = tonumber(y), tonumber(m), tonumber(d)
	if not (y and m and d) then
		return nil
	end

	local current_year = tonumber(os.date('%Y'))
	-- the lower bound on the day rejects timestamps such as 20160800,
	-- which would otherwise be rendered as day 0
	if d < 1 or d > 31 or m < 1 or m > 12 or y < 1900 or y > current_year then
		return nil
	end

	local month_name = cfg.month_localized[m]
	if not month_name then
		-- e.g. a non-integer month: nothing sensible to print
		return nil
	end
	if d == 1 then d = '1º' end
	return mw.ustring.format('%s %s %s', d, month_name, y)
end

--[[--------------------------< decodeWebciteDate >-----------------------
	Extracts the date from a Webcite URI path (e.g. /67xHmVFWP).
	The first path segment is decoded from base-62, the first ten digits
	of the result are taken as a timestamp in seconds, and the resulting
	date is validated and formatted by formatUrlDate.
	Returns nothing when the path has no first segment, or when the URL
	is in one of the known non-base62 forms (in which case encoded_date
	is also set to false).
	]]
local function decodeWebciteDate(path)
	local id = mw.text.split(path, '/')[2]
	if not id or id == '' then
		return
	end

	-- valid URL formats that are not base62:

	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e
	local not_base62 = mw.ustring.find(id, 'query') or
			mw.ustring.find(id, 'cache') or
			mw.ustring.find(id, 'getfile') or
			tonumber(id)
	if not_base62 then
		encoded_date = false
		return
	end

	local digits = string.format('%d', base62(id))
	local seconds = string.sub(digits, 1, 10)
	local ymd = mw.text.split(os.date('%Y %m %d', seconds), ' ')
	return formatUrlDate(ymd[1], ymd[2], ymd[3])
end

--[[--------------------------< decodeWaybackDate >-----------------------
	Extracts the date from a Wayback URI path
	(e.g. /web/20160901010101/http://example.com ).
	Also copes with non-digit decorations such as "re_", "-" and "*".
	Returns nothing when the timestamp segment is exactly "*", nil when
	the segment is not numeric or shorter than 8 characters, otherwise
	the result of formatUrlDate on its first 8 digits (yyyymmdd).
 ]]
local function decodeWaybackDate(path)
	local rest = string.gsub(path, '^/all/', '') -- strip a leading "/all/"
	rest = string.gsub(rest, '^/w?e?b?/?', '') -- strip a leading "/web/" or "/"
	local stamp = mw.text.split(rest, '/')[1]
	if stamp == '*' then return end

	-- remove, in order: a trailing modifier such as "re_" (optionally followed
	-- by one digit), every "-", and a trailing "*"
	for _, pattern in ipairs({ '[a-z][a-z]_[0-9]?$', '[-]', '[*]$' }) do
		stamp = string.gsub(stamp, pattern, '')
	end

	if not tonumber(stamp) or string.len(stamp) < 8 then
		return nil
	end
	return formatUrlDate(string.sub(stamp, 1, 4), string.sub(stamp, 5, 6), string.sub(stamp, 7, 8))
end

--[[--------------------------< decodeArchiveisDate >-----------------------
	Extracts the date from the URI path of a long-form Archive.is link
	(e.g. /2016.08.28-144552/http://example.com).
	"." and "-" in the date are dropped, so 2016.08.28-144552 is read as
	20160828144552.
	Returns nothing when the path has no first segment, or when that
	segment is not numeric (short link format; encoded_date is then set
	to false). Returns nil when the numeric segment is shorter than 8
	characters, otherwise the result of formatUrlDate.
  ]]
local function decodeArchiveisDate(path)
	local segment = mw.text.split(path, '/')[2]
	if not segment or segment == '' then return end

	local stamp = string.gsub(segment, '[%.%-]', '')
	if not tonumber(stamp) then
		-- short link format
		encoded_date = false
		return
	end

	if string.len(stamp) < 8 then
		return nil
	end
	return formatUrlDate(string.sub(stamp, 1, 4), string.sub(stamp, 5, 6), string.sub(stamp, 7, 8))
end

--[[--------------------------< serviceName >-----------------------
	Sets the trailing text and the service id on url_data, based on the
	domain extracted by mw.uri.new() (e.g. web.archive.org).
	url_data: table with at least .host; receives .service and .tail
	nolink: when truthy the wikilink brackets in the tail are omitted
	notail: when truthy no tail is set for a recognised service
	Also resets encoded_date and registers one tracking category.
	]]
local function serviceName(url_data, nolink, notail)
	local tracking = 'Categoria:Template Webarchive - ligàmenes a àteros archìvios'
	local link_open, link_close = '[[', ']]'
	if nolink then
		link_open, link_close = '', ''
	end

	encoded_date = nil -- reset
	for _, entry in ipairs(cfg.servizi) do
		if string.gsub(url_data.host, 'www%.', '') == entry.signature then
			url_data.service = entry.service or 'àteros'
			if not notail then
				if entry.tailbracket then
					url_data.tail = mw.ustring.format(entry.tailbracket, link_open, link_close)
				else
					url_data.tail = entry.tail
				end
			end
			if entry.tracking then
				tracking = entry.tracking
			end
			-- true only for entries that declare a service id
			encoded_date = entry.service and true
			break
		end
	end

	if url_data.service == nil then
		-- no configured service matched the host
		tracking = 'Categoria:Template Webarchive - ligàmenes a archìvios disconnotos'
		local notice = inlineRed('URL de servìtziu de archiviatzione disconnotu')
		url_data.tail = ' su ' .. url_data.host .. ' ' .. notice
	end
	track[tracking] = 1
end

--[[--------------------------< createTracking >-----------------------
	Returns the tracking categories collected in track, each wrapped in
	double square brackets. Outside namespace 0 it returns an empty string.
	]]
local function createTracking()
	-- only applies in namespace 0
	if mw.title.getCurrentTitle().namespace ~= 0 then
		return ''
	end

	local links = {}
	for category in pairs(track) do
		links[#links + 1] = '[[' .. category .. ']]'
	end
	return table.concat(links)
end

--[[--------------------------< createRendering >-----------------------
	Builds the output text from the entries of url_data.
	url_data: sequence of tables with .url, .tail and optional .title
		and .date; the first entry is the main archive, any further
		entries are listed as additional archives
	The article placed before the date is "s'" when the date starts with
	day 8 or 11, "su " otherwise.
	]]
local function createRendering(url_data)
	local main = url_data[1]
	local day = main.date and mw.ustring.match(main.date, '^%d+')
	local article = 'su '
	if day == '8' or day == '11' then
		article = 's\''
	end

	local rendered
	if main.title then
		if main.date then
			rendered = mw.ustring.format('[%s %s]%s&#32;(archiviadu %s%s).', main.url, main.title, main.tail, article, main.date)
		else
			rendered = mw.ustring.format('[%s %s]%s.', main.url, main.title, main.tail)
		end
	else
		if main.date then
			rendered = mw.ustring.format('[%s Archiviadu] %s%s%s.', main.url, article, main.date, main.tail)
		else
			rendered = mw.ustring.format('[%s Archiviadu]%s.', main.url, main.tail)
		end
	end

	if #url_data > 1 then -- more than one archive URL
		local extras = {}
		for i = 2, #url_data do
			local extra = url_data[i]
			local label = extra.title or extra.date
			-- the date goes in parentheses only when a title takes the label slot
			local date_note = extra.title and (' (' .. extra.date .. ')') or ''
			extras[#extras + 1] = mw.ustring.format('[%s %s]%s%s', extra.url, label, date_note, extra.tail or '')
		end
		rendered = rendered .. ' Archìvios agiuntivos: ' .. table.concat(extras, ', ') .. '.'
	end
	return rendered
end

--[[--------------------------------------------------------------------
	Entry point for direct calls from another module.
	Takes the URL of an archive and returns its date when it can be
	decoded. Returns nothing when the URL is missing, has no host or an
	empty path, or when its host (with "www." removed) does not belong to
	a configured wayback, webcite or archiveis service.
	]]
function p.decodeArchiveDate(url)
	local uri = mw.uri.new(url)
	local host, path = uri.host, uri.path
	if not url or not host or path == '' then return end

	local decoders = {
		wayback = decodeWaybackDate,
		webcite = decodeWebciteDate,
		archiveis = decodeArchiveisDate,
	}
	host = string.gsub(host, 'www%.', '')
	for _, entry in ipairs(cfg.servizi) do
		if host == entry.signature then
			local decode = decoders[entry.service]
			if decode then
				return decode(path)
			end
		end
	end
	return
end

--[[--------------------------------------------------------------------
	Main interface function implementing Template:Webarchive.
	frame: the frame object; arguments are read through getArgs
	Parameters read (N is 2..maxurls):
		url / url1, urlN        archive URLs (the first one is mandatory)
		date / date1 / data / data1 / datas / datas1, and
		datasN / dataN / dateN  archive dates
		title / title1 / titolo / titolo1 / tìtulu / tìtulu1, and
		titleN / tìtuluN / titoloN  link titles
		nolink                  when present, service names are not wikilinked
	Every alias is accepted for every index; names that were already
	accepted keep precedence over the newly accepted ones.
	Returns the rendered wikitext followed by the tracking categories.
	]]
-- Maps a service id set by serviceName() to the function that decodes the
-- snapshot date from the URL path of that service.
local url_date_decoders = {
	wayback = decodeWaybackDate,
	webcite = decodeWebciteDate,
	archiveis = decodeArchiveisDate,
}

function p.webarchive(frame)
	-- load the parameters into args, ignoring empty ones, except for nolink
	-- which counts as set as soon as it is present
	local args = getArgs(frame, {
		valueFunc = function(key, value)
			if value then
				if key == 'nolink' then
					return true
				else
					value = mw.text.trim(value)
					if value ~= '' then return value end
				end
			end
			return nil
		end
	})
	local url_data = {}
	local i = 1
	while true do
		-- suffix used when naming the parameter in error messages
		local n = i == 1 and args.url and '' or i
		local url = i == 1 and (args.url or args.url1) or args['url' .. i]

		-- error checks on the url parameter
		-- (inlineError registers the error tracking category itself)
		if i == 1 and not url then
			return inlineError('url', 'bòidu') .. createTracking()
		elseif not url or i > maxurls then
			break
		elseif mw.ustring.find(url, 'https://web.http') then
			return inlineError('url' .. n, 'https://web.http') .. createTracking()
		elseif url == 'https://web.archive.org/http:/' then
			return inlineError('url' .. n, 'URL non vàlidu') .. createTracking()
		end

		local data = { url = url, uri = mw.uri.new(url) }
		url_data[i] = data
		data.host, data.path = data.uri.host, data.uri.path
		if not data.host or data.path == '' then
			return inlineError('url' .. n, 'URL non vàlidu') .. createTracking()
		end
		serviceName(data, args.nolink, i > 1 and true)

		-- date handling
		local decoder = url_date_decoders[data.service]
		local date
		if i == 1 then
			date = args.date or args.date1 or args.data or args.data1 or
					args.datas or args.datas1
		else
			date = args['datas' .. i] or args['data' .. i] or args['date' .. i]
		end
		if date then
			date = formatDate(date)
			-- compare the supplied date with the one encoded in the URL, if any
			local udate = decoder and decoder(data.path)
			if udate and udate ~= date then
				date = date .. ' ' .. inlineRed('Sa data in s\'URL non siddat: ' .. udate, 'warning')
			elseif not udate and encoded_date == true then
				date = date .. ' ' .. inlineRed('Sa data in s\'URL non podet èssere detzifrada', 'error')
			end
		elseif decoder then
			date = decoder(data.path)
		else
			date = inlineRed('Data chi mancat', 'warning')
		end
		if not date then
			-- encoded_date == false means the URL form is known to carry no date
			date = encoded_date == false and inlineRed('Data chi mancat', 'warning') or
					inlineRed('Sa data in s\'URL non podet èssere detzifrada', 'error')
		end
		data.date = date

		-- title handling
		if i == 1 then
			data.title = args.title or args.title1 or args.titolo or args.titolo1 or
					args['tìtulu'] or args['tìtulu1']
		else
			data.title = args['title' .. i] or args['tìtulu' .. i] or args['titolo' .. i]
		end

		i = i + 1
	end
	local rend = createRendering(url_data)
	if not rend then
		track['Categoria:Errores de compilatzione de su template Webarchive'] = 1
		rend = '<span style="font-size:100%" class="error citation-comment">Errores in [[:Template:Webarchive]]: problema disconnotu. Sinnala·lu in sas [[Cuntierras template:Webarchive|pàgina de sas cuntierras]] de su template.</span>'
	end

	return rend .. createTracking()
end

return p