-- Modul:Webarchive
--
-- A Wikipédiából, a szabad enciklopédiából
--
-- Webarchive[mi ez?] • [dokumentáció: mutat, szerkeszt] • [tesztek: létrehozás]

--[[ ----------------------------------

     Lua module implementing the {{webarchive}} template. 

       A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}
   
  ]]

require('strict')

local p = {}                                            -- Table of exported functions; returned at the end of the module
local track = {}                                        -- Associative array to hold tracking categories (key = category title, value = 1)

--[[--------------------------< configuration >-----------------------
     Global configuration variables
  ]]
local tname = 'Webarchive'                              -- name of calling template. Change if template rename.
local maxurls = 10                                      -- Max number of URLs allowed.
-- Passed as the 4th ("plain") argument of every mw.ustring.find() call in this module.
-- Because it is nil, the needles are interpreted as Lua patterns, not as plain text;
-- some needles (e.g. "archive[-]it.org") rely on that.
local plain = nil

--[[--------------------------< inlineRed >-----------------------

      Render a text fragment in red, such as a warning as part of the final output.
      Add tracking category.

      This function is declared before inlineError() on purpose: a `local function`
      is only visible to code that follows its declaration, so inlineError() could
      not otherwise reach it.

      msg      - text to wrap in the red <span>
      trackmsg - "warning" or "error" selects the tracking category to register;
                 any other value (including nil) registers none

      Returns the <span> markup as a string.

 ]]
local function inlineRed(msg, trackmsg)
	if trackmsg == "warning" then
		track["Kategória:Webarchive sablon figyelmeztetés"] = 1
	elseif trackmsg == "error" then
		track["Kategória:Webarchive sablon hiba"] = 1
	end

	return '<span style="font-size:100%" class="error citation-comment">' .. msg .. '</span>'
end

--[[--------------------------< inlineError >-----------------------

     Critical error. Render output completely in red. Add to tracking category.

     arg - name of the template parameter the editor should check
     msg - additional explanation appended to the message

     Returns the rendered error markup (see inlineRed).

 ]]
local function inlineError(arg, msg)
	return inlineRed('Hiba a webarchive sablonban: Ellenőrizd a(z) <code style="color:inherit; border:inherit; padding:inherit;">&#124;' .. arg .. '=</code> értékét. ' .. msg, 'error')
end

--[[--------------------------< trimArg >-----------------------

     Normalise a template argument.

     Returns the argument with surrounding whitespace removed, or nil when the
     argument is missing, empty, or consists of whitespace only. Trimming happens
     before the emptiness test so that a value such as "   " is treated as absent
     instead of being returned as a (truthy) empty string.

 ]]
local function trimArg(arg)
	if arg == nil then
		return nil
	end

	local trimmed = mw.text.trim(arg)
	if trimmed == "" then
		return nil
	end
	return trimmed
end

--[[--------------------------< base62 >-----------------------

     Convert base-62 to base-10
     Credit: https://de.wikipedia.org/wiki/Modul:Expr

     Digit values: "0"-"9" => 0-9, "A"-"Z" => 10-35, "a"-"z" => 36-61.
     Returns 1 when the input is empty or contains anything that is not a
     base-62 digit.

  ]]

local function base62( value )
	if not value:match( "^%w+$" ) then
		return 1
	end

	local total = 0
	local weight = 1                       -- 62^0, 62^1, ... for successive digits from the right
	for pos = #value, 1, -1 do
		local code = value:byte( pos )
		local digit
		if code >= 48 and code <= 57 then          -- "0".."9"
			digit = code - 48
		elseif code >= 65 and code <= 90 then      -- "A".."Z"
			digit = code - 55
		elseif code >= 97 and code <= 122 then     -- "a".."z"
			digit = code - 61
		else                                       -- accepted by %w but not a base-62 digit
			return 1
		end
		total = total + digit * weight
		weight = weight * 62
	end
	return total
end


--[[--------------------------< makeDate >-----------------------

     Given an ISO or spelled-out DMY/MDY date, format it in Hungarian.

     Spelled-out dates are first converted to ISO form by the content language's
     formatDate(). The result is then split on "-":
       * month missing or outside 1..12  -> only the year part is returned
       * day missing or outside 1..31    -> formatted as 'Y. F'
       * otherwise                        -> formatted as 'Y. F j.'

     Raises an error when date is not a string (assert) and propagates any error
     raised by formatDate().

 ]]

local function makeDate(date)
	assert(type(date) == 'string', 'makeDate(): string expected, got ' .. type(date))

	local function fmt(pattern, stamp)
		return mw.getContentLanguage():formatDate(pattern, stamp)
	end

	local looksFull = date:match('%a+ %d+, %d+') or date:match('%d+ %a+ %d+')
	if looksFull then
		date = fmt('Y-m-d', date)
	elseif date:match('%a+ %d+') then
		date = fmt('Y-m', date)
	end

	local parts = mw.text.split(date, '-')
	local year = parts[1]
	local paddedMonth = parts[2] or ''                                        -- month as written (may carry a leading 0)
	local paddedDay = parts[3] or ''                                          -- day as written (may carry a leading 0)

	local monthNum = tonumber(paddedMonth:match("0*(%d+)"))
	if not monthNum or monthNum < 1 or monthNum > 12 then
		return year
	end

	local dayNum = tonumber(paddedDay:match("0*(%d+)"))
	if not dayNum or dayNum < 1 or dayNum > 31 then
		return fmt('Y. F', year .. '-' .. paddedMonth .. '-01')
	end

	return fmt('Y. F j.', year .. '-' .. paddedMonth .. '-' .. paddedDay)
end

--[[------------------------------< dateI >---------------------------

     Add the appropriate suffix to the date, e.g. 2016-os, 2016. szeptemberi, 2016. szeptember 1-ji

     Input forms handled:
       * digits only (a year)         -> delegated to hanyas() of Modul:Nyelvtani modul
       * "<year>. <month>"            -> "i" appended
       * "<year>. <month> <day>."     -> trailing "." replaced by "-ji" for day 1, "-i" otherwise
       * anything else                -> returned unchanged

     Always returns exactly one value for the date forms handled here
     (string.gsub's substitution count is deliberately discarded).
     Raises an error when date is not a string (assert).

  ]]
local function dateI(date)
	assert(type(date) == 'string', 'dateI(): string expected, got ' .. type(date))
	if date:match('^%d+$') then
		return require('Modul:Nyelvtani modul').hanyas(date)
	elseif date:match('^%d+%. [a-záéíóöőúüű]+$') then
		return date .. 'i'
	elseif date:match('^%d+%. [a-záéíóöőúüű]+ %d+%.') then
		local suffix = '-i'
		if date:match(' 1%.$') then
			suffix = '-ji'
		end
		-- Parentheses truncate gsub's (string, count) result to the string only.
		return (date:gsub('%.$', suffix))
	end
	return date
end

--[[--------------------------< decodeWebciteDate >-----------------------

      Given a URI-path to Webcite (eg. /67xHmVFWP) return the formatted date

      Return values:
        * "query"            - the path is one of the valid non-base62 forms listed below
        * formatted date     - result of makeDate() for the decoded snapshot time
        * inline error span  - the decoded value is not a plausible date
        * nil                - the path is missing or has no ID segment after the
                               first "/" (the caller reports this as a date error)

  ]]
local function decodeWebciteDate(path)
	if type(path) ~= "string" then
		return nil
	end

	-- The ID is the first path segment, i.e. the text between the first and second "/".
	local id = mw.text.split(path, "/")[2]
	if not id or id == "" then
		return nil
	end

	-- valid URL formats that are not base62

	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e

	if
		mw.ustring.find( id, "query", 1, plain ) or
		mw.ustring.find( id, "cache", 1, plain ) or
		mw.ustring.find( id, "getfile", 1, plain ) or
		tonumber(id)
	then
		return "query"
	end

	-- Only the first 10 digits of the decoded number are used as the time value for os.date().
	local seconds = tonumber(string.sub(string.format("%d", base62(id)), 1, 10))
	local full = os.date("%Y-%m-%d", seconds)
	if type(full) ~= "string" then
		return inlineRed("[Dátumhiba] (1)", "error")
	end

	local parts = mw.text.split(full, "-")
	local year, month, day = tonumber(parts[1]), tonumber(parts[2]), tonumber(parts[3])

	if not year or not month or not day then
		return inlineRed("[Dátumhiba] (1)", "error")
	end

	if month > 12 or day > 31 or month < 1 then
		return inlineRed("[Dátumhiba] (2)", "error")
	end
	if year > tonumber(os.date("%Y")) or year < 1900 then
		return inlineRed("[Dátumhiba] (3)", "error")
	end

	-- makeDate() may raise (it asserts and calls formatDate), hence the protected call.
	local ok, fulldate = pcall(makeDate, full)
	if not ok then
		return inlineRed("[Dátumhiba] (4)", "error")
	end
	return fulldate
end

--[[--------------------------< decodeWaybackDate >-----------------------

     Given a URI-path to Wayback (eg. /web/20160901010101/http://example.com )
       return the formatted date eg. "2016. szeptember 1."
     Handle non-digits in snapshot ID such as "re_" and "-" and "*"

     Return values:
       * "index"            - the snapshot segment is exactly "*" (eg. /web/*/http..)
       * formatted date     - result of makeDate() for the snapshot's year-month-day
       * inline error span  - the snapshot ID cannot be read as a plausible date

 ]]

local function decodeWaybackDate(path)
	-- Remove leading "/web/" or "/", then keep only the first remaining path segment.
	local stamp = (string.gsub(path, "^/w?e?b?/?", ""))
	stamp = mw.text.split(stamp, "/")[1]
	if stamp == "*" then                                                -- eg. /web/*/http..
		return "index"
	end

	stamp = (string.gsub(stamp, "[a-z][a-z]_[0-9]?$", ""))              -- Remove any trailing "re_" from date
	stamp = (string.gsub(stamp, "[-]", ""))                             -- Remove dashes from date eg. 2015-01-01
	stamp = (string.gsub(stamp, "[*]$", ""))                            -- Remove trailing "*"

	if not tonumber(stamp) then
		return inlineRed("[Dátumhiba] (2)", "error")
	end

	local length = string.len(stamp)
	if length < 4 then
		return inlineRed("[Dátumhiba] (3)", "error")
	end

	-- Right-pad short snapshot IDs with zeros up to 14 characters.
	local padded = stamp
	if length < 14 then
		padded = stamp .. string.rep("0", 14 - length)
	end

	local year = string.sub(padded, 1, 4)
	local month = string.sub(padded, 5, 6)
	local day = string.sub(padded, 7, 8)
	if not (tonumber(year) and tonumber(month) and tonumber(day)) then
		return inlineRed("[Dátumhiba] (4)", "error")
	end
	if tonumber(month) > 12 or tonumber(day) > 31 or tonumber(month) < 1 then
		return inlineRed("[Dátumhiba] (5)", "error")
	end

	local thisYear = os.date("%Y")
	if tonumber(year) > tonumber(thisYear) or tonumber(year) < 1900 then
		return inlineRed("[Dátumhiba] (6)", "error")
	end

	local ok, fulldate = pcall(makeDate, year .. '-' .. month .. '-' .. day)
	if ok then
		return fulldate
	end
	return inlineRed("[Dátumhiba] (7)", "error")
end


--[[--------------------------< serviceName >-----------------------

     Given a domain extracted by mw.uri.new() (eg. web.archive.org) set tail string and service ID

     url    - table with at least a `host` field; it is modified in place:
                url.service  "wayback", "webcite", "archiveis", "archiveit" or "other"
                url.tail     text appended after the link in the rendering
     nolink - when truthy, service names are emitted without [[ ]] wikilink brackets
              (tails that hard-code their own wikilink are not affected)

     Also registers one tracking category in track[].
     Returns the same url table.

     NOTE: the needles are Lua patterns (the module-level `plain` is nil), so "."
     matches any character and "-" must be escaped to match a literal hyphen.

  ]]

local function serviceName(url, nolink)
	local tracking = "Kategória:Webarchive sablon egyéb archívumokkal"
	-- Guard against a missing host so the concatenation and searches below cannot raise.
	local host = url.host or ""

	local bracketopen = "[["
	local bracketclose = "]]"
	if nolink then
		bracketopen = ""
		bracketclose = ""
	end

	-- True (a match position) when the host contains the given Lua pattern.
	local function hostHas(pattern)
		return mw.ustring.find( host, pattern, 1, plain )
	end

	url.service = "other"
	url.tail = " a(z) " .. host .. " archívumban " .. inlineRed("Hiba: ismeretlen archívum-URL")

	if hostHas("archive.org") then
		url.service = "wayback"
		url.tail = " a " .. bracketopen .. "Wayback Machine" .. bracketclose .. "-ben"
		tracking = "Kategória:Webarchive sablon Wayback Machine linkkel"
	elseif hostHas("webcitation.org") then
		url.service = "webcite"
		url.tail = " a " .. bracketopen .. "WebCite" .. bracketclose .. "-on"
		tracking = "Kategória:Webarchive sablon WebCite linkkel"
	elseif
		hostHas("archive.is") or
		hostHas("archive.fo") or
		hostHas("archive.today") or
		hostHas("archive.il") or
		hostHas("archive.ec")
	then
		url.service = "archiveis"
		url.tail = " az " .. bracketopen .. "Archive.is" .. bracketclose .. "-en"
		tracking = "Kategória:Webarchive sablon archiveis linkkel"
	elseif hostHas("archive[-]it.org") then
		url.service = "archiveit"
		url.tail = " az " .. bracketopen .. "Archive-It" .. bracketclose .. "en"
	elseif hostHas("arquivo.pt") then
		url.tail = " at the " .. "Portuguese Web Archive"
	elseif hostHas("loc.gov") then
		url.tail = " at the " .. bracketopen .. "Library of Congress" .. bracketclose
	elseif hostHas("webharvest.gov") then
		url.tail = " at the " .. bracketopen .. "National Archives and Records Administration" .. bracketclose
	elseif hostHas("bibalex.org") then
		url.tail = " at " .. "[[Bibliotheca_Alexandrina#Internet_Archive_partnership|Bibliotheca Alexandrina]]"
	elseif hostHas("collectionscanada") then
		url.tail = " at the " .. "Canadian Government Web Archive"
	elseif hostHas("haw.nsk") then
		url.tail = " at the " .. "Croatian Web Archive (HAW)"
	elseif hostHas("veebiarhiiv.digar.ee") then
		url.tail = " at the " .. "Estonian Web Archive"
	elseif hostHas("vefsafn.is") then
		url.tail = " at the " .. "[[National and University Library of Iceland]]"
	elseif hostHas("proni.gov") then
		url.tail = " at the " .. bracketopen .. "Public Record Office of Northern Ireland" .. bracketclose
	elseif hostHas("uni[-]lj.si") then
		url.tail = " at the " .. "Slovenian Web Archive"
	elseif hostHas("stanford.edu") then
		url.tail = " at the " .. "[[Stanford University Libraries|Stanford Web Archive]]"
	elseif hostHas("nationalarchives.gov.uk") then
		url.tail = " at the " .. bracketopen .. "UK Government Web Archive" .. bracketclose
	elseif hostHas("parliament.uk") then
		url.tail = " at the " .. bracketopen .. "UK Parliament's Web Archive" .. bracketclose
	elseif hostHas("webarchive.org.uk") then
		url.tail = " at the " .. bracketopen .. "UK Web Archive" .. bracketclose
	elseif hostHas("nlb.gov.sg") then
		url.tail = " at " .. "Web Archive Singapore"
	elseif hostHas("pandora.nla.gov.au") then
		url.tail = " at " .. bracketopen .. "Pandora Archive" .. bracketclose
	elseif hostHas("perma.cc") then
		url.tail = " at " .. bracketopen .. "Perma.cc" .. bracketclose
	elseif hostHas("perma%-archives.cc") then
		-- The hyphen is escaped: unescaped, "a-" would be a lazy repetition of "a"
		-- and the needle could never match the literal host "perma-archives.cc".
		url.tail = " at " .. bracketopen .. "Perma.cc" .. bracketclose
	elseif hostHas("screenshots.com") then
		url.tail = " at Screenshots"
	elseif hostHas("wikiwix.com") then
		url.tail = " at Wikiwix"
	elseif hostHas("freezepage.com") then
		url.tail = " at Freezepage"
	elseif hostHas("webcache.googleusercontent.com") then
		url.tail = " at Google Cache"
	else
		tracking = "Kategória:Webarchive sablon ismeretlen archívummal"
	end

	track[tracking] = 1

	return url
end

--[[--------------------------< parseExtraArgs >-----------------------

     Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10
       For example: {{webarchive |url=.. |url4=.. |url7=..}}
         Three url arguments not in numeric sequence (1..4..7).
         Function only processes arguments numbered 2 or greater (in this case 4 and 7)
         It creates numeric sequenced table entries like:
           urlx.url2.url = <argument value for url4>
           urlx.url3.url = <argument value for url7>
       The date and title stored with an entry are taken from the arguments that
       carry the SAME number as the URL (date4/title4 for url4), not from the
       position of the entry in the output table.

       A missing date is replaced by a "[Dátum hiányzik]" warning span; a date that
       makeDate() cannot process is replaced by a "[Dátumhiba] (1)" error span.

       Returns two values: the number of URL arguments found numbered 2 or greater
       (in this case 2), and the table of entries.

 ]]

local function parseExtraArgs(args)
	local ulx = {}
	local found = 0

	for i = 2, maxurls do
		local url = trimArg(args["url" .. i])
		if url then
			found = found + 1

			local entry = {
				url = url,
				title = trimArg(args["title" .. i]),        -- nil when absent
			}

			local date = trimArg(args["date" .. i])
			if date then
				-- makeDate() may raise (formatDate rejects some inputs); keep the page rendering.
				local ok, formatted = pcall(makeDate, date)
				if ok then
					entry.date = formatted
				else
					entry.date = inlineRed("[Dátumhiba] (1)", "error")
				end
			else
				entry.date = inlineRed("[Dátum hiányzik]", "warning")
			end

			-- Output keys are sequential: the first extra URL becomes "url2", the next "url3", ...
			ulx["url" .. (found + 1)] = entry
		end
	end

	return found, ulx
end

--[[--------------------------< createTracking >-----------------------

     Return data in track[] ie. tracking categories

     Every key of track[] is wrapped in [[ ]] and the results are joined without
     a separator. Returns an empty string when no category has been registered.

  ]]

local function createTracking()
	local links = {}
	for category in pairs(track) do
		links[#links + 1] = '[[' .. category .. ']]'
	end
	return table.concat(links)
end

--[[--------------------------< createRendering >-----------------------

     Return a rendering of the data in ulx[][]

     Fields read from ulx.url1: url, title, date, tail, service, format, extraurls.
     Fields read from ulx.url2 .. ulx.urlN (N = extraurls + 1): url, title, date.

     format == "none"          single archive link, optionally followed by the list
                               of additional archives
     format == "addlarchives"  list of archives, each labelled by title or date
     any other format          list of pages from one archive, each labelled by
                               title or "<n>. oldal"

  ]]

local function createRendering(ulx)
	local first = ulx.url1

	-- Wording of the "archived <date>" fragment. It is only built when a date is
	-- present, so that the "no date" branches below stay reachable (dateI()
	-- asserts on a non-string argument).
	local indexstr, datestr
	if first.date == 'index' then
		indexstr, datestr = 'archívumok', 'indexe'
	elseif first.date then
		indexstr, datestr = 'archiválva', dateI(first.date) .. ' dátummal'
	end

                                                                                          -- For {{wayback}}, {{webcite}}
	if first.format == "none" then
		local period1 = ""   -- For backwards compat with {{wayback}}
		local period2 = "."
		local sand

		if not first.title and not first.date then                                        -- No title. No date
			sand = "[" .. first.url .. " Archiválva]" .. first.tail
		elseif not first.title then                                                       -- No title. Date.
			if first.service == "wayback" then
				period1 = "."
				period2 = ""
			end
			sand = "[" .. first.url .. " " .. mw.getContentLanguage():ucfirst(indexstr) .. "] " .. datestr .. first.tail .. period1
		elseif not first.date then                                                        -- Title. No date.
			sand = "[" .. first.url .. " " .. first.title .. "]" .. first.tail
		else                                                                              -- Title. Date.
			sand = "[" .. first.url .. " " .. first.title .. "]" .. first.tail .. "&#32;(" .. indexstr .. " " .. datestr .. ")"
		end

		if first.extraurls > 0 then                                                       -- For multiple archive URLs
			local links = {}
			for i = 2, first.extraurls + 1 do
				local entry = ulx["url" .. i]
				-- Label each extra link with its title, falling back to its date.
				links[#links + 1] = "[" .. entry.url .. " " .. (entry.title or entry.date) .. "]"
			end
			sand = sand .. period2 .. " További archívumok: " .. table.concat(links, ", ") .. "."
		end
		return sand
	end

                                                                                          -- For {{cite archives}}
	local byService = (first.format == "addlarchives")
	local header
	if byService then                                                     -- Multiple archive services
		header = "További archívumok: "
	else                                                                  -- Multiple pages from the same archive
		header = "További " .. dateI(first.date) .. " archívumok: "
	end

	local links = {}
	for i = 1, 1 + first.extraurls do
		local entry = ulx["url" .. i]
		local label = entry.title
		if not label then
			if byService then
				label = entry.date
			else
				label = i .. ". oldal"
			end
		end
		links[#links + 1] = "[" .. entry.url .. " " .. label .. "]"
	end
	return header .. table.concat(links, ", ") .. "."
end

--[[--------------------------< p._webarchive >-----------------------

     Build the complete output of the template from a table of arguments.

     Arguments read: url/url1, date/date1, title/title1, format, nolink, plus the
     numbered url2.., date2.., title2.. handled by parseExtraArgs().

     Returns the rendered wikitext followed by the tracking categories. Problems
     with the arguments are reported inline (red text) instead of raising:
       * missing, known-bad or host-less URL  -> inlineError on |url=
       * date that makeDate() cannot process   -> "[Dátumhiba] (1)"
       * no date and none derivable            -> "[Dátum hiányzik]"

  ]]
function p._webarchive(args)
                                                          -- URL argument (first)
	local url1 = trimArg(args.url) or trimArg(args.url1)
	if not url1 then
		return inlineError("url", "Üres.") .. createTracking()
	end
	if mw.ustring.find( url1, "https://web.http", 1, plain ) then    -- track bug
		return inlineError("url", "https://web.http") .. createTracking()
	end
	if url1 == "https://web.archive.org/http:/" then                 -- track bug
		return inlineError("url", "Érvénytelen URL") .. createTracking()
	end

	-- Everything below needs a host name; report a URL that cannot be parsed or
	-- has no host instead of letting a later string operation raise on nil.
	local parsed, uri1 = pcall(mw.uri.new, url1)
	if not parsed or type(uri1) ~= "table" or not uri1.host then
		return inlineError("url", "Érvénytelen URL") .. createTracking()
	end
	local path = uri1.path or ""

	local url1_ = {
		url = url1,
		host = uri1.host
	}
	local ulx
	url1_.extraurls, ulx = parseExtraArgs(args)

                                                          -- Nolink argument
	local nolink = not not args.nolink

	ulx.url1 = serviceName(url1_, nolink)
	local service = ulx.url1.service

                                                          -- Date argument
	local date = trimArg(args.date) or trimArg(args.date1)
	if date == "*" and service == "wayback" then
		date = "index"
	elseif not date and service == "wayback" then
		date = decodeWaybackDate( path )
		if not date then
			date = inlineRed("[Dátumhiba] (1)", "error")
		end
	elseif not date and service == "webcite" then
		date = decodeWebciteDate( path )
		if date == "query" then
			date = inlineRed("[Dátum hiányzik]", "warning")
		elseif not date then
			date = inlineRed("[Dátumhiba] (1)", "error")
		end
	elseif date then
		-- makeDate() may raise (formatDate rejects some inputs); keep the page rendering.
		local ok, formatted = pcall(makeDate, date)
		if ok then
			date = formatted
		else
			date = inlineRed("[Dátumhiba] (1)", "error")
		end
	else
		date = inlineRed("[Dátum hiányzik]", "warning")
	end
	ulx.url1.date = date

                                                          -- Format argument
	-- Only "addlpages" (when a date exists) and "addlarchives" are kept;
	-- a missing or unrecognised value means "none".
	local format = trimArg(args.format)
	if format == "addlpages" then
		if not ulx.url1.date then
			format = "none"
		end
	elseif format ~= "addlarchives" then
		format = "none"
	end
	ulx.url1.format = format

                                                          -- Title argument
	ulx.url1.title = trimArg(args.title) or trimArg(args.title1)

	local rend = createRendering(ulx)
	if not rend then
		rend = inlineRed('Hiba a [[Template:' .. tname .. '|' .. tname .. ']] sablonban: Meghatározhatatlan hiba. Kérjük, jelezd a sablon vitalapján.', 'error')
	end

	return rend .. createTracking()
end

--[[--------------------------< p.webarchive >-----------------------

     Entry point for #invoke.

     Uses the arguments given to the module call itself when either the first
     positional argument or "url" is present there; otherwise falls back to the
     arguments of the parent frame (the calling template).

  ]]
function p.webarchive(frame)
	local args = frame.args
	local hasOwnArgs = (args[1] ~= nil) or (args["url"] ~= nil)
	if not hasOwnArgs then
		args = frame:getParent().args
	end
	return p._webarchive(args)
end

return p