Module:R:Woodhouse

La documentation pour ce module peut être créée à Module:R:Woodhouse/doc

--[[
This module looks up the Greek entry title in "Module:R:Woodhouse/psia1_to_infs",
a list of verbs, and adds any infinitive forms to a list including the page 
title itself, in both potential proper and common forms.  This list of Greek 
forms is used to retrieve English headwords in Woodhouse's dictionary from 
"Module:R:Woodhouse/reverse_index".  The infinitives step is necessary because 
Woodhouse mostly lists verbs by their infinitives.  The function get_page() then looks up
each English headword alphabetically in "Module:R:Woodhouse/page_headwords", or
"Module:R:Woodhouse/page_headwords_proper", or both, per 
"Module:R:Woodhouse/proper_or_both", to find  page numbers in the paper edition 
of the dictionary.  This `abstract' page number is slightly altered with a few 
exceptions to obtain the page number used in the ARTFL project's URL.  These 
URLs are returned with bibliographical information and HTML for display. A 
remaining issue is that, about 5% of the time, the page number will be off by
one or two.  This problem may be slightly intricate to solve completely.

8 May 2020: Changed reverse_index implementation to reverse_index_tab_delimited implementation to save memory (The Lua table consumed 8x the binary text size.)
]]

local export = {}

-- Returns a new list with the elements of `ls` in first-seen order, each
-- value kept only once.  `ls` is traversed with ipairs and is not modified.
local function remove_duplicates(ls)
	local seen, unique = {}, {}
	for _, item in ipairs(ls) do
		if seen[item] == nil then
			unique[#unique + 1] = item
			seen[item] = true
		end
	end
	return unique
end

-- Returns a fresh list containing every value of `l1` followed by every value
-- of `l2`.  Either argument may be nil (or false), in which case it
-- contributes nothing.  Values are visited with pairs, so keys are discarded.
local function concat(l1,l2)
	local merged = {}

	local function append_all(source)
		if not source then
			return
		end
		for _, value in pairs(source) do
			merged[#merged + 1] = value
		end
	end

	append_all(l1)
	append_all(l2)
	return merged
end

-- Lower-bound binary search over the sorted list `ys`.
--   ys : list whose elements are comparable with `x` using `<`
--   x  : value to locate
--   L  : optional lower index bound (defaults to 1)
--   H  : optional upper index bound (defaults to #ys)
-- Returns the smallest index i in [L, H) for which ys[i] >= x, or H when no
-- such index exists in that range.  Raises an error if L is negative.
local function b_search(ys, x, L, H)
	L = L or 1
	H = H or #ys

	if L < 0 then
		error('L < 0')
	end

	while L < H do
		-- `mid` must be local: the original assigned an undeclared `M`, which
		-- leaked into the global environment on every call.
		local mid = math.floor((L + H) / 2)
		if ys[mid] < x then
			L = mid + 1
		else
			H = mid
		end
	end

	return L
end

-- calculate_page_number_divergence:
-- The divergence comes from a few buffer pages before the first headword and,
-- later on, from a few multi-page entries that leave some pages without
-- headwords.  Each time the latter happens, the index of page-initial
-- headwords drifts further from the physical page number.
--   w0        : headword with its first letter lower-cased
--   is_proper : truthy when the headword is treated as a proper name
-- Returns 4 plus a drift of 4 for proper names, otherwise a drift of 1 to 4
-- chosen by where `w0` sorts relative to 'putrefaction', 'setting', 'taking'.
local function calculate_page_number_divergence(w0, is_proper)
	local drift
	if is_proper then
		drift = 4
	elseif w0 >= 'taking' then
		drift = 4
	elseif w0 >= 'setting' then
		drift = 3
	elseif w0 >= 'putrefaction' then
		drift = 2
	else
		drift = 1
	end
	return 4 + drift
end

-- Computes a page number for the English headword `w`.
--   page_headwords        : sorted list searched for common headwords
--   page_headwords_proper : sorted list searched for proper-name headwords
--   w                     : English headword
-- A headword counts as proper when upper-casing its first letter leaves it
-- unchanged.  The lookup itself always uses the form with a lower-cased first
-- letter.  Proper names are additionally offset by 995.
local function get_page(page_headwords, page_headwords_proper, w)
	local english = mw.getLanguage('en')
	local is_proper = english:ucfirst(w) == w
	local w0 = english:lcfirst(w)
	local p_n_divergence = calculate_page_number_divergence(w0, is_proper)

	if is_proper then
		return (b_search(page_headwords_proper, w0, 0, 32) - 1) + 995 + p_n_divergence
	end
	return (b_search(page_headwords, w0, 0, 995) - 1) + p_n_divergence
end

-- Upper-cases the first letter of `x` using the English language object.
local function uc1_eng(x)
	local english = mw.getLanguage('en')
	return english:ucfirst(x)
end

-- Collects the English headwords recorded for `title` in the table-based
-- reverse index.
--   proper_or_both : table keyed by headword; a missing entry leaves the
--                    headword as-is, any entry adds the capitalised form, and
--                    the value "b" (presumably "both" -- confirm against the
--                    data module) additionally keeps the uncapitalised form
--   reverse_index  : table mapping a title to a collection of headwords
--   title          : key to look up
-- Returns a list of headwords; empty when `title` has no entry.
local function f_reverse_index(proper_or_both,reverse_index,title)
	local headwords_eng = {}
	local entries = reverse_index[title]
	if entries == nil then
		return headwords_eng
	end

	for _, word in pairs(entries) do
		local flag = proper_or_both[word]
		if flag ~= nil then
			headwords_eng[#headwords_eng+1] = uc1_eng(word)
		end
		if flag == nil or flag == "b" then
			headwords_eng[#headwords_eng+1] = word
		end
	end
	return headwords_eng
end

-- Collects the English headwords recorded for `title` in the tab-delimited
-- reverse index string.
--   proper_or_both              : accepted but not used by this function
--   reverse_index_tab_delimited : string of newline-separated records, each
--                                 being a title followed by tab-separated
--                                 headwords
--   title                       : title to look up (matched literally)
-- Returns a list of every headword on every record whose first field is
-- exactly `title`.
local function f_reverse_index_tab_delimited(proper_or_both,reverse_index_tab_delimited,title)
	local escaped_title = require("Module:utilities").pattern_escape(title)
	-- The frontier patterns anchor the match at the start of a record (start
	-- of string or just after a newline) and at its end.
	local record_pattern = "%f[^%z\n]" .. escaped_title .. "\t([^\n]+)%f[%z\n]"

	local headwords_eng = {}
	for English_words in reverse_index_tab_delimited:gmatch(record_pattern) do
		for word in English_words:gmatch("[^\t]+") do
			headwords_eng[#headwords_eng + 1] = word
		end
	end
	return headwords_eng
end


-- Looks up `w` in the "grc_RWoodhouse_lemma_to_infinitives" data table via
-- Module:data tables and returns whatever index_table yields for it.
-- (Earlier implementation: mw.loadData("Module:R:Woodhouse/psia1_to_infs")[w].)
local function load_infinitives(w)
	local data_tables = require("Module:data tables")
	return data_tables.index_table("grc_RWoodhouse_lemma_to_infinitives", w)
end

--grc_RWoodhouse_lemma_to_headwords
-- Builds the collapsible list of Woodhouse links for the Greek entry `title`.
--   title : Greek page title to look up
-- The title, its ucfirst form and any forms returned by load_infinitives are
-- looked up in the tab-delimited reverse index.  The resulting English
-- headwords are de-duplicated and sorted, and each becomes one <li> linking to
-- the ARTFL page computed by get_page.
-- Returns the assembled HTML/wikitext as a single string.
local function print_html(title)
	-- Earlier revisions read Module:R:Woodhouse/psia1_to_infs and
	-- Module:R:Woodhouse/reverse_index with mw.loadData instead.
	local reverse_index_tab_delimited = require("Module:R:Woodhouse/reverse_index_tab_delimited")    --for f_reverse_index_tab_delimited
	local proper_or_both = mw.loadData("Module:R:Woodhouse/proper_or_both")                  --for f_reverse_index
	local page_headwords = mw.loadData("Module:R:Woodhouse/page_headwords")                  --for get_page
	-- page_headwords also requires a select_all function in data_tables to work efficiently
	local page_headwords_proper = mw.loadData("Module:R:Woodhouse/page_headwords_proper")    --for get_page

	local title_uc = mw.getContentLanguage():ucfirst(title)
	local title_addenda = load_infinitives(title)
	local titles = concat({title, title_uc}, title_addenda)

	local headwords_eng = {}
	for _, greek_form in ipairs(titles) do
		headwords_eng = concat(headwords_eng, f_reverse_index_tab_delimited(proper_or_both, reverse_index_tab_delimited, greek_form))
	end
	headwords_eng = remove_duplicates(headwords_eng)
	table.sort(headwords_eng)

	local count = #headwords_eng
	local expandtext = count .. " headword" .. ( count == 1 and "" or "s" )

	-- The data-expandtext attribute is now closed with its quote; the original
	-- omitted it and emitted a malformed opening <div> tag.
	local lst = {
		"<div class='mw-collapsible mw-collapsed' style='display: inline' data-expandtext='"..expandtext.."'><ul>"
	}
	-- ipairs keeps the items in the sorted order established above.
	for _, headword in ipairs(headwords_eng) do
		-- The same adjusted number is used in the URL and in the label.
		local url_page = get_page(page_headwords, page_headwords_proper, headword) - 5
		lst[#lst + 1] = "<li>[https://artflsrv03.uchicago.edu/cgi-bin/efts/sqldbs/WOODHOUSE/woodhouse.py?pagenumber="..url_page.."&pageturn=1 "..headword.."] idem, page "..url_page..".</li>"
	end
	lst[#lst + 1] = "</ul></div>"

	return table.concat(lst)
end

-- Template entry point.
--   frame : Scribunto frame; the parent frame's `w` argument, when non-empty,
--           names the Greek word to look up
-- Without a usable `w`, the current page title is looked up instead, except in
-- the Template namespace, where an empty string is returned.
-- Returns the HTML/wikitext produced by print_html, or "".
function export.reverse_index(frame)
	local args = frame:getParent().args
	local word = args['w']
	-- An empty `w=` is treated as absent.  The original used the empty string
	-- as the lookup title outside the Template namespace, because "" is truthy.
	if word == "" then
		word = nil
	end

	local current_title = mw.title.getCurrentTitle()
	if not word and current_title.nsText == "Template" then
		return ""
	end
	return print_html(word or current_title.text)
end

return export