% \GetFileInfo{luatex-hyphen.drv}
%
% \title{The \texttt{hyphen.cfg} file for Lua\TeX }
% \date{2013/05/16 v1.6}
% \author{Khaled Hosny, Élie Roux, and Manuel Pégourié-Gonnard\\
% \texttt{khaledhosny@eglug.org} \\
% \texttt{elie.roux@telecom-bretagne.eu} \\
% \texttt{mpg@elzevir.fr}}
%
% \maketitle
%
% \begin{abstract}
% This package is mainly a Lua module, to be used by \textsf{Babel} and
% \textsf{polyglossia} to adapt their hyphenation patterns loading mechanism to
% Lua\TeX's dynamic pattern loading capabilities. It makes use of a
% \texttt{language.dat.lua} file (whose format is described below) that should
% be present in the distribution, in addition to the regular
% \texttt{language.dat} file.
%
% \textsf{Babel} needed to be updated -- this used to be the goal of this
% package -- before version \textsf{3.9} (\TeX\ Live 2013) and
% \textsf{polyglossia} handles Lua\TeX\ since version \textsf{1.3} (\TeX\ Live
% 2013).
%
% There is a version of \texttt{etex.src} modified for the same reasons using
% similar code, which also makes use of the \texttt{luatex-hyphen.lua} and
% \texttt{language.dat.lua} files described here.
% \end{abstract}
%
% \section{Documentation}
%
% Hyphenation patterns should be loaded at runtime with Lua\TeX: if they appear
% in the format, they will be rehashed when the format is loaded anyway, which
% makes the format quite long to load (many seconds even on modern machines)
% and makes for a bad user experience. Hence, it is desirable to load as few
% patterns as possible in the format, and to load the needed patterns on
% demand at runtime.
%
% This package provides a modified version of hyphen.cfg adapted to Lua\TeX,
% as well as a supporting Lua module. Since a lot of things, especially the
% catcodes, are not as predictable at runtime as at format creation time, we
% don't \verb+\input+ the usual pattern files, but rather load the patterns
% using the Lua interface, using a special plain text version of the pattern
% files if available.
%
% The existence and file name of such a version cannot be guessed, so we need
% a specific database: the file \texttt{language.dat.lua}.
% This file should be
% loadable by Lua and return a table whose keys are the canonical language
% names as found in \texttt{language.dat}, and the values are Lua tables
% consisting of:
% \begin{enumerate}
% \item A fixed part with one mandatory field:
% \begin{quote}
% \verb+synonyms = { <string> alternative name, ...}+
% \end{quote}
% This field's value must be the same as in \texttt{language.dat}.
% \item A variable part consisting of either:
% \begin{itemize}
% \item For most languages:
% \begin{quote}
% \verb+patterns = <string> filenames for patterns+\\
% \verb+hyphenation = <string> filenames for exceptions+\\
% \end{quote}
% Each string contains a comma-separated list of file names (whitespace
% before or after the comma is not accepted).
% The files given by \verb+patterns+ (resp. \verb+hyphenation+) must be
% plain text files encoded in UTF-8, with only patterns (resp.
% exceptions) and not even comments: their content will be used
% directly without being parsed by \TeX. If one of these keys is
% missing or is the empty string, it is ignored and no patterns (resp.
% exceptions) are loaded for this language.
% \item Special cases are supported by a field \verb+special+. Currently,
% the following kinds of values are recognized:
% \begin{description}
% \item[\texttt{'disabled:<reason>'}] allows disabling specific
% languages: when the user tries to load this language, an error
% will be issued, with the \verb+<reason>+.
% \item[\texttt{'language0'}] only \texttt{english} should use this
% type of special, to indicate it is normally dumped in the format
% as \verb+\language0+ (see below).
% \end{description}
% Special languages may have \texttt{*hyphenmin} information when it
% makes sense (mostly \verb+\language0+).
% \end{itemize}
% \item Optional fields may be added.
% For example:
% \begin{quote}
% \verb+loader = <string> name of the TeX loader+\\
% \verb+lefthyphenmin = <number> value for \lefthyphenmin+\\
% \verb+righthyphenmin = <number> value for \righthyphenmin+
% \end{quote}
% Those fields are present in \texttt{language.dat.lua} as generated by
% \texttt{tlmgr}, for example, but they \emph{are not} used by the present
% code in any way.
% \end{enumerate}
% Languages that are mentioned in \texttt{language.dat} but not in
% \texttt{language.dat.lua} will be loaded in the format. So, if the
% \texttt{language.dat.lua} file is missing or incomplete, languages will just
% go back to the ``old'' behaviour, resulting in longer startup time, which
% seems less bad than complete breakage.
%
% For backward compatibility, Knuth's original patterns for US English are
% always loaded in the format, as \verb+\language0+.\footnote{It is assumed
% to be the first entry in \texttt{language.dat}.}
%
% \StopEventually{
% }
%
% \section{Implementation}
%
%    \begin{macrocode}
%<*lua>
%    \end{macrocode}
%
% Start a Lua module, two functions for error and information reporting.
%
%    \begin{macrocode}
luatexhyphen = luatexhyphen or {}
local luatexhyphen = luatexhyphen

-- Write a formatted, prefixed message on a new line of the log file.
local function wlog(msg, ...)
    local text = msg:format(...)
    texio.write_nl('log', 'luatex-hyphen: '..text)
end

-- Raise a formatted, prefixed error; level 2 reports the position of the
-- function that called err() rather than err() itself.
local function err(msg, ...)
    local text = msg:format(...)
    error('luatex-hyphen: '..text, 2)
end
%    \end{macrocode}
%
% Load the \texttt{language.dat.lua} file with the Lua version of the
% language database.
%
%    \begin{macrocode}
local dbname = "language.dat.lua"
local dbfile = kpse.find_file(dbname, 'lua')
-- err() raises, so nothing below runs when the database cannot be found.
if not dbfile then
    err("file not found: "..dbname)
end
wlog('using data file: %s', dbfile)
-- The value returned by the database file is kept as the language table.
local language_dat = dofile(dbfile)
%    \end{macrocode}
%
% Look up a language in the database, and return the associated
% information, as well as the canonical name of the language.
%
%    \begin{macrocode}
-- Returns the database entry and its canonical name, or nothing when `name`
-- is neither a canonical name nor a listed synonym.
local function lookupname(name)
    if language_dat[name] then
        return language_dat[name], name
    else
        for canon, data in pairs(language_dat) do
            for _,syn in ipairs(data.synonyms) do
                if syn == name then
                    return data, canon
                end
            end
        end
    end
end
luatexhyphen.lookupname = lookupname
%    \end{macrocode}
%
% Set hyphenation patterns and exceptions for a language given by its name
% (in the database) and number (value of \verb+\language+). Doesn't return
% anything, but will call \verb+error()+ if things go wrong.
%
%    \begin{macrocode}
local function loadlanguage(lname, id)
    -- Nothing is loaded for \language0.
    if id == 0 then
        return
    end
    local msg = "loading%s patterns and exceptions for: %s (\\language%d)"
%    \end{macrocode}
%
% Look up the language in the database.
%
%    \begin{macrocode}
    local ldata, cname = lookupname(lname)
    if not ldata then
        err("no entry in %s for this language: %s", dbname, lname)
    end
%    \end{macrocode}
%
% Handle special languages.
%
%    \begin{macrocode}
    if ldata.special then
        if ldata.special:find('^disabled:') then
            -- The extra parentheses keep only the string result of gsub
            -- and drop its substitution count.
            err("language disabled by %s: %s (%s)", dbname, cname,
                (ldata.special:gsub('^disabled:', '')))
        elseif ldata.special == 'language0' then
            err("\\language0 should be dumped in the format")
        else
            err("bad entry in %s for language %s", dbname, cname)
        end
    end
%    \end{macrocode}
%
% The generic case: load hyphenation patterns and exceptions from files
% given by the language code.
%
%    \begin{macrocode}
    wlog(msg, '', cname, id)
    for _, item in ipairs{'patterns', 'hyphenation'} do
        local filelist = ldata[item]
        if filelist ~= nil and filelist ~= '' then
            for _, file in ipairs(filelist:explode(',')) do
                local path = kpse.find_file(file)
                    or err("file not found: %s", file)
                -- io.open returns nil on failure; report it instead of
                -- failing later with an index error on a nil handle.
                local fh = io.open(path, 'r')
                    or err("file not readable: %s", path)
                local data = fh:read('*a')
                -- Close before checking the result so that the handle is
                -- released even when the read failed.
                fh:close()
                if not data then
                    err("file not readable: %s", path)
                end
                -- item is 'patterns' or 'hyphenation', i.e. the name of
                -- the function in the lang library to call.
                lang[item](lang.new(id), data)
            end
        else
            if item == 'hyphenation' then item = item..' exceptions' end
            wlog("info: no %s for this language", item)
        end
    end
end
luatexhyphen.loadlanguage = loadlanguage
%    \end{macrocode}
%
% Add \textsf{Babel}'s ``dialects'' as synonyms.
%
%    \begin{macrocode}
local function adddialect(dialect, language)
    if dialect ~= '0' then
        -- Remove the 'l@' marker from both names (the assignment keeps only
        -- the string result of gsub).
        dialect = dialect:gsub('l@', '')
        language = language:gsub('l@', '')
        -- Kept local so that no global variable `data` is created.
        local data = lookupname(language)
        if data then
            data.synonyms[#data.synonyms+1] = dialect
        end
    end
end
luatexhyphen.adddialect = adddialect
%    \end{macrocode}
%
%    \begin{macrocode}
%</lua>
%    \end{macrocode}
%
% \Finale
\endinput