#!/usr/bin/env texlua
kpse.set_program_name("luatex")
-- ctanbib -- export ctan entries to bib format
-- Copyright: Michal Hoftich (2014-2023)
--
-- This work may be distributed and/or modified under the
-- conditions of the LaTeX Project Public License, either version 1.3
-- of this license or (at your option) any later version.
-- The latest version of this license is in
--   http://www.latex-project.org/lppl.txt
-- and version 1.3 or later is part of all distributions of LaTeX
-- version 2005/12/01 or later.
--
-- This work has the LPPL maintenance status `maintained'.
--
-- The Current Maintainer of this work is Michal Hoftich

local lapp = require "lapp-mk4"

-- entry type requested with --entrytype ("manual" unless overridden)
local bibtype = "manual"
-- set to true by --ctan
local pkgurl = false

-- Usage text. It is also handed to lapp, which reads the option
-- specification from it, so every option must stay on its own line.
local msg = [[ctanbib - convert ctan package information to bibtex format
Usage:
ctanbib [options] PKGNAME1 PKGNAME2 ...

Look up the Catalogue entry for each PKGNAME on CTAN
(at https://ctan.org/xml/2.0/pkg/PKGNAME),
and convert relevant information to a BibTeX entry.

By default, the url field in the output entry is the package's home
field value if that is specified in the Catalogue entry; if it isn't,
https://ctan.org/pkg/PKGNAME is used.

Available options:
  -c,--ctan      Always use CTAN /pkg/ url instead of the package homepage
  -C,--ctanpath  Use the package's CTAN path as url (e.g.: /support/ctanbib)
  -e,--entrytype (default manual) Change entry type
  -h,--help      Print this message
  -p,--pkgname   Wrap the package name in a \ctanbibpkgname command
  -v,--version   Print version info

ctanbib package page: https://ctan.org/pkg/ctanbib]]

local args = lapp(msg)
if args.help then
  print(msg)
  os.exit(0)
elseif args.version then
  print "ctanbib version 0.2d"
  os.exit(0)
elseif args.ctan then
  pkgurl = true
end

-- manual is set by default in --entrytype, we don't want it to override the --ctan option
if args.entrytype~="manual" then
  bibtype = args.entrytype
end

-- first positional argument; only used here to detect a missing package name
-- and, further down, as a fallback title
local pkgname = args[1]
if not pkgname then
  print "[ctanbib] Error: missing package name"
  print(msg)
  os.exit(1)
end

local authors_url = "https://ctan.org/xml/2.0/authors"
local license_url = "https://www.ctan.org/xml/2.0/licenses"

-- change that for different title scheme
local titleformat = "The %s package"

-- template of one BibTeX entry; the $name placeholders are filled in later
local bibtexformat = [[
@manual{$package,
  title = {$title},
  subtitle = {$subtitle},
  author = {$author},
  url = {$url},
  urldate = {$urldate},
  date = {$date},
  version = {$version},
}
]]

if args.pkgname then
  titleformat = "The \\ctanbibpkgname{%s} package"
  -- `preamble` used to be assigned without `local`, which created a global.
  -- NOTE(review): confirm nothing later in the file reads a global `preamble`.
  local preamble = [[@preamble{ "\providecommand\ctanbibpkgname[1]{\textsl{#1}}" }
]]
  bibtexformat = preamble .. bibtexformat
end

local dom = require('luaxml-domobject')

-- Download `url` with curl and parse the response as XML.
--   url          - address to fetch; appended to the curl command line as-is
--   verification - optional Lua pattern the response must match; the default
--                  "" matches every response
-- Returns the parsed DOM object and the raw response text on success.
-- Returns nil, "Cannot open Curl" when the pipe cannot be opened, and
-- nil, "Cannot load XML", <raw response> when the response is empty or does
-- not match `verification`.
-- NOTE(review): `url` reaches the shell unquoted. The package names embedded
-- in it come from the command line; quoting rules differ between shells, so
-- this is flagged rather than changed here.
local load_xml = function(url, verification)
  -- local command = io.popen("wget -qO- ".. url,"r")
  local command = io.popen("curl --ssl-no-revoke -sS ".. url,"r")
  -- reuse the parameter instead of shadowing it with a second local
  verification = verification or ""
  if not command then return nil, "Cannot open Curl" end
  local info = command:read("*all")
  command:close()
  if not info or string.len(info) == 0 or not info:match(verification) then
    return nil, "Cannot load XML", info
  end
  return dom.parse(info), info
end

-- Escape the characters $ { } \ for use inside a BibTeX field.
--   a - string to escape; nil is treated as the empty string
-- Returns the escaped string only. gsub's replacement count used to leak out
-- as a second return value and was forwarded by callers that return this
-- function's result directly.
local bibtex_escape = function(a)
  local text = a or ""
  local escaped = text:gsub("([%$%{%}%\\])", function(x)
    if x == "\\" then return "\\textbackslash " end
    return '\\'..x
  end)
  return escaped
end

-- we need to use this method temporarily because of a bug in
-- CTAN API
-- NOTE(review): part of the source text is missing from this copy of the
-- file between `load_xml(authors_url, "` and ` 1 then`: the rest of
-- fetch_author_list and the beginning of the function that get_author calls
-- as process_author. The surviving text is reproduced unchanged below and
-- must be restored from the upstream file before this script can run.
local author_list
local fetch_author_list = function()
  if author_list then return author_list end
  local authors, msg = load_xml(authors_url, " 1 then return table.concat(current, ", ") end
  return nil -- no author
end

-- get the author name and surname
local get_author = function(author)
  local name = process_author(author)
  if name then return name end
  -- if the package XML doesn't contain author name,
  -- we need to fetch the authors list from CTAN and find
  -- it here
  local author_list = fetch_author_list()
  local id = author:get_attribute("id")
  -- try to find the author id in list of authors
  -- if everything fails, just return an empty group
  return process_author(author_list[id]) or "{}"
end

local get_authors = function(a)
  local retrieved_authors = {}
  for _, author in ipairs(a) do
    table.insert(retrieved_authors, get_author(author))
  end
  -- CTAN returns authors in a random order. To fix that, we sort them.
  -- I know that it is not a good solution for non-latin and accented names.
  -- We could use Lua-UCA for the sorting, but I feel it is a bit of overkill.
-- (tail of get_authors: sort the collected names and join them for BibTeX)
  table.sort(retrieved_authors)
  return table.concat(retrieved_authors," and ")
end

-- Build the title field from the record's first <name> element.
-- Falls back to the script-level `pkgname` (the first command-line argument)
-- when the record has no <name>.
local get_title = function(record)
  local title = record:query_selector("name")[1]
  if title then
    title = title:get_text()
    -- don't uppercase package name, as suggested by Karl Berry and Boris Veytsman
    -- title = title:gsub("^(.)", function(a) return unicode.utf8.upper(a) end)
  else
    title = pkgname
  end
  -- parentheses keep only the escaped string, whatever bibtex_escape returns
  return string.format(titleformat, (bibtex_escape(title)))
end

-- Return true when `record` is a parsed document containing at least one
-- <name> element, i.e. when a package was actually found.
local function verify_record(record)
  -- verify that we found a package
  if not record then return false end
  return #record:query_selector("name") > 0
end

-- Return the href of the record's first <home> element, or the package's
-- CTAN /pkg/ page when the record has none.
local get_url = function(record,pkgname)
  local home = record:query_selector("home")[1]
  if home then return home:get_attribute("href") end
  return "https://ctan.org/pkg/"..pkgname
end

-- Return the escaped text of the first <caption> element, or nil.
local get_caption = function(record)
  local caption = record:query_selector("caption")[1]
  if caption then
    -- parentheses drop gsub's replacement count, which bibtex_escape may
    -- pass through as a second value
    return (bibtex_escape(caption:get_text()))
  end
  return nil
end

-- Return the `number` and `date` attributes of the first <version> element;
-- returns nothing when the record has no <version>.
local get_version = function(record)
  local version = record:query_selector("version")[1]
  if version then
    return version:get_attribute("number"), version:get_attribute("date")
  end
end

-- Return the trailing run of digits of the `year` attribute of the last
-- <copyright> element; returns nothing when there is no such element or
-- attribute, and nil when the attribute does not end in digits.
local get_copyright_date = function(record)
  -- we must get the latest <copyright>, because it contains the latest date
  local copyright_tbl = record:query_selector("copyright")
  local copyright = copyright_tbl[#copyright_tbl]
  if copyright then
    local year = copyright:get_attribute("year")
    if year then
      -- year can hold a range of years. we will choose the newest date
      return year:match("(%d+)$")
    end
  end
end

-- Return the CTAN /pkg/ page of `pkgname`; `record` is not used.
local ctan_url = function(record, pkgname)
  return "https://ctan.org/pkg/"..pkgname
end

-- Return the `path` attribute of the record's first <ctan> element.
--   record  - parsed package record
--   pkgname - optional package name, used only when the record has no <ctan>
-- Without a <ctan> element the result is get_url(record, pkgname). The
-- fallback used to call get_url(record) with no name, which raised an error
-- on `"https://ctan.org/pkg/"..nil` whenever the record also lacked <home>.
-- When no name is passed, the <home> href is returned if present, else nil.
-- NOTE(review): the call site is outside the visible part of the file; it
-- should pass the current package name as the second argument.
local ctan_path = function(record, pkgname)
  local ctan = record:query_selector("ctan")[1]
  if ctan then
    return ctan:get_attribute("path")
  end
  -- some packages don't contain the CTAN path
  if pkgname then
    return get_url(record, pkgname)
  end
  local home = record:query_selector("home")[1]
  if home then return home:get_attribute("href") end
  return nil
end

-- Replace every $name placeholder (lower-case letters only) in `template`
-- with records[name], or with "" when that key is missing.
-- Returns the result of gsub unchanged, i.e. the filled-in string followed by
-- the number of replacements.
local compile = function(template, records)
  return template:gsub("$([a-z]+)", function(a)
    return records[a] or ""
  end)
end

-- NOTE(review): the rest of this loop is cut off in this copy of the file;
-- the visible part is reproduced unchanged.
for _, pkgname in ipairs(args) do
  local url = "https://ctan.org/xml/2.0/pkg/" .. pkgname .. "?author-name=true"
  local record, info = load_xml(url, "%