dottes/abcfield.py

296 lines
10 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
#
# Extract a text field (title, by default) from a .abc file, and print it out
# with any ABC accented characters converted to HTML (default) or Latex.
#
# Optionally rearrange a field into display format:
# * In Title fields, change 'sort' form such as 'Exploding Potato, The'
# to display format 'The Exploding Potato'.
# * In Key fields, translate the ABC key representation to full text,
# e.g. G#dor becomes G# Dorian.
#
# Recognise continuation header fields and print those too. The ABC standard
# defines continuation fields as starting '+:'. Regrettably none of the tools
# I am using for the Booke recognise that syntax, so I am adopting a Booke
# convention of '<header>:+' *also* being a continuation. Note that a
# continuation is a distinct line in the field value; the value has a line
# break between it and the previous line.
#
import argparse
import pathlib
import re
import subprocess
import sys
# Table of ABC accent mnemonics. The key is the two characters that follow
# the backslash in the ABC text; the value is a tuple of
# (HTML entity, LaTeX sequence).
#
# LaTeX control words that take no argument (\O, \o, \AE, \ae, \ss) are
# wrapped in braces. Left bare they would absorb any letters that follow
# (e.g. 'Stra\sse' would become the undefined command '\sse') and would
# swallow a following space.
accentedletters = {
    # Acute accents
    "'A": ("&Aacute;", "\\'{A}"),
    "'E": ("&Eacute;", "\\'{E}"),
    "'I": ("&Iacute;", "\\'{I}"),
    "'O": ("&Oacute;", "\\'{O}"),
    "'U": ("&Uacute;", "\\'{U}"),
    "'Y": ("&Yacute;", "\\'{Y}"),
    "'a": ("&aacute;", "\\'{a}"),
    "'e": ("&eacute;", "\\'{e}"),
    "'i": ("&iacute;", "\\'{i}"),
    "'o": ("&oacute;", "\\'{o}"),
    "'u": ("&uacute;", "\\'{u}"),
    "'y": ("&yacute;", "\\'{y}"),
    # Grave accents
    "`A": ("&Agrave;", "\\`{A}"),
    "`E": ("&Egrave;", "\\`{E}"),
    "`I": ("&Igrave;", "\\`{I}"),
    "`O": ("&Ograve;", "\\`{O}"),
    "`U": ("&Ugrave;", "\\`{U}"),
    "`a": ("&agrave;", "\\`{a}"),
    "`e": ("&egrave;", "\\`{e}"),
    "`i": ("&igrave;", "\\`{i}"),
    "`o": ("&ograve;", "\\`{o}"),
    "`u": ("&ugrave;", "\\`{u}"),
    # Umlauts
    "\"A": ("&Auml;", "\\\"{A}"),
    "\"E": ("&Euml;", "\\\"{E}"),
    "\"I": ("&Iuml;", "\\\"{I}"),
    "\"O": ("&Ouml;", "\\\"{O}"),
    "\"U": ("&Uuml;", "\\\"{U}"),
    "\"Y": ("&Yuml;", "\\\"{Y}"),
    "\"a": ("&auml;", "\\\"{a}"),
    "\"e": ("&euml;", "\\\"{e}"),
    # \i is LaTeX's dotless i; the backslash must be escaped in Python.
    "\"i": ("&iuml;", "\\\"{\\i}"),
    "\"o": ("&ouml;", "\\\"{o}"),
    "\"u": ("&uuml;", "\\\"{u}"),
    "\"y": ("&yuml;", "\\\"{y}"),
    # Circumflexes
    "^A": ("&Acirc;", "\\^{A}"),
    "^E": ("&Ecirc;", "\\^{E}"),
    "^I": ("&Icirc;", "\\^{I}"),
    "^O": ("&Ocirc;", "\\^{O}"),
    "^U": ("&Ucirc;", "\\^{U}"),
    "^a": ("&acirc;", "\\^{a}"),
    "^e": ("&ecirc;", "\\^{e}"),
    "^i": ("&icirc;", "\\^{\\i}"),
    "^o": ("&ocirc;", "\\^{o}"),
    "^u": ("&ucirc;", "\\^{u}"),
    # Tilde
    "~A": ("&Atilde;", "\\~{A}"),
    "~N": ("&Ntilde;", "\\~{N}"),
    "~O": ("&Otilde;", "\\~{O}"),
    "~a": ("&atilde;", "\\~{a}"),
    "~n": ("&ntilde;", "\\~{n}"),
    "~o": ("&otilde;", "\\~{o}"),
    # Cedilla
    ",C": ("&Ccedil;", "\\c{C}"),
    ",c": ("&ccedil;", "\\c{c}"),
    # Slash
    "/O": ("&Oslash;", "{\\O}"),
    "/o": ("&oslash;", "{\\o}"),
    # Ring
    "AA": ("&Aring;", "\\r{A}"),
    "aa": ("&aring;", "\\r{a}"),
    # Ligatures
    "AE": ("&AElig;", "{\\AE}"),
    "ae": ("&aelig;", "{\\ae}"),
    "ss": ("&szlig;", "{\\ss}"),
    # Quote marks
    "''": ("&apos;", "'"),
    "'\"": ("&quot;", "\""),
}
# Map from the (lower-case) ABC mode abbreviation to the mode name used for
# display. Any mode not listed here is displayed as "Major" by the lookup
# in convertKeyToDisplay().
abckeys = {
    "m": "Minor",
    "min": "Minor",
    "maj": "Major",
    "ion": "Ionian",
    "aeo": "Aeolian",
    "mix": "Mixolydian",
    "dor": "Dorian",
    "phr": "Phrygian",
    "lyd": "Lydian",
    "loc": "Locrian",
}
# Convert ABC accented chars to HTML entities or LaTeX.
#
# Every backslash in t, together with the (up to) two characters after it,
# is treated as one ABC mnemonic. Mnemonics found in accentedletters are
# replaced by the HTML entity, or by the LaTeX sequence if latex is true.
# Unknown mnemonics are copied through unchanged, backslash included.
def convertAccents(t, latex=False):
    # Index into the (HTML, LaTeX) tuples held in accentedletters.
    column = 1 if latex else 0

    def translate(match):
        entry = accentedletters.get(match.group(1))
        if entry is None:
            # Not a mnemonic we know; leave the text exactly as found.
            return match.group(0)
        return entry[column]

    # [\s\S] rather than '.' so a newline directly after the backslash is
    # consumed too.
    return re.sub(r'\\([\s\S]{0,2})', translate, t)
# Convert Title fields from sort to display, so Bat, The->The Bat.
#
# The text after the last comma is moved to the front. If that text is
# purely alphanumeric it is followed by a space; otherwise (e.g. "L'") it
# is joined directly to the rest. A title with no comma is returned as is.
def convertTitleToDisplay(t):
    head, comma, tail = t.rpartition(',')
    if not comma:
        return t
    prefix = tail.strip()
    if prefix.isalnum():
        prefix += " "
    return prefix + head.strip()
# Convert Key field from ABC to display, so G#dor->G# Dorian.
#
# t is the raw key text: a note letter, an optional '#' or 'b' accidental,
# then an optional mode. The mode is looked up in abckeys, and anything
# not found there is displayed as Major. Only the first word after the
# tonic is considered, and only its first three letters, case-insensitively,
# so 'D Dorian', 'Ddor' and 'DDOR clef=bass' all give 'D Dorian'.
# An empty string is returned unchanged.
def convertKeyToDisplay(t):
    if not t:
        return ""
    letter = t[0].upper()
    # Slicing (rather than indexing) copes with a bare one-letter key.
    accidental = t[1:2]
    if accidental in ('#', 'b'):
        rest = t[2:]
    else:
        accidental = ""
        rest = t[1:]
    words = rest.split()
    mode = words[0][:3].lower() if words else ""
    return letter + accidental + ' ' + abckeys.get(mode, "Major")
# Convert input string from Markdown to HTML or LaTeX. Fix up link
# targets so any 'foo.abc' target links to the tune with that name.
#
# t is the Markdown text; latex selects LaTeX output instead of HTML.
# The conversion is done by running pandoc, so pandoc must be on the
# PATH; subprocess.CalledProcessError is raised if pandoc fails.
# Returns the converted text with surrounding whitespace removed.
def convertMarkdown(t, latex):
    target = "--to=latex" if latex else "--to=html"
    res = subprocess.check_output(
        ['pandoc', '--from=markdown', target],
        input=t, universal_newlines=True)
    # The link target is matched with a character class that cannot cross
    # the end of the target, and the dot in '.abc' is escaped. Otherwise a
    # match could start at an earlier, unrelated link and rewrite that one.
    if latex:
        res = re.sub(r'\\href\{([^{}]*)\.abc\}', r'\\hyperlink{\1}', res)
    else:
        res = re.sub(r'href="([^"]*)\.abc"', r'href="\1.html"', res)
    return res.strip()
# Implement a custom Markdown shorthand for referencing ABC files.
# <foo.abc> will expand to ['title of foo'](foo.abc).
#
# t is the Markdown text and dir is the directory holding the referenced
# .abc files. Opening a referenced file that does not exist raises the
# usual OSError from pathlib.
def expandCustomMarkdown(t, dir):
    # Given a match to <foo.abc>, return a markdown link to the tune with the
    # title (and subtitle, if present) of the tune as the text of the link.
    # Because we're going through Markdown, character entities must be
    # HTML. Pandoc will convert them to UTF-8.
    def getTitleLink(m):
        fname = m.group(1) + ".abc"
        with pathlib.Path(dir, fname).open() as f:
            lines = f.readlines()
        return "[" + getFullTitle(lines, dir) + "](" + fname + ")"

    # The name may not contain '<' or '>' and the dot is literal, so the
    # match cannot begin at some earlier, unrelated '<' in the text.
    return re.sub(r'<([^<>]*)\.abc>', getTitleLink, t)
# Return the raw text for a given field. Optionally the nth field is taken,
# or the field data must start with a designated string to be recognised.
#
# lines is an iterable of text lines, field the single-character header
# name, n the 1-based index of the matching field to return, and starts an
# optional prefix that the field data must begin with (it is removed from
# the result). Continuation lines ('+:' or '<field>:+') that follow the
# chosen field are appended, separated by newlines. Returns "" if no
# matching field is found.
def getFieldText(lines, field, n = 1, starts = None):
    value = ""
    remaining = n
    for raw in lines:
        text = raw.strip()
        # Only header-style lines ('X:' plus at least one character) matter.
        if len(text) <= 2 or text[1] != ':':
            continue
        tag = text[0]

        if tag == "+" or (tag == field and text[2] == "+"):
            # A continuation only counts once the wanted field has been seen.
            if value:
                extra = text[2:] if tag == "+" else text[3:]
                value = value + '\n' + extra.strip()
            continue

        # Any other header ends the field we have been collecting.
        if value:
            break
        if tag != field:
            continue

        body = text[2:].strip()
        if starts:
            if not body.startswith(starts):
                continue
            body = body[len(starts):].strip()
        if remaining > 1:
            # Not yet the requested occurrence; skip this one.
            remaining -= 1
            continue
        value = body
    return value
# Return display text for a given field.
#
# The raw field text is fetched with getFieldText(), its ABC accents are
# converted, and then a field-specific conversion is applied: titles (T)
# go from sort to display order, keys (K) are spelt out, and history and
# notes (H, N) are run through Markdown. Returns "" if the field is absent.
def getFieldDisplayText(lines, dir, field, n = 1, starts = None, latex = False):
    text = getFieldText(lines, field, n, starts)
    if not text:
        return text

    kind = field.upper()
    viaMarkdown = kind in ('H', 'N')
    # Fields that go through Markdown must have HTML entities.
    text = convertAccents(text, False if viaMarkdown else latex)

    if kind == "T":
        return convertTitleToDisplay(text)
    if kind == "K":
        return convertKeyToDisplay(text)
    if viaMarkdown:
        return convertMarkdown(expandCustomMarkdown(text, dir), latex)
    return text
# Return full title (title + [" (" + subtitle + ")"] if subtitle exists).
#
# The title is the first T field and the subtitle the second, both taken
# as display text via getFieldDisplayText().
def getFullTitle(lines, dir, starts = None, latex = False):
    title = getFieldDisplayText(lines, dir, "T", starts=starts, latex=latex)
    subtitle = getFieldDisplayText(lines, dir, "T", n=2, starts=starts, latex=latex)
    if subtitle:
        return title + " (" + subtitle + ")"
    return title
# Execute only if run as a script: parse the command line, extract the
# requested field from the input ABC file and print it. The exit status is
# 0 if something was printed and 1 if the field was not found.
if __name__ == "__main__":
    # Read the ABC text from open file f and print the field selected by
    # args. dir is the directory used to resolve <foo.abc> references in
    # Markdown fields. Returns True if a value was found and printed.
    def process(f, dir, args):
        lines = f.readlines()
        fulltitle = args.field.upper() == "FT"
        if args.display:
            if fulltitle:
                line = getFullTitle(lines, dir, args.starts, args.latex)
            else:
                line = getFieldDisplayText(lines, dir, args.field, args.index, args.starts, args.latex)
        else:
            # For non-display text FT is just the plain title.
            field = "T" if fulltitle else args.field
            line = getFieldText(lines, field, args.index, args.starts)
        if line:
            print(line)
            return True
        return False

    parser = argparse.ArgumentParser(description="Extract field data from ABC file.")
    parser.add_argument("-f", "--field", dest="field", default="T",
                        help=("extract the given field [default: %(default)s]. "
                              "Field FT is special; it returns the full title "
                              "- the title followed by subtitle in () if "
                              "present - for display text, or just the title "
                              "for non-display text."))
    parser.add_argument("-l", "--latex", dest="latex",
                        action="store_true", default=False,
                        help="convert special characters for LaTeX (default HTML)")
    parser.add_argument("-d", "--display", dest="display",
                        action="store_true", default=False,
                        help=("convert to display text. Convert accents to "
                              "LaTeX or HTML, in titles convert 'Tune, The' to "
                              "'The Tune', convert keys to full key name, "
                              "and expand Markdown in notes and history."))
    parser.add_argument("-n", "--index", dest="index",
                        action="store", type=int, default=1,
                        help="report INDEXth value [default: %(default)s]")
    parser.add_argument("-s", "--starts", dest="starts",
                        action="store", default=None,
                        help=("report only if line starts with CONTENT "
                              "and remove CONTENT"),
                        metavar="CONTENT")
    parser.add_argument('input', type=argparse.FileType('r'),
                        help='input ABC file')
    args = parser.parse_args()

    # argparse has already opened the input. Use that handle, and close it
    # when done, rather than opening the file a second time by name. This
    # also lets '-' (standard input) work.
    path = pathlib.Path(args.input.name)
    with args.input as f:
        res = process(f, path.parent, args)
    sys.exit(int(not res))