dottes/abcfield.py

#!/usr/bin/env python3
#
# Extract a text field (title, by default) from a .abc file, and print it out
# with any ABC accented characters converted to HTML (default) or LaTeX.
#
# Optionally rearrange a field into display format:
# * In Title fields, change 'sort' form such as 'Exploding Potato, The'
#   to display format 'The Exploding Potato'.
# * In Key fields, translate the ABC key representation to full text,
#   e.g. G#dor becomes G# Dorian.
#
# Recognise continuation header fields and print those too. The ABC standard
# defines continuation fields as starting '+:'. Regrettably none of the tools
# I am using for the Booke recognise that syntax, so I am adopting a Booke
# convention of '<header>:+' *also* being a continuation. Note that a
# continuation is a distinct line in the field value; the value has a line
# break between it and the previous line.
#

import optparse
import pathlib
import re
import subprocess
import sys

# ABC accent mnemonics and their replacements. Each key is the pair of
# characters that follows the backslash in ABC text (so \'e is keyed as "'e").
# Each value is a tuple of (HTML entity, LaTeX sequence).
accentedletters = {
    # Acute accents
    "'A" : ("&Aacute;", "\\'{A}"),
    "'E" : ("&Eacute;", "\\'{E}"),
    "'I" : ("&Iacute;", "\\'{I}"),
    "'O" : ("&Oacute;", "\\'{O}"),
    "'U" : ("&Uacute;", "\\'{U}"),
    "'Y" : ("&Yacute;", "\\'{Y}"),
    "'a" : ("&aacute;", "\\'{a}"),
    "'e" : ("&eacute;", "\\'{e}"),
    "'i" : ("&iacute;", "\\'{i}"),
    "'o" : ("&oacute;", "\\'{o}"),
    "'u" : ("&uacute;", "\\'{u}"),
    "'y" : ("&yacute;", "\\'{y}"),

    # Grave accents
    "`A" : ("&Agrave;", "\\`{A}"),
    "`E" : ("&Egrave;", "\\`{E}"),
    "`I" : ("&Igrave;", "\\`{I}"),
    "`O" : ("&Ograve;", "\\`{O}"),
    "`U" : ("&Ugrave;", "\\`{U}"),
    "`a" : ("&agrave;", "\\`{a}"),
    "`e" : ("&egrave;", "\\`{e}"),
    "`i" : ("&igrave;", "\\`{i}"),
    "`o" : ("&ograve;", "\\`{o}"),
    "`u" : ("&ugrave;", "\\`{u}"),

    # Umlauts
    "\"A" : ("&Auml;", "\\\"{A}"),
    "\"E" : ("&Euml;", "\\\"{E}"),
    "\"I" : ("&Iuml;", "\\\"{I}"),
    "\"O" : ("&Ouml;", "\\\"{O}"),
    "\"U" : ("&Uuml;", "\\\"{U}"),
    "\"Y" : ("&Yuml;", "\\\"{Y}"),
    "\"a" : ("&auml;", "\\\"{a}"),
    "\"e" : ("&euml;", "\\\"{e}"),
    # The backslash before 'i' is written '\\i': a bare '\i' is an invalid
    # escape sequence in a Python string literal.
    "\"i" : ("&iuml;", "\\\"{\\i}"),
    "\"o" : ("&ouml;", "\\\"{o}"),
    "\"u" : ("&uuml;", "\\\"{u}"),
    "\"y" : ("&yuml;", "\\\"{y}"),

    # Circumflexes
    "^A" : ("&Acirc;", "\\^{A}"),
    "^E" : ("&Ecirc;", "\\^{E}"),
    "^I" : ("&Icirc;", "\\^{I}"),
    "^O" : ("&Ocirc;", "\\^{O}"),
    "^U" : ("&Ucirc;", "\\^{U}"),
    "^a" : ("&acirc;", "\\^{a}"),
    "^e" : ("&ecirc;", "\\^{e}"),
    # As above, '\\i' rather than the invalid escape '\i'.
    "^i" : ("&icirc;", "\\^{\\i}"),
    "^o" : ("&ocirc;", "\\^{o}"),
    "^u" : ("&ucirc;", "\\^{u}"),

    # Tilde
    "~A" : ("&Atilde;", "\\~{A}"),
    "~N" : ("&Ntilde;", "\\~{N}"),
    "~O" : ("&Otilde;", "\\~{O}"),
    "~a" : ("&atilde;", "\\~{a}"),
    "~n" : ("&ntilde;", "\\~{n}"),
    "~o" : ("&otilde;", "\\~{o}"),

    # Cedilla
    ",C" : ("&Ccedil;", "\\c{C}"),
    ",c" : ("&ccedil;", "\\c{c}"),

    # Slash
    "/O" : ("&Oslash;", "\\O"),
    "/o" : ("&oslash;", "\\o"),

    # Ring
    "AA" : ("&Aring;", "\\r{A}"),
    "aa" : ("&aring;", "\\r{a}"),

    # Ligatures
    "AE" : ("&AElig;", "\\AE"),
    "ae" : ("&aelig;", "\\ae"),
    "ss" : ("&szlig;", "\\ss"),
}

# ABC mode abbreviations (lower case) and the mode names shown to readers.
# A mode that is not listed here is displayed as "Major" by
# convertKeyToDisplay().
abckeys = {
    "m":   "Minor",
    "min": "Minor",
    "aeo": "Aeolian",
    "mix": "Mixolydian",
    "dor": "Dorian",
    "phr": "Phrygian",
    "lyd": "Lydian",
    "loc": "Locrian",
}

# Replace ABC accent mnemonics in 't' with HTML entities or, when 'latex' is
# true, with LaTeX sequences. A mnemonic is a backslash plus the two
# characters after it; the pair is looked up in accentedletters.
# NOTE: a backslash followed by an unrecognised pair is copied through
# verbatim together with that pair, and scanning resumes after the pair.
def convertAccents(t, latex=False):
    # Index of the wanted replacement inside each accentedletters tuple.
    column = 1 if latex else 0
    pieces = []
    rest = t
    while True:
        before, slash, after = rest.partition('\\')
        pieces.append(before)
        if not slash:
            # No further backslash: everything has been copied.
            return "".join(pieces)
        code, rest = after[:2], after[2:]
        replacement = accentedletters.get(code)
        if replacement is None:
            pieces.append("\\" + code)
        else:
            pieces.append(replacement[column])

# Convert Title fields from sort to display, so Bat, The->The Bat.
#
# The text after the last comma is moved to the front of the title. It is
# separated from the rest by one space, unless it ends in an apostrophe
# (an elided article such as L'), in which case it is joined directly.
# A title without a comma is returned unchanged.
def convertTitleToDisplay(t):
    head, comma, tail = t.rpartition(',')
    if not comma:
        return t
    article = tail.strip()
    rest = head.strip()
    # Nothing to join if either side of the comma is empty.
    if not article or not rest:
        return article + rest
    # Decide on the apostrophe rather than on isalnum(): the moved part may
    # legitimately contain spaces or converted accents (e.g. '&eacute;'),
    # and it still needs a space after it.
    if article.endswith(("'", "\u2019")):
        return article + rest
    return article + " " + rest

# Convert Key field from ABC to display, so G#dor->G# Dorian.
#
# The value is read as a note letter, an optional accidental ('#' or 'b')
# and an optional mode word. The mode is looked up in abckeys, first as
# written and then by its first three letters, so 'Dminor' and 'DMix' are
# recognised as well as 'Dmin' and 'Dmix'. Anything following the mode word
# (for example 'clef=bass') is ignored. An unrecognised or missing mode is
# shown as Major.
#
# A value that does not start with a note letter A-G (such as 'none' or
# 'HP') is returned as it stands, stripped of surrounding whitespace.
def convertKeyToDisplay(t):
    text = t.strip()
    if not text:
        return text
    letter = text[0].upper()
    if letter not in ("A", "B", "C", "D", "E", "F", "G"):
        return text
    rest = text[1:]
    accidental = ""
    if rest[:1] in ("#", "b"):
        accidental = rest[0]
        rest = rest[1:]
    # Only the first word after the tonic can be the mode.
    words = rest.split()
    mode = words[0].lower() if words else ""
    # Exact match first so that the one-letter form 'm' keeps working.
    name = abckeys.get(mode) or abckeys.get(mode[:3], "Major")
    return letter + accidental + ' ' + name

# Convert input string from Markdown to HTML or LaTeX by piping it through
# pandoc. Fix up link targets so any 'foo.abc' target links to the tune with
# that name.
#
# t:     Markdown source text.
# latex: true for LaTeX output, false for HTML output.
#
# Returns the converted text with surrounding whitespace removed. Raises
# subprocess.CalledProcessError if pandoc exits with a non-zero status.
def convertMarkdown(t, latex):
    target = "--to=latex" if latex else "--to=html"
    res = subprocess.check_output(['pandoc', '--from=markdown', target],
                                  input=t, universal_newlines=True)
    # The target is matched with a negated character class rather than '.*?'
    # so that a match cannot begin in one link and end in a later one, and
    # the dot before 'abc' is escaped so that it only matches a real dot.
    if latex:
        res = re.sub(r'\\href\{([^}]*)\.abc\}', r'\\hyperlink{\1}', res)
    else:
        res = re.sub(r'href="([^"]*)\.abc"', r'href="\1.html"', res)
    return res.strip()

# Implement a custom Markdown shorthand for referencing ABC files.
# <foo.abc> will expand to ['title of foo'](foo.abc).
#
# t:     Markdown text that may contain <name.abc> references.
# dir:   directory in which the referenced .abc files are looked up.
# latex: passed on when converting the referenced tune's title.
#
# Returns 't' with every reference replaced by a Markdown link. Opening a
# referenced file that does not exist raises OSError.
def expandCustomMarkdown(t, dir, latex):
    # Given a match to <foo.abc>, return a markdown link to the tune with the
    # title (and subtitle, if present) of the tune as the text of the link.
    def getTitle(m):
        fname = m.group(1) + ".abc"
        path = pathlib.Path(dir, fname)
        with path.open() as f:
            title = getFieldDisplayText(f, dir, "T", latex=latex)
            # Rewind so the second title is searched for from the top.
            f.seek(0)
            subtitle = getFieldDisplayText(f, dir, "T", n=2, latex=latex)
        if subtitle:
            title = title + " (" + subtitle + ")"
        return "[" + title + "](" + fname + ")"
    # The name may not contain '<' or '>', so a match cannot start at an
    # unrelated tag earlier in the text, and the dot before 'abc' is escaped
    # so that it only matches a real dot.
    return re.sub(r'<([^<>]+)\.abc>', getTitle, t)

# Fetch the unconverted value of header field 'field' from the lines of 'inf'.
#
# When 'starts' is given, only occurrences whose value begins with it are
# considered, and that prefix is removed from the value. With n > 1 the
# first n-1 such occurrences are passed over. Continuation lines ('+:' or
# '<field>:+') that follow the chosen occurrence are appended to it, each
# after a newline. Returns "" if no occurrence qualifies.
def getFieldText(inf, field, n = 1, starts = None):
    collected = ""
    remaining = n
    for raw in inf:
        text = raw.strip()
        # Only lines of the form '<char>:<something>' are header lines.
        if len(text) <= 2 or text[1] != ':':
            continue
        tag = text[0]
        if tag == "+" or (tag == field and text[2] == "+"):
            # A continuation only counts once a value has been found.
            if collected:
                extra = text[2:] if tag == "+" else text[3:]
                collected = collected + '\n' + extra.strip()
            continue
        # Any other header line ends a value that is being collected.
        if collected:
            break
        if tag != field:
            continue
        value = text[2:].strip()
        if starts:
            if not value.startswith(starts):
                continue
            value = value[len(starts):].strip()
        if remaining > 1:
            remaining -= 1
            continue
        collected = value
    return collected

# Return the value of a field in display form. The raw value is fetched with
# getFieldText() and its accents converted; Title (T) values are then put in
# display order, Key (K) values are spelled out, and History/Notes (H, N)
# values are run through the Markdown conversion. An empty value is returned
# as it is.
def getFieldDisplayText(inf, dir, field, n = 1, starts = None, latex = False):
    text = getFieldText(inf, field, n, starts)
    if not text:
        return text
    text = convertAccents(text, latex)
    kind = field.upper()
    if kind == "T":
        return convertTitleToDisplay(text)
    if kind == "K":
        return convertKeyToDisplay(text)
    if kind in ["H", "N"]:
        return convertMarkdown(expandCustomMarkdown(text, dir, latex), latex)
    return text

if __name__ == "__main__":
    # Everything in this block is executed only if run as a script.

    # Print the requested field of one open ABC stream, in raw or display
    # form according to the options. 'dir' is the directory used to resolve
    # references to other .abc files. Returns True if something was printed.
    def process(inf, dir, options):
        if options.display:
            line = getFieldDisplayText(inf, dir, options.field, options.index, options.starts, options.latex)
        else:
            line = getFieldText(inf, options.field, options.index, options.starts)
        if line:
            print(line)
            return True
        return False

    parser = optparse.OptionParser(usage="usage: %prog [options] [filename]\n\n"
                                   "  Extract field data from ABC file.")
    parser.add_option("-f", "--field", dest="field", default="T",
                      help="extract the field FIELD", metavar="FIELD")
    parser.add_option("-l", "--latex", dest="latex",
                      action="store_true", default=False,
                      help="convert special characters for LaTeX")
    parser.add_option("-d", "--display", dest="display",
                      action="store_true", default=False,
                      help="convert to display text")
    parser.add_option("-n", "--index", dest="index",
                      action="store", type="int", default=1,
                      help="report INDEXth value [default: %default]",
                      metavar="INDEX")
    parser.add_option("-s", "--starts", dest="starts",
                      action="store", type="string", default=None,
                      help="report only if line starts CONTENT and remove CONTENT",
                      metavar="CONTENT")
    (options, args) = parser.parse_args()

    res = False
    if args:
        for arg in args:
            path = pathlib.Path(arg)
            with path.open() as f:
                # process() must be the left operand: 'res or process(...)'
                # short-circuits, so every file after the first one that
                # produced output would be opened but never processed.
                res = process(f, path.parent, options) or res
    else:
        # No file arguments: read the ABC text from standard input.
        res = process(sys.stdin, ".", options)
    # Exit status is 0 if at least one value was printed, 1 otherwise.
    sys.exit(int(not res))