From 30c34782b7ae746da036ac54012e0026525ea2f9 Mon Sep 17 00:00:00 2001 From: Jim Hague Date: Tue, 17 Jun 2014 09:11:38 +0100 Subject: [PATCH] Add MusicXML conversion to tune pages. Might help someone. --- abc2xml/Changes.txt | 261 +++ abc2xml/Readme.txt | 76 + abc2xml/abc2xml.py | 1747 ++++++++++++++++++++ abc2xml/pyparsing.py | 3749 ++++++++++++++++++++++++++++++++++++++++++ dottes.html.tune | 2 + makeWeb.sh | 8 +- 6 files changed, 5842 insertions(+), 1 deletion(-) create mode 100644 abc2xml/Changes.txt create mode 100644 abc2xml/Readme.txt create mode 100644 abc2xml/abc2xml.py create mode 100644 abc2xml/pyparsing.py diff --git a/abc2xml/Changes.txt b/abc2xml/Changes.txt new file mode 100644 index 0000000..62adff3 --- /dev/null +++ b/abc2xml/Changes.txt @@ -0,0 +1,261 @@ +Version 58 +- grammer for I:score instruction replaces ad hoc parsing. +- voice merging, part grouping and grand staffs are derived by transformation of the parse tree of I:score. +- also move clef redefinitions after [I:staff +/-n]. +- avoid a false beam break when [I:staff +/-n] is used within a beamed note group. +- absolute ABC staff numbers in [I:staff n] are mapped to MusicXML grand staff numbers (when possible). +- added translation of the [+-^_]15 octave clefs. +- no default channel/panning/volume in elements. + +Version 57 +- grand staff implemented. +- in an [I:score] instruction recognize a braced voice group as a grand staff when the opening brace is followed by an asterix ('{*' no space allowed). +- also recognize a normal braced voice group as a grand staff when precisely one of the braced voices has a name. +- the name of a staff or part is taken from the first of its voices (in ABC source order) for which a name is defined. +- within a grand staff notes and directions preceded by [I:staff +/-n] are moved n staffs upwards or downwards. 
+- accept voice ids with a leading asterix in I:score instructions (for the time being the asterix is discarded, however) + +Version 56 +- the element should come before the element in a note. (previously caused syntax warning with FinaleNotepad) +- translates the _8, ^8 variants of the clefs in addition to the +8, -8. Also translates octave= argument of V: and K: +- part names are left empty when not explicitly given using V:id name="" (previously id was used in that case) +- transpose= was only honoured when a typed clef= definition was present, now is works in any V: or K: field. +- a tempo definition in the ABC header (Q:) is treated as an inline tempo ([Q:]) for the first voice. +- repeated occurrences of [I:MIDI] in a voice are translated into a MusicXML instrument changes for that voice. + Neither Finale nor Musescore recognize the relevant MusicXML elements ( and yet. +- new cue=on/off attribute for K: and V: definitions, to mark sections of cue notes in a voice. + Neither Finale nor Musescore handle cue size for directions, yet (). +- normally whole measure rests are discarded when merging staffs. Use option -r to keep these rests in the merged results. + +Version 55 +- replaced info message text: "leeg" with "empty voice", when skipping an empty voice. +- avoided exception when ABC voice has only one measure containing only a volta number +- do not output an xml-backup element when it has zero duration + +Version 54 +- allow gracenotes within chords +- discard unbalanced chord brackets (caused a syntax error previously) +- chords containing only rests caused an exception +- ABC-code like: B2 1 caused an exception, now interpreted as: B2/1 + +Version 53 +- tie to a chord with a single note caused an exception. +- a first line with a single measure and only a left bar line caused a syntax error. +- a first line with a single measure with only chords and no bar lines caused a syntax error. 
+- option -z or --mxl writes an additional compressed xml file with extention .mxl +- better treatment of barlines with spaces interspersed. +- dot after note right before barline caused syntax error (now only warning and skip) + +Version 52 +- accept and translate chords in a grace sequence (caused an exception previously) +- corrected bug when parsing %%score without space between voice number and left bracket, like %%staves 1(2 3) + +Version 51 +- treat abc file without any X: as a single tune and prepend X:1 +- recognize alto4 as alternative notation for tenor clef + +Version 50 +- corrected translation of the clef octave change notation (i.e. +8, -8 suffix) + +Version 49 +- ties between different pitches are converted to slurs (previously the tie in 'A-B A' would extend +to the second A, now there will be a slur between A and B). +- do not add accidentals to tied notes even when present in ABC code. +- use consistent voice numbering in XML when merging ABC-voices with overlays. + +Version 48 +- M: field with more than one slash caused an exception. +- limit shortest note durations to 1/64 +- skip redundant closing slurs (caused exception previously) +- check instrument id when generating MIDI elements (caused exception when errors in %%score) +- issue warning message when illegal user defined characters are used (X,Y,Z,x,y,z) +- use correct xml-syntax for credit elements (was wrong in version 47) +- translate explicit alterations for non traditional keys +(non traditional key signatures not supported by MuseScore, but note alterations are correct) +- skip more illegal symbols in stead of issuing parse error + +Version 47 +- translate middle= in clefs +- translate transpose= in clefs (also %%MIDI transpose is recognised) +- translate subname= in V: fields (name= was already translated) +- translate "%%MIDI program" and "%%MIDI channel" (multiple instruments per staff supported) +- an abc file with multiple tunes can be converted to a set of musicxml files. 
+- new command line option "-m SKIP NUM" to set the range of tunes to be converted for large abc files. +- leading dot was skipped when parsing a float in page format settings (.5in -> 5in) +- accept [r:] syntax for remarks (caused syntax error previously) +- relax syntax for user defined symbols (allowed character set was needlessly restricted) +- all abc meta-info fields are translated to credit tags (with type) + +Version 46 +- warn when unit length is a single integer and assume L:1/8 (caused exception previously) +- translate tune with only header fields (caused exception previously) +- translate tunes where first voice definition appears in the middle of the body (caused exception previously) +- skip illegal characters in chords and issue warning (caused syntax error previously) +- skip illegal U: field in body and issue warning (caused syntax error previously) +- more illegal characters between elements are skipped but with less specific warnings. +- line endings with only were not handled properly +- add check for text without any abc code in it (caused exception previously) +- conversion of pObj to string caused exception when it contained non latin-1 characters (e.g. in warning messages) + +Version 45 +- ignore old style continuation when next line is w: +- replace illegal durations by the nearest smaller legal one (caused exception previously) +- when multiple stave mappings keep only first one and issue warning +- accept start of next tune (X:) when not preceeded by an empty line (caused syntax error previously) +- warn when unit length is no fraction of integers and assume L:1/8 (caused exception previously) +- raise meaningful exception when the tune is empty (e.g. empty line after K:) +- broken rhythm involving a rest was not translated + +Version 44 +- translate volta brackets with quoted text (in addition to numbers and number ranges) +- when error in M: field assume 4/4. 
(caused exception previously) +- allow voice names with underscore +- bug in parsing %%score when no space between two stave groups, like: (v1 v2)(v3 v4) +- corrected merging of voices of unequal length (caused exception previously) +- user defined symbols with '()<>.' chars were not accepted +- when p was a user defined symbol also !p! was erroneously seen as used defined +- skip random text before tune starts (caused syntax error previously) + +Version 43 +- more non standard bar lines recognized (e.g. :]) +- accept X: fields when the tune number is followed by non-numeric chars. +- allow complex meter (M:2+2+3/8) +- warn for illegal L: fields (like L:1/6) and use 1/8 instead +- accept and skip E: field +- wedge close was not recognized when written with old '+' notation (bug was in fixSlurs) +- remove * and ** at end of line (probably old notation for right adjustment) +- accept and ignore a Q:-field with only text +- replace "Expected end of text at .." message from parser by "Syntax error at .." + +Version 42 +- translate tempo from Q: field +- translate -8 in key or voice field (temporary fix) +- accept empty text annotations +- in addition to !<(! also accept !crescendo(! 
and the same for diminuendo +- ignore line continuation after information field +- in lyrics treat isolated tildes as note skips +- encode parse error message in utf-8 and when that fails in latin-1 +- limit length of misplaced symbol message to 80 chars +- put text between K: and first V: into the header (was skipped previously) +- bug in %%score when no spaces around '|'-character +- added "o" as alternative for dim ("Edim7" = "Eo7") +- in chord symbols "+" and "-" were not always translated correctly +- tuple abbreviation (n:: and (n::m were not translated correctly +- dotted barlines translated (: .|) + +Version 41 +- translate (multi-)measure rests: Z, X +- discard misplaced broken symbols (no more exceptions) +- discard misplaced octave suffices (warning, no parse error) +- discard misplaced header fields (warning, no parse error after K:) +- show location of misplaced symbols +- only parse header if it is not empty (-> better error messages) +- accept score/stave-directives and page formatting also in body + +Version 40 +- when reading a file with multiple tunes only translate the first one +- command line option added for scale, page size and margins +- all %% directives are changed into I: fields +- translate scale, page size and margins from ABC directives, but command line overrides + +Version 39 +- accept and discard rests in chords +- accept and discard accidentals before rests +- spacer is treated as a normal note but discarded +- accept chords with only one note +- invisible rests (x) are translated as invisible in xml too. + +Version 38 +- also recognise ma and mi in chord symbols (e.g. 
Cma7, Cmi7) +- replace tildes in lyrics by spaces +- accept syllabi starting with a tilde +- accept space after V: 1 + +Version 37 +- accidental elements are written whenever an explicit accidentals are present in ABC (for finale, mscore does not need them) +- tuplet notation elements are written to mark begin and end of tuplet (for finale, mscore doet not need them) +- normal-type elements are written when tuplets are irregular +- issue understandable message when metre is not recognized +- take (compound) metre into account when translating tuplet shorthands (as described in ABC 2.1) +- do not add beaming-elements into rest-elements (still break beam when long rest in beamgroup). + +Version 36 +- volta-stop elements were somtimes (recognised at and) put into a left-bar element, which +is not valid MusicXML. These elements are now put into the right-bar of the previous measure. +- accept volta's without barline: "abc [1 def" is read as: "abc |[1 def" +- accept volta's after redundant left barline: "abc |\n|[1 def" is read as "abc |\n[1 def" +- changed document order of repeat and ending elements (MusicXML order requirement) +- xml output to stdout is also encoded in utf-8 (was erroneously done in latin-1) +- prevent finale detecting the string "xml" in the -tag (finale quirk) +- only issue a -tag when abc code really uses explicit linebreaks + +Version 35 +- recognise and translate (nested) braces and brackets in both %%staves and %%score +- translate more keys (maj, min, mix, dor, phr, lyd, loc and none) +- recognise and skip [I:...] in body +- invisible bar lines translated +- silently skip slur end when there is no corresponding slur begin +- also accept user defined symbols delimited by + +- limit length of syntax error message (in a way compatible with pyparsing internal administration) +- add tag, because Finale seems to expect it. 
+ +Version 34 +- removed copy() from 2 parse expressions because it did not appear to work on Linux +- recognize, warn and skip misplaced accidentals and ties +- bug in old style continuation when there was nothing left to continue with. +- limit syntax error output to 80 chars, with error in the middle. + +Version 33 +- added !coda!, !segno!, !fine!, !dacoda!, !dalsegno! etc. +- move coda and segno when just before a barline to next measure +- added several ornaments and articulations. +- recognize (and skip) '<' and '>' position markers for text expressions +- fall back to text expression for unrecognized chord symbols. +- recognize (and skip) alternative chord symbols between brackets +- interpret expressions like "ABC" as text expression and not as misspelled chord symbol. +- beam detection improved (grammar) + +Version 32 +- grammar for both linebreaks ! and $ without ambiguities! +- remove commandline option for linebreak (also I:linebreak not needed anymore) +- accept both + and ! for deco's +- accept (and skip) empty fields +- accept ']' in all header fields +- strip leading and trailing spaces from header fields (T: title -> T:title) +- also translate inline fields before a left barline +- recognise volta after forward repeat +- translate dashes to comma's in volta ([1-3 -> [1,3) +- recognise slurs after broken rhythm symbols i.e. replace (ab>>)c -> (ab)>>c +- skip P: fields +- allow lines with a single measure without barline +- combine ~-syllabi (multiple syllabi under one note) +- skip W: lyrics because no musicXML equivalent +- translate longa and breve + +Version 31 +- bug in treatment of double-repeat when it is at the end of a voice-lyrics block +- added declaration because Finale seems to need it. +- added identification/encoding element so people can see what software made the file + +Version 30 +- voice overlays +- merging voices rewritten: lyrics renumbering, measurewise merging +- linebreak after left-bar is output directly (i.e. 
not postponed to the next measure) + +Version 29 +- implementation of beaming +- insert line breaks at start of next measure +- keep only one line break per measure when mapping multiple voices +- renumber slurs when clash occurs during voice mapping +- syntax error message is encoded in latin-1 +- bug in recognition of presence of open tie + +Version 28 +- all chord symbols were erroneously recognized as text annotations (wrong syntax priority) +- Recognize (and skip) redundant left side bars +- don't stumble on a double repeat (::) when it occurs at the end of a voice/lyrics block +- better error message when header fields appear after the first K: +- output of non latin-1 strings on stderr uses repr() + +Version 27 +- Initial release diff --git a/abc2xml/Readme.txt b/abc2xml/Readme.txt new file mode 100644 index 0000000..b8e0bb0 --- /dev/null +++ b/abc2xml/Readme.txt @@ -0,0 +1,76 @@ +---- abc2xml ---- + +abc2xml is a command line utility that translates ABC notation into MusicXML. + +In principle all elements from ABC are translated, but some translations are only partially +implemented. Translated are: +- multiple lyric lines per voice +- mapping voices to staves, brackets and braces (%%score or %%staves) +- voice overlays (only single &-overlays, no multiple &&-overlays yet) +- dynamics, slurs, several decorations (ties between different voices are not possible in MusicXML +and are changed to slurs when present in ABC. Also (illegal) ties between different pitches are converted +to slurs. In both cases a warning message is issued) +- grace notes, tuplets +- keys (maj, min, mix, dor, phr, lyd, loc and none), meter, tempo +- clefs (only most common clef names, transpose= and middle= are supported) +- jazz chord symbols and text annotations +- beaming. Only one beam per abc-beam group is translated at the moment, which is +sufficient for MuseScore. In musicXML every beam should be explicitly notated, so a 32nd +note should start 3 beams. 
+- scale, page size and page margins are recognized as either %% or I: directive. The scale value is +the distance between two stafflines in millimeters. The other values are also in millimeters unless +they are followed by a unit (cm,in,pt). +- %%MIDI program and %%MIDI channel (or I:MIDI ..) are translated when used in a current voice +(i.e. after a V: definition). The instrument of a voice cannot be changed in the middle of a tune. +If one does so, only the last %%MIDI will be used for the whole voice. (and the earlier settings are +discarded). %%MIDI transpose is translated and has the same effect as transpose= in the clef, i.e. +only play back is transposed, not notation. In %%MIDI program=num, the number should be between 0 +and 127. Also in %%MIDI channel=num, the number starts from zero. The midi translation supports +mapping multiple voices with different instruments to one stave. (i.e. the resulting xml part will +have multiple instruments). This feature, though present in MusicXML is not supported by MuseScore, +nor by Finale Notepad. These programs only allow one instrument per stave. +- multiple tunes within one abc file can be converted to a set of xml files, one file per tune. + +In conjunction with xml2abc the translation from xml -> abc -> xml works for all examples +in the set from MakeMusic. The translated examples produce reasonable score when typeset with MuseScore. + +---- Usage: ---- + +When you have Python installed: +> python abc2xml.py [-h] [-m SKIP NUM] [-o DIR] [-p PFMT] [-z MODE] file1 [file2 ...] + +When you use the Win32 executable: +> abc2xml.exe [-h] [-m SKIP NUM] [-o DIR] [-p PFMT] [-z MODE] file1 [file2 ...] + +Translates all .abc files in the file list to MusicXML. Output goes to stdout unless the -o option +is given. Wildcards in file names are expanded. +Option -h prints help message with explanation of the options +Option -m skip num skips skip tunes and then reads at most num tunes. 
+Can be used when abc files contain multiple tunes (tune books) to select only a subset of the tunes. +The default skips nothing (skip=0) and reads 1 tune (num=1). +Option -o dir translates every .abc file to a separate .xml file with the same name +into directory dir. For example, -o. puts all xml files into the same directory where +the input files reside. +Option -p fmt sets the page format of the output. fmt should be a string with 7 float +values separated by commas without any spaces. The values are: space, page-height, -width, and +page margin left, -right, -top, -bottom. space defines the scale of the whole score and equals the +distance between two staff lines in mm. When the -p option is omitted the values default to A4 with +10mm margins and space=1.764. All commandline values are in millimeters. +Option -z mode or --mxl mode writes compressed xml files with extension .mxl. +If mode is a or add both .xml and .mxl files will be written. If mode is r or replace only .mxl +files are written. + +---- Download ---- + +The python script: abc2xml.py-58.zip +http://wim.vree.org/svgParse/abc2xml.py-58.zip + +Stand alone win32 executable: abc2xml.exe-58.zip +http://wim.vree.org/svgParse/abc2xml.exe-58.zip + +---- ABC Syntax ---- + +ABC is recognized by a high level parser that reads the ABC syntax in a notation close to (E)BNF. +The drawback of this approach is that many dialects of ABC will cause syntax errors. +In addition, the high level parser implementation in pure python (pyparsing.py) is slow. +The pyparsing library is included (as a single python file) in abc2xml.py-58.zip \ No newline at end of file diff --git a/abc2xml/abc2xml.py b/abc2xml/abc2xml.py new file mode 100644 index 0000000..c4bfc32 --- /dev/null +++ b/abc2xml/abc2xml.py @@ -0,0 +1,1747 @@ +# coding=latin-1 +''' +Copyright (C) 2012: Willem G. 
Vree
+Contributions: Nils Liberg, Nicolas Froment, Norman Schmidt, Reinier Maliepaard, Martin Tarenskeen
+
+This program is free software; you can redistribute it and/or modify it under the terms of the
+GNU General Public License as published by the Free Software Foundation; either version 2 of
+the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details. .
+'''
+
+from pyparsing import Word, OneOrMore, Optional, Literal, NotAny, MatchFirst
+from pyparsing import Group, oneOf, Suppress, ZeroOrMore, Combine, FollowedBy
+from pyparsing import srange, CharsNotIn, StringEnd, LineEnd, White, Regex
+from pyparsing import nums, alphas, alphanums, ParseException, Forward
+try: import xml.etree.cElementTree as E # use cElementTree when it can be imported
+except: import xml.etree.ElementTree as E # otherwise fall back to ElementTree
+import types, sys, os, re, datetime
+
+VERSION = 58
+
+def info (s, warn=1): # write message s to stderr, prefixed with '-- ' when warn is true
+    x = (warn and '-- ' or '') + s
+    try: sys.stderr.write (x + '\n')
+    except: sys.stderr.write (repr (x) + '\n') # when the plain write raises, write the repr() of the message instead
+
+def abc_grammar (): # builds the pyparsing grammars for the ABC header, the I:score definition, a voice and lyric lines
+    b1 = Word (u"-,'<>\u2019#", exact=1) # catch misplaced chars in chords
+
+    #-----------------------------------------------------------------
+    # ABC header (fld_text elements are matched later with regular expressions)
+    #-----------------------------------------------------------------
+
+    number = Word (nums).setParseAction (lambda t: int (t[0])) # digits, converted to int
+    field_str = Regex (r'(?:\\.|[^]\\])*') # match anything until end of field, skip escaped \]
+    field_str.setParseAction (lambda t: t[0].strip ()) # and strip spacing
+
+    userdef_symbol = Word (srange ('[H-Wh-w~]'), exact=1) # exactly one character out of H-W, h-w or ~
+    fieldId = oneOf ('K L M Q P I T C O A Z N G H R B D F S E r') # info fields
+    X_field = Literal ('X') + Suppress (':') + number + field_str
+    U_field = Literal ('U') + Suppress (':') + userdef_symbol + Suppress ('=') + field_str
+    V_field = Literal ('V') + Suppress (':') + Word (alphanums + '_') + field_str
+    inf_fld = fieldId + Suppress (':') + field_str
+    ifield = Suppress ('[') + (X_field | U_field | V_field | inf_fld) + Suppress (']')
+    abc_header = OneOrMore (ifield) + StringEnd ()
+
+    #---------------------------------------------------------------------------------
+    # I:score with recursive part groups and {* grand staff marker
+    #---------------------------------------------------------------------------------
+
+    voiceId = Suppress (Optional ('*')) + Word (alphanums + '_') # a leading '*' is accepted but suppressed
+    voice_gr = Suppress ('(') + OneOrMore (voiceId | Suppress ('|')) + Suppress (')')
+    simple_part = voiceId | voice_gr | Suppress ('|')
+    grand_staff = oneOf ('{* {') + OneOrMore (simple_part) + Suppress ('}')
+    part = Forward () # forward declaration, the expression is assigned with << below
+    part_seq = OneOrMore (part | Suppress ('|'))
+    brace_gr = Suppress ('{') + part_seq + Suppress ('}')
+    bracket_gr = Suppress ('[') + part_seq + Suppress ('\]') # closing brackets are escaped by splitHeaderVoices
+    part << MatchFirst (simple_part | grand_staff | brace_gr | bracket_gr | Suppress ('|'))
+    abc_scoredef = Suppress (oneOf ('staves score')) + OneOrMore (part)
+
+    #---------------------------------------------------------------------------------
+    # ABC voice (not white space sensitive, beams detected in note/rest parse actions)
+    #---------------------------------------------------------------------------------
+
+    inline_field = Suppress ('[') + (inf_fld | U_field | V_field) + Suppress (']')
+
+    note_length = Optional (number, 1) + Group (ZeroOrMore ('/')) + Optional (number, 2) # number (default 1), group of slashes, number (default 2)
+    octaveHigh = OneOrMore ("'").setParseAction (lambda t: len(t)) # count of quotes
+    octaveLow = OneOrMore (',').setParseAction (lambda t: -len(t)) # negated count of commas
+    octave = octaveHigh | octaveLow
+
+    basenote = oneOf ('C D E F G A B c d e f g a b y') # includes spacer for parse efficiency
+    accidental = oneOf ('^^ __ ^ _ =')
+    rest_sym = oneOf ('x X z Z')
+    slur_beg = oneOf ('( .(') + ~Word (nums) # no tuplet_start
+    slur_ends = OneOrMore (oneOf (') .)'))
+
+    long_decoration = Combine (oneOf ('! +') + CharsNotIn ('!+ \n') + oneOf ('! +'))
+    staccato = Literal ('.') + ~Literal ('|') # avoid dotted barline
+    decoration = staccato | userdef_symbol | long_decoration | slur_beg
+    decorations = OneOrMore (decoration)
+    staff_decos = decorations + ~oneOf (': | [|] []')
+
+    tie = oneOf ('.- -')
+    rest = Optional (accidental) + rest_sym + note_length
+    pitch = Optional (accidental) + basenote + Optional (octave, 0)
+    note = pitch + note_length + Optional (tie) + Optional (slur_ends)
+    chord_note = Optional (decorations) + pitch + note_length + Optional (tie) + Optional (slur_ends)
+    chord_notes = OneOrMore (chord_note | rest | b1)
+    grace_notes = Forward () # forward declaration, the expression is assigned with << below
+    chord = Suppress ('[') + OneOrMore (chord_notes | grace_notes) + Suppress (']') + note_length + Optional (tie) + Optional (slur_ends)
+    stem = note | chord | rest
+
+    broken = Combine (OneOrMore ('<') | OneOrMore ('>'))
+
+    tuplet_num = Suppress ('(') + number
+    tuplet_into = Suppress (':') + Optional (number, 0)
+    tuplet_notes = Suppress (':') + Optional (number, 0)
+    tuplet_start = tuplet_num + Optional (tuplet_into + Optional (tuplet_notes))
+
+    acciaccatura = Literal ('/')
+    grace_stem = Optional (decorations) + stem
+    grace_notes << Group (Suppress ('{') + Optional (acciaccatura) + OneOrMore (grace_stem) + Suppress ('}'))
+
+    text_expression = Optional (oneOf ('^ _ < > @'), '^') + Optional (CharsNotIn ('"'), "")
+    chord_accidental = oneOf ('# b =')
+    triad = oneOf ('ma Maj maj M mi min m aug dim o + -')
+    seventh = oneOf ('7 ma7 Maj7 M7 maj7 mi7 m7 dim7 o7 -7 aug7 +7 m7b5 mi7b5')
+    sixth = oneOf ('6 ma6 M6 m6 mi6')
+    ninth = oneOf ('9 ma9 M9 maj9 Maj9 mi9 m9')
+    elevn = oneOf ('11 ma11 M11 maj11 Maj11 mi m11') # NOTE(review): 'mi' looks like it was meant to be 'mi11' - confirm
+    suspended = oneOf ('sus sus2 sus4')
+    chord_degree = Combine (Optional (chord_accidental) + oneOf ('2 4 5 6 7 9 11 13'))
+    chord_kind = Optional (seventh | sixth | ninth | elevn | triad, '_') + Optional (suspended)
+    chord_root = oneOf ('C D E F G A B') + Optional (chord_accidental)
+    chord_bass = oneOf ('C D E F G A B') + Optional (chord_accidental) # needs a different parse action
+    chordsym = chord_root + chord_kind + ZeroOrMore (chord_degree) + Optional (Suppress ('/') + chord_bass)
+    chord_sym = chordsym + Optional (Literal ('(') + CharsNotIn (')') + Literal (')')).suppress ()
+    chord_or_text = Suppress ('"') + (chord_sym ^ text_expression) + Suppress ('"')
+
+    volta_nums = Optional ('[').suppress () + Combine (Word (nums) + ZeroOrMore (oneOf (', -') + Word (nums)))
+    volta_text = Literal ('[').suppress () + Regex (r'"[^"]+"')
+    volta = volta_nums | volta_text
+    invisible_barline = oneOf ('[|] []')
+    dashed_barline = oneOf (': .|')
+    double_rep = Literal (':') + FollowedBy (':') # otherwise ambiguity with dashed barline
+    voice_overlay = Combine (OneOrMore ('&'))
+    bare_volta = FollowedBy (Literal ('[') + Word (nums)) # no barline, but volta follows (volta is parsed in next measure)
+    bar_left = (oneOf ('[|: |: [: :') + Optional (volta)) | Optional ('|').suppress () + volta | oneOf ('| [|')
+    bars = ZeroOrMore (':') + ZeroOrMore ('[') + OneOrMore (oneOf ('| ]'))
+    bar_right = Optional (decorations) + (invisible_barline | double_rep | Combine (bars) | dashed_barline | voice_overlay | bare_volta)
+
+    errors = ~bar_right + Optional (Word (' \n')) + CharsNotIn (':&|', exact=1)
+    linebreak = Literal ('$') | ~decorations + Literal ('!') # no need for I:linebreak !!!
+    element = inline_field | broken | staff_decos | stem | chord_or_text | grace_notes | tuplet_start | linebreak | errors
+    measure = Group (ZeroOrMore (inline_field) + Optional (bar_left) + ZeroOrMore (element) + bar_right + Optional (linebreak))
+    noBarMeasure = Group (ZeroOrMore (inline_field) + Optional (bar_left) + OneOrMore (element) + Optional (linebreak))
+    abc_voice = ZeroOrMore (measure) + Optional (noBarMeasure | Group (bar_left)) + ZeroOrMore (inline_field).suppress () + StringEnd ()
+
+    #----------------------------------------
+    # ABC lyric lines (white space sensitive)
+    #----------------------------------------
+
+    skip_note = oneOf ('* - ~')
+    extend_note = Literal ('_')
+    measure_end = Literal ('|')
+    syl_chars = CharsNotIn ('*~-_| \t\n')
+    white = Word (' \t')
+    syllable = Combine (Optional ('~') + syl_chars + ZeroOrMore (Literal ('~') + syl_chars)) + Optional ('-')
+    lyr_elem = (syllable | skip_note | extend_note | measure_end) + Optional (white).suppress ()
+    lyr_head = (Literal ('w:') + Optional (white)).suppress ()
+    lyr_line = Group (lyr_head + ZeroOrMore (lyr_elem) + LineEnd ().suppress ())
+
+    #----------------------------------------------------------------
+    # Parse actions to convert all relevant results into an abstract
+    # syntax tree where all tree nodes are instances of pObj
+    #----------------------------------------------------------------
+
+    ifield.setParseAction (lambda t: pObj ('field', t))
+    grand_staff.setParseAction (lambda t: pObj ('grand', t, 1)) # 1 = keep ordered list of results
+    brace_gr.setParseAction (lambda t: pObj ('bracegr', t, 1))
+    bracket_gr.setParseAction (lambda t: pObj ('bracketgr', t, 1))
+    voice_gr.setParseAction (lambda t: pObj ('voicegr', t, 1))
+    voiceId.setParseAction (lambda t: pObj ('vid', t, 1))
+    abc_scoredef.setParseAction (lambda t: pObj ('score', t, 1))
+    note_length.setParseAction (lambda t: pObj ('dur', (t[0], (t[2] << len (t[1])) >> 1))) # pair (t[0], t[2] * 2**(number of slashes - 1)), e.g. '3/' -> (3, 2), no length -> (1, 1)
+    chordsym.setParseAction (lambda t: pObj ('chordsym', t))
+    chord_root.setParseAction (lambda t: pObj ('root', t))
+    chord_kind.setParseAction (lambda t: pObj ('kind', t))
+    chord_degree.setParseAction (lambda t: pObj ('degree', t))
+    chord_bass.setParseAction (lambda t: pObj ('bass', t))
+    text_expression.setParseAction (lambda t: pObj ('text', t))
+    inline_field.setParseAction (lambda t: pObj ('inline', t))
+    grace_notes.setParseAction (doGrace) # (lambda t: pObj ('grace', t))
+    acciaccatura.setParseAction (lambda t: pObj ('accia', t))
+    note.setParseAction (noteActn)
+    chord_note.setParseAction (noteActn)
+    rest.setParseAction (restActn)
+    decorations.setParseAction (lambda t: pObj ('deco', t))
+    slur_ends.setParseAction (lambda t: pObj ('slurs', t))
+    chord.setParseAction (lambda t: pObj ('chord', t))
+    tie.setParseAction (lambda t: pObj ('tie', t))
+    pitch.setParseAction (lambda t: pObj ('pitch', t))
+    bare_volta.setParseAction (lambda t: ['|']) # return barline that user forgot
+    dashed_barline.setParseAction (lambda t: ['.|']) # both ':' and '.|' are returned as '.|'
+    bar_right.setParseAction (lambda t: pObj ('rbar', t))
+    bar_left.setParseAction (lambda t: pObj ('lbar', t))
+    broken.setParseAction (lambda t: pObj ('broken', t))
+    tuplet_start.setParseAction (lambda t: pObj ('tup', t))
+    linebreak.setParseAction (lambda t: pObj ('linebrk', t))
+    measure.setParseAction (doMaat)
+    noBarMeasure.setParseAction (doMaat)
+    syllable.setParseAction (lambda t: pObj ('syl', t))
+    skip_note.setParseAction (lambda t: pObj ('skip', t))
+    extend_note.setParseAction (lambda t: pObj ('ext', t))
+    measure_end.setParseAction (lambda t: pObj ('sbar', t))
+    b1.setParseAction (errorWarn)
+    errors.setParseAction (errorWarn)
+    lyr_block = OneOrMore (lyr_line).leaveWhitespace () # after leaveWhiteSpace no more parse actions can be set!!
+
+    return abc_header, abc_voice, lyr_block, abc_scoredef
+
+class pObj (object): # every relevant parse result is converted into a pObj
+    def __init__ (s, name, t, seq=0): # t = list of nested parse results, seq true = also keep t in its original order in s.objs
+        s.name = name # name uniquely identifies this pObj
+        rest = [] # collect parse results that are not a pObj
+        attrs = {} # new attributes
+        for x in t: # nested pObj's become attributes of this pObj
+            if type (x) == pObj:
+                attrs [x.name] = attrs.get (x.name, []) + [x] # group nested pObj's by their name
+            else:
+                rest.append (x) # collect non-pObj's (mostly literals)
+        for name, xs in attrs.items (): # NOTE(review): rebinds the parameter 'name'; s.name was already stored above
+            if len (xs) == 1: xs = xs[0] # only list if more than one pObj
+            setattr (s, name, xs) # create the new attributes
+        s.t = rest # all nested non-pObj's (mostly literals)
+        s.objs = seq and t or [] # for nested ordered (lyric) pObj's
+
+    def __repr__ (s): # make a nice string representation of a pObj
+        r = []
+        for nm in dir (s):
+            if nm.startswith ('_'): continue # skip built-in attributes
+            elif nm == 'name': continue # redundant
+            else:
+                x = getattr (s, nm)
+                if not x: continue # s.t may be empty (list of non-pObj's)
+                if type (x) == types.ListType: r.extend (x) # types.ListType exists only in Python 2
+                else: r.append (x)
+        xs = []
+        for x in r: # recursively call __repr__ and convert all strings to latin-1
+            if isinstance (x, types.StringTypes):
+                try: xs.append (x.encode ('latin-1'))
+                except: xs.append (repr (x)) # string -> no recursion
+            else: xs.append (repr (x)) # pObj -> recursive call
+        return '(' + s.name + ' ' +','.join (xs) + ')'
+
+global prevloc # global to remember previous match position of a note/rest
+prevloc = 0
+def detectBeamBreak (line, loc, t): # inserts a 'bbrk' pObj at the front of t and updates the global prevloc
+    global prevloc # location in string 'line' of previous note match
+    xs = line[prevloc:loc+1] # string between previous and current note match
+    xs = xs.lstrip () # first note match starts on a space!
+    prevloc = loc # location in string 'line' of current note match
+    b = pObj ('bbrk', [' ' in xs]) # space somewhere between two notes -> beambreak
+    t.insert (0, b) # insert beambreak as a nested parse result
+
+def noteActn (line, loc, t): # detect beambreak between previous and current note/rest
+    if 'y' in t[0].t: return [] # discard spacer
+    detectBeamBreak (line, loc, t) # adds beambreak to parse result t as side effect
+    return pObj ('note', t)
+
+def restActn (line, loc, t): # detect beambreak between previous and current note/rest
+    detectBeamBreak (line, loc, t) # adds beambreak to parse result t as side effect
+    return pObj ('rest', t)
+
+def errorWarn (line, loc, t): # warning for misplaced symbols and skip them
+    info ('**misplaced symbol: %s' % t[0], warn=0)
+    lineCopy = line [:]
+    if loc > 40: # show only the 40 characters before and after the symbol
+        lineCopy = line [loc - 40: loc + 40]
+        loc = 40 # position of the symbol inside the shortened copy
+    info (lineCopy.replace ('\n', ' '), warn=0)
+    info (loc * '-' + '^', warn=0) # dashes followed by a caret at column loc
+    return [] # empty result: the symbol is dropped from the parse results
+
+#-------------------------------------------------------------
+# transformations of a measure (called by parse action doMaat)
+#-------------------------------------------------------------
+
+def simplify (a, b): # divide a and b by their greatest common divisor
+    x, y = a, b
+    while b: a, b = b, a % b # Euclid's algorithm, afterwards a holds the gcd
+    return x / a, y / a # NOTE(review): integer results rely on Python 2 '/'; Python 3 would return floats here
+
+def doBroken (prev, brk, x):
+    if not prev: info ('error in broken rhythm: %s' % x); return # no changes
+    nom1, den1 = prev.dur.t # duration of first note/chord
+    nom2, den2 = x.dur.t # duration of second note/chord
+    if brk == '>':
+        nom1, den1 = simplify (3 * nom1, 2 * den1)
+        nom2, den2 = simplify (1 * nom2, 2 * den2)
+    elif brk == '<':
+        nom1, den1 = simplify (1 * nom1, 2 * den1)
+        nom2, den2 = simplify (3 * nom2, 2 * den2)
+    elif brk == '>>':
+        nom1, den1 = simplify (7 * nom1, 4 * den1)
+        nom2, den2 = simplify (1 * nom2, 4 * den2)
+    elif brk == '<<':
+        nom1, den1 = simplify (1 * nom1, 4 * den1)
+        nom2, den2 = simplify (7 * nom2, 4 * den2)
+    else: return # give up
+    prev.dur.t = nom1, den1
# change duration of previous note/chord + x.dur.t = nom2, den2 # and current note/chord + +def convertBroken (t): # convert broken rhythms to normal note durations + prev = None # the last note/chord before the broken symbol + brk = '' # the broken symbol + remove = [] # indexes to broken symbols (to be deleted) in measure + for i, x in enumerate (t): # scan all elements in measure + if x.name == 'note' or x.name == 'chord' or x.name == 'rest': + if brk: # a broken symbol was encountered before + doBroken (prev, brk, x) # change duration previous note/chord/rest and current one + brk = '' + else: + prev = x # remember the last note/chord/rest + elif x.name == 'broken': + brk = x.t[0] # remember the broken symbol (=string) + remove.insert (0, i) # and its index, highest index first + for i in remove: del t[i] # delete broken symbols from high to low + +def convertChord (t): # convert chord to sequence of notes in musicXml-style + ins = [] + for i, x in enumerate (t): + if x.name == 'chord': + if hasattr (x, 'rest') and not hasattr (x, 'note'): # chords containing only rests + if type (x.rest) == types.ListType: x.rest = x.rest[0] # more rests == one rest + ins.insert (0, (i, [x.rest])) # just output a single rest, no chord + continue + num1, den1 = x.dur.t # chord duration + tie = getattr (x, 'tie', None) # chord tie + slurs = getattr (x, 'slurs', []) # slur endings + deco = getattr (x, 'deco', []) # chord decorations + if type (x.note) != types.ListType: x.note = [x.note] # when chord has only one note ... 
+ for j, nt in enumerate (x.note): # all notes of the chord + num2, den2 = nt.dur.t # note duration * chord duration + nt.dur.t = simplify (num1 * num2, den1 * den2) + if tie: nt.tie = tie # tie on all chord notes + if j == 0 and deco: nt.deco = deco # decorations only on first chord note + if j == 0 and slurs: nt.slurs = slurs # slur endings only on first chord note + if j > 0: nt.chord = pObj ('chord', [1]) # label all but first as chord notes + else: # remember all pitches of the chord in the first note + pitches = [n.pitch for n in x.note] # to implement conversion of erroneous ties to slurs + nt.pitches = pObj ('pitches', pitches) + ins.insert (0, (i, x.note)) # high index first + for i, notes in ins: # insert from high to low + for nt in reversed (notes): + t.insert (i+1, nt) # insert chord notes after chord + del t[i] # remove chord itself + +def doMaat (t): # t is a Group() result -> the measure is in t[0] + convertBroken (t[0]) # remove all broken rhythms and convert to normal durations + convertChord (t[0]) # replace chords by note sequences in musicXML style + +def doGrace (t): # t is a Group() result -> the grace sequence is in t[0] + convertChord (t[0]) # a grace sequence may have chords + for nt in t[0]: # flag all notes within the grace sequence + if nt.name == 'note': nt.grace = 1 # set grace attribute + return t[0] # ungroup the parse result +#-------------------- +# musicXML generation +#---------------------------------- + +def compChordTab (): # avoid some typing work: returns mapping constant {ABC chordsyms -> musicXML kind} + maj, min, aug, dim, dom, ch7, ch6, ch9, ch11, hd = 'major minor augmented diminished dominant -seventh -sixth -ninth -11th half-diminished'.split () + triad = zip ('ma Maj maj M mi min m aug dim o + -'.split (), [maj, maj, maj, maj, min, min, min, aug, dim, dim, aug, min]) + seventh = zip ('7 ma7 Maj7 M7 maj7 mi7 m7 dim7 o7 -7 aug7 +7 m7b5 mi7b5'.split (), + [dom, maj+ch7, maj+ch7, maj+ch7, maj+ch7, min+ch7, min+ch7, 
dim+ch7, dim+ch7, min+ch7, aug+ch7, aug+ch7, hd, hd]) + sixth = zip ('6 ma6 M6 mi6 m6'.split (), [maj+ch6, maj+ch6, maj+ch6, min+ch6, min+ch6]) + ninth = zip ('9 ma9 M9 maj9 Maj9 mi9 m9'.split (), [dom+ch9, maj+ch9, maj+ch9, maj+ch9, maj+ch9, min+ch9, min+ch9]) + elevn = zip ('11 ma11 M11 maj11 Maj11 mi11 m11'.split (), [dom+ch11, maj+ch11, maj+ch11, maj+ch11, maj+ch11, min+ch11, min+ch11]) + return dict (triad + seventh + sixth + ninth + elevn) + +def addElem (parent, child, level): + indent = 2 + chldrn = parent.getchildren () + if chldrn: + chldrn[-1].tail += indent * ' ' + else: + parent.text = '\n' + level * indent * ' ' + parent.append (child) + child.tail = '\n' + (level-1) * indent * ' ' + +def addElemT (parent, tag, text, level): + e = E.Element (tag) + e.text = text + addElem (parent, e, level) + +def mkTmod (tmnum, tmden, lev): + tmod = E.Element ('time-modification') + addElemT (tmod, 'actual-notes', str (tmnum), lev + 1) + addElemT (tmod, 'normal-notes', str (tmden), lev + 1) + return tmod + +def addDirection (parent, elem, lev, gstaff, subelms=[], placement='below', cue_on=0): + dir = E.Element ('direction', placement=placement) + addElem (parent, dir, lev) + typ = E.Element ('direction-type') + addElem (dir, typ, lev + 1) + addElem (typ, elem, lev + 2) + for subel in subelms: addElem (elem, subel, lev + 3) + if cue_on: addElem (dir, E.Element ('level', size='cue'), lev + 1) + if gstaff: addElemT (dir, 'staff', str (gstaff), lev + 1) + return dir + +def removeElems (root_elem, parent_str, elem_str): + for p in root_elem.findall (parent_str): + e = p.find (elem_str) + if e != None: p.remove (e) + +def alignLyr (vce, lyrs): + empty_el = pObj ('leeg', '*') + for k, lyr in enumerate (lyrs): # lyr = one full line of lyrics + i = 0 # syl counter + for msre in vce: # reiterate the voice block for each lyrics line + for elem in msre: + if elem.name == 'note' and not (hasattr (elem, 'chord') or hasattr (elem, 'grace')): + if i >= len (lyr): lr = empty_el + 
else: lr = lyr [i] + elem.objs.append (lr) + if lr.name != 'sbar': i += 1 + if i < len (lyr) and lyr[i].name == 'sbar': i += 1 + return vce + +slur_move = re.compile (r'(?<][<>]?)(\)+)') # (? I: + x2 = r1.sub ('', x) # remove comment + while x2.endswith ('*'): x2 = x2[:-1] # remove old syntax for right adjusting + if not x2: continue # empty line + if x2[:2] == 'W:': continue # skip W: lyrics + if x2[:2] == 'w:' and xs[-1][-1] == '\\': + xs[-1] = xs[-1][:-1] # ignore line continuation before lyrics line + ro = r2.match (x2) + if ro: # field -> inline_field, escape all ']' + if x2[-1] == '\\': x2 = x2[:-1] # ignore continuation after field line + x2 = '[' + x2.replace (']',r'\]') + ']' + if x2[:2] == '+:': # new style continuation + xs[-1] += x2[2:] + elif xs and xs[-1][-1] == '\\': # old style continuation + xs[-1] = xs[-1][:-1] + x2 + else: # skip lines (except I:) until first X: + if x.startswith ('X:'): + if nx == 1: break # second tune starts without an empty line !! + nx = 1 # start of first tune + if nx == 1 or x.startswith ('I:'): + xs.append (x2) + if xs and xs[-1][-1] == '\\': # nothing left to continue with, remove last continuation + xs[-1] = xs[-1][:-1] + + r1 = re.compile (r'\[[A-Z]:(\\.|[^]\\])*\]') # inline field with escaped ']' + r2 = re.compile (r'\[K:') # start of K: field + r3 = re.compile (r'\[V:|\[I:MIDI') # start of V: field or midi field + fields, voices, b = [], [], 0 + for i, x in enumerate (xs): + n = len (r1.sub ('', x)) # remove all inline fields + if n > 0: b = 1; break # real abc present -> end of header + if r2.search (x): # start of K: field + fields.append (x) + i += 1; b = 1 + break # first K: field -> end of header + if r3.search (x): # start of V: field + voices.append (x) + else: + fields.append (x) + if b: voices += xs[i:] + else: voices += [] # tune has only header fields + header = '\n'.join (fields) + abctext = '\n'.join (voices) + + xs = abctext.split ('[V:') + if len (xs) == 1: abctext = '[V:1]' + abctext # abc has no 
voice defs at all + elif r1.sub ('', xs[0]).strip (): # remove inline fields from starting text, if any + abctext = '[V:1]' + abctext # abc with voices has no V: at start + + r1 = re.compile (r'\[V:\s*(\S*)[ \]]') # get voice id from V: field (skip spaces betwee V: and ID) + vmap = {} # {voice id -> [voice abc string]} + vorder = {} # mark document order of voices + xs = re.split (r'(\[V:[^]]*\])', abctext) # split on every V-field (V-fields included in split result list) + if len (xs) == 1: raise (Exception ('bugs ...')) + else: + header += xs[0] # xs[0] = text between K: and first V:, normally empty, but we put it in the header + i = 1 + while i < len (xs): # xs = ['', V-field, voice abc, V-field, voice abc, ...] + vce, abc = xs[i:i+2] + id = r1.search (vce).group (1) # get voice ID from V-field + vmap[id] = vmap.get (id, []) + [vce, abc] # collect abc-text for each voice id (include V-fields) + if id not in vorder: vorder [id] = i # store document order of first occurrence of voice id + i += 2 + voices = [] + ixs = sorted ([(i, id) for id, i in vorder.items ()]) # restore document order of voices + for i, id in ixs: + voice = ''.join (vmap [id]) # all abc of one voice + xs = re.split (r'((?:\nw:[^\n]*)+)', voice) # split voice into voice-lyrics blocks + if len (xs) == 1: # no lyrics + voice = fixSlurs (xs[0]) # put slurs right after the notes + vce_lyr = [[voice, '']] + else: + if xs[-1].strip () != '': xs.append ('w:') # last block had no lyrics + vce_lyr = [] # [[voice, lyrics],[],...] 
list of voice-lyrics blocks + for k in range (0, len (xs) - 1, 2): + voice, lyrics = xs [k:k+2] + voice = fixSlurs (voice) # put slurs right after the notes + vce_lyr.append ((voice, lyrics)) + voices.append ((id, vce_lyr)) + return header, voices + +def mergeMeasure (m1, m2, slur_offset, voice_offset, is_grand=0): + slurs = m2.findall ('note/notations/slur') + for slr in slurs: + slrnum = int (slr.get ('number')) + slur_offset + slr.set ('number', str (slrnum)) # make unique slurnums in m2 + vs = m2.findall ('note/voice') # set all voice number elements in m2 + for v in vs: v.text = str (voice_offset + int (v.text)) + ls = m1.findall ('note/lyric') # all lyric elements in m1 + lnum_max = max ([int (l.get ('number')) for l in ls] + [0]) # highest lyric number in m1 + ls = m2.findall ('note/lyric') # update lyric elements in m2 + for el in ls: + n = int (el.get ('number')) + el.set ('number', str (n + lnum_max)) + ns = m1.findall ('note') # determine the total duration of m1, subtract all backups + dur1 = sum (int (n.find ('duration').text) for n in ns + if n.find ('grace') == None and n.find ('chord') == None) + dur1 -= sum (int (b.text) for b in m1.findall ('backup/duration')) + nns, es = 0, [] # nns = number of real notes in m2 + for e in m2.getchildren (): # scan all elements of m2 + if e.tag == 'attributes': + if not is_grand: continue # no attribute merging for normal voices + else: nns += 1 # but we do merge (clef) attributes for a grand staff + if e.tag == 'print': continue + if e.tag == 'note' and (mxm.gmwr or e.find ('rest') == None): nns += 1 + es.append (e) # buffer elements to be merged + if nns > 0: # only merge if m2 contains any real notes + if dur1 > 0: # only insert backup if duration of m1 > 0 + b = E.Element ('backup') + addElem (m1, b, level=3) + addElemT (b, 'duration', str (dur1), level=4) + for e in es: addElem (m1, e, level=3) # merge buffered elements of m2 + +def mergePartList (parts, is_grand=0): # merge parts, make grand staff when 
is_grand true + + def delAttrs (part): # for the time being we only keep clef attributes + xs = [(m, e) for m in part.findall ('measure') for e in m.findall ('attributes')] + for m, e in xs: + for c in e.getchildren (): + if c.tag == 'clef': continue # keep clef attribute + e.remove (c) # delete all other attrinutes for higher staff numbers + if len (e.getchildren ()) == 0: m.remove (e) # remove empty attributes element + + p1 = parts[0] + for p2 in parts[1:]: + if is_grand: delAttrs (p2) # delete all attributes except clef + for i in range (len (p1) + 1, len (p2) + 1): # second part longer than first one + maat = E.Element ('measure', number = str(i)) # append empty measures + addElem (p1, maat, 2) + slurs = p1.findall ('measure/note/notations/slur') # find highest slur num in first part + slur_max = max ([int (slr.get ('number')) for slr in slurs] + [0]) + vs = p1.findall ('measure/note/voice') # all voice number elements in first part + vnum_max = max ([int (v.text) for v in vs] + [0]) # highest voice number in first part + for im, m2 in enumerate (p2.findall ('measure')): # merge all measures of p2 into p1 + mergeMeasure (p1[im], m2, slur_max, vnum_max, is_grand) # may change slur numbers in p1 + return p1 + +def mergeParts (parts, vids, staves, is_grand=0): + if not staves: return parts, vids # no voice mapping + partsnew, vidsnew = [], [] + for voice_ids in staves: + pixs = [] + for vid in voice_ids: + if vid in vids: pixs.append (vids.index (vid)) + else: info ('score partname %s does not exist' % vid) + if pixs: + xparts = [parts[pix] for pix in pixs] + if len (xparts) > 1: mergedpart = mergePartList (xparts, is_grand) + else: mergedpart = xparts [0] + partsnew.append (mergedpart) + vidsnew.append (vids [pixs[0]]) + return partsnew, vidsnew + +def mergePartMeasure (part, msre, ovrlaynum): # merge msre into last measure of part, only for overlays + slurs = part.findall ('measure/note/notations/slur') # find highest slur num in part + slur_max = max ([int 
(slr.get ('number')) for slr in slurs] + [0]) + last_msre = part.getchildren ()[-1] # last measure in part + mergeMeasure (last_msre, msre, slur_max, ovrlaynum) # voice offset = s.overlayVNum + +def setFristVoiceNameFromGroup (vids, vdefs): # vids = [vid], vdef = {vid -> (name, subname, voicedef)} + vids = [v for v in vids if v in vdefs] # only consider defined voices + if not vids: return vdefs + vid0 = vids [0] # first vid of the group + _, _, vdef0 = vdefs [vid0] # keep de voice definition (vdef0) when renaming vid0 + for vid in vids: + nm, snm, vdef = vdefs [vid] + if nm: # first non empty name encountered will become + vdefs [vid0] = nm, snm, vdef0 # name of merged group == name of first voice in group (vid0) + break + return vdefs + +def mkGrand (p, vdefs): # transform parse subtree into list needed for s.grands + xs = [] + for i, x in enumerate (p.objs): # changing p.objs [i] alters the tree. changing x has no effect on the tree. + if type (x) == pObj: + us = mkGrand (x, vdefs) # first get transformation results of current pObj + if x.name == 'grand': # x.objs contains ordered list of nested parse results within x + vids = [y.objs[0] for y in x.objs[1:]] # the voice ids in the grand staff + nms = [vdefs [u][0] for u in vids if u in vdefs] # the names of those voices + accept = sum ([1 for nm in nms if nm]) == 1 # accept as grand staff when only one of the voices has a name + if accept or us[0] == '{*': + xs.append (us[1:]) # append voice ids as a list (discard first item '{' or '{*') + vdefs = setFristVoiceNameFromGroup (vids, vdefs) + p.objs [i] = x.objs[1] # replace voices by first one in the grand group (this modifies the parse tree) + else: + xs.extend (us[1:]) # extend current result with all voice ids of rejected grand staff + else: xs.extend (us) # extend current result with transformed pObj + else: xs.append (p.t[0]) # append the non pObj (== voice id string) + return xs + +def mkStaves (p, vdefs): # transform parse tree into list needed for s.staves 
+ xs = [] + for i, x in enumerate (p.objs): # structure and comments identical to mkGrand + if type (x) == pObj: + us = mkStaves (x, vdefs) + if x.name == 'voicegr': + xs.append (us) + vids = [y.objs[0] for y in x.objs] + vdefs = setFristVoiceNameFromGroup (vids, vdefs) + p.objs [i] = x.objs[0] + else: + xs.extend (us) + else: + if p.t[0] not in '{*': xs.append (p.t[0]) + return xs + +def mkGroups (p): # transform parse tree into list needed for s.groups + xs = [] + for x in p.objs: + if type (x) == pObj: + if x.name == 'vid': xs.extend (mkGroups (x)) + elif x.name == 'bracketgr': xs.extend (['['] + mkGroups (x) + [']']) + elif x.name == 'bracegr': xs.extend (['{'] + mkGroups (x) + ['}']) + else: xs.extend (mkGroups (x) + ['}']) # x.name == 'grand' == rejected grand staff + else: + xs.append (p.t[0]) + return xs + +class MusicXml: + typeMap = {1:'long', 2:'breve', 4:'whole', 8:'half', 16:'quarter', 32:'eighth', 64:'16th', 128:'32nd', 256:'64th'} + dynaMap = {'p':1,'pp':1,'ppp':1,'f':1,'ff':1,'fff':1,'mp':1,'mf':1,'sfz':1} + wedgeMap = {'>(':1, '>)':1, '<(':1,'<)':1,'crescendo(':1,'crescendo)':1,'diminuendo(':1,'diminuendo)':1} + artMap = {'.':'staccato','>':'accent','accent':'accent','wedge':'staccatissimo','tenuto':'tenuto'} + ornMap = {'trill':'trill-mark','T':'trill-mark','turn':'turn','uppermordent':'inverted-mordent','lowermordent':'mordent', + 'pralltriller':'inverted-mordent','mordent':'mordent','turn':'turn','invertedturn':'inverted-turn'} + tecMap = {'upbow':'up-bow', 'downbow':'down-bow'} + capoMap = {'fine':('Fine','fine','yes'), 'D.S.':('D.S.','dalsegno','segno'), 'D.C.':('D.C.','dacapo','yes'),'dacapo':('D.C.','dacapo','yes'), + 'dacoda':('To Coda','tocoda','coda'), 'coda':('coda','coda','coda'), 'segno':('segno','segno','segno')} + sharpness = ['Fb', 'Cb','Gb','Db','Ab','Eb','Bb','F','C','G','D','A', 'E', 'B', 'F#','C#','G#','D#','A#','E#','B#'] + offTab = {'maj':8, 'm':11, 'min':11, 'mix':9, 'dor':10, 'phr':12, 'lyd':7, 'loc':13} + modTab = 
{'maj':'major', 'm':'minor', 'min':'minor', 'mix':'mixolydian', 'dor':'dorian', 'phr':'phrygian', 'lyd':'lydian', 'loc':'locrian'} + clefMap = { 'alto1':('C','1'), 'alto2':('C','2'), 'alto':('C','3'), 'alto4':('C','4'), 'tenor':('C','4'), + 'bass3':('F','3'), 'bass':('F','4'), 'treble':('G','2'), 'perc':('percussion',''), 'none':('','')} + clefLineMap = {'B':'treble', 'G':'alto1', 'E':'alto2', 'C':'alto', 'A':'tenor', 'F':'bass3', 'D':'bass'} + alterTab = {'=':'0', '_':'-1', '__':'-2', '^':'1', '^^':'2'} + accTab = {'=':'natural', '_':'flat', '__':'flat-flat', '^':'sharp', '^^':'sharp-sharp'} + chordTab = compChordTab () + uSyms = {'~':'roll', 'H':'fermata','L':'>','M':'lowermordent','O':'coda', + 'P':'uppermordent','S':'segno','T':'trill','u':'upbow','v':'downbow'} + pageFmtDef = [1.764,297,210,10,10,10,10] # the MuseScore page formatting defaults for A4 + creditTab = {'O':'origin', 'A':'area', 'Z':'transcription', 'N':'notes', 'G':'group', 'H':'history', 'R':'rhythm', + 'B':'book', 'D':'discography', 'F':'fileurl', 'S':'source'} + + def __init__ (s): + s.pageFmtCmd = [] # set by command line option -p + s.gmwr = 0 # set by command line option -r + s.reset () + def reset (s): + s.divisions = 120 # xml duration of 1/4 note + s.ties = {} # {abc pitch tuple -> alteration} for all open ties + s.slurstack = [] # stack of open slur numbers + s.slurbeg = 0 # number of slurs to start (when slurs are detected at element-level) + s.tmnum = 0 # time modification, numerator + s.tmden = 0 # time modification, denominator + s.ntup = 0 # number of tuplet notes remaining + s.tupnts = [] # all tuplet modifiers with corresp. durations: [(duration, modifier), ...] 
+ s.irrtup = 0 # 1 if an irregular tuplet + s.ntype = '' # the normal-type of a tuplet (== duration type of a normal tuplet note) + s.unitL = (1, 8) # default unit length + s.unitLcur = (1, 8) # unit length of current voice + s.keyAlts = {} # alterations implied by key + s.msreAlts = {} # temporarily alterations + s.curVolta = '' # open volta bracket + s.slurstack = [] # stack of open slur numbers + s.title = '' # title of music + s.creator = {} # {creator-type -> creator string} + s.credits = {} # {credit-type -> string} + s.lyrdash = {} # {lyric number -> 1 if dash between syllables} + s.usrSyms = s.uSyms # user defined symbols + s.prevNote = None # xml element of previous beamed note to correct beams (start, continue) + s.grcbbrk = False # remember any bbrk in a grace sequence + s.linebrk = 0 # 1 if next measure should start with a line break + s.bardecos = [] # barline decorations (coda, segno) that go into the next measure (MuseScore deficiency!) + s.nextdecos = [] # decorations for the next note + s.prevmsre = None # the previous measure + s.supports_tag = 0 # issue supports-tag in xml file when abc uses explicit linebreaks + s.staveDefs = [] # collected %%staves or %%score instructions from score + s.staves = [] # staves = [[voice names to be merged into one stave]] + s.groups = [] # list of merged part names with interspersed {[ and }] + s.grands = [] # [[vid1, vid2, ..], ...] 
voiceIds to be merged in a grand staff + s.gStaffNums = {} # map each voice id in a grand staff to a staff number + s.gNstaves = {} # map each voice id in a grand staff to total number of staves + s.pageFmtAbc = [] # formatting from abc directives + s.mdur = (4,4) # duration of one measure + s.gtrans = 0 # octave transposition (by clef) + s.midprg = ['', ''] # MIDI channel nr, program nr for the current part + s.vid = '' # abc voice id for the current part + s.gcue_on = 0 # insert tag in each note + + def mkPitch (s, acc, note, oct, lev): + nUp = note.upper () + octnum = (4 if nUp == note else 5) + int (oct) + s.gtrans + pitch = E.Element ('pitch') + addElemT (pitch, 'step', nUp, lev + 1) + alter = '' + if (note, oct) in s.ties: + tied_alter, _, vnum = s.ties [(note,oct)] # vnum = overlay voice number when tie started + if vnum == s.overlayVnum: alter = tied_alter # tied note in the same overlay -> same alteration + elif acc: + s.msreAlts [(nUp, octnum)] = s.alterTab [acc] + alter = s.alterTab [acc] # explicit notated alteration + elif (nUp, octnum) in s.msreAlts: alter = s.msreAlts [(nUp, octnum)] # temporary alteration + elif nUp in s.keyAlts: alter = s.keyAlts [nUp] # alteration implied by the key + if alter: addElemT (pitch, 'alter', alter, lev + 1) + addElemT (pitch, 'octave', str (octnum), lev + 1) + return pitch, alter + + def mkNote (s, n, lev): + nnum, nden = n.dur.t # abc dutation of note + if nden == 0: nden = 1 # occurs with illegal ABC like: "A2 1". 
Now interpreted as A2/1 + num, den = simplify (nnum * s.unitLcur[0], nden * s.unitLcur[1]) # normalised with unit length + if den > 64: # limit denominator to 64 + num = int (round (64 * float (num) / den)) # scale note to num/64 + num, den = simplify (max ([num, 1]), 64) # smallest num == 1 + info ('duration too small: rounded to %d/%d' % (num, den)) + if n.name == 'rest' and ('Z' in n.t or 'X' in n.t): + num, den = s.mdur # duration of one measure + dvs = (4 * s.divisions * num) / den # divisions is xml-duration of 1/4 + rdvs = dvs # real duration (will be 0 for chord/grace) + num, den = simplify (num, den * 4) # scale by 1/4 for s.typeMap + ndot = 0 + if num == 3: ndot = 1; den = den / 2 # look for dotted notes + if num == 7: ndot = 2; den = den / 4 + nt = E.Element ('note') + if getattr (n, 'grace', ''): # a grace note (and possibly a chord note) + grace = E.Element ('grace') + if s.acciatura: grace.set ('slash', 'yes'); s.acciatura = 0 + addElem (nt, grace, lev + 1) + dvs = rdvs = 0 # no (real) duration for a grace note + if den <= 16: den = 32 # not longer than 1/8 for a grace note + if s.gcue_on: # insert cue tag + cue = E.Element ('cue') + addElem (nt, cue, lev + 1) + if getattr (n, 'chord', ''): # a chord note + chord = E.Element ('chord') + addElem (nt, chord, lev + 1) + rdvs = 0 # chord notes no real duration + if rdvs and s.ntup >= 0: s.ntup -= 1 # count tuplet notes only on non-chord, non grace notes (rdvs > 0) + if den not in s.typeMap: # take the nearest smaller legal duration + info ('illegal duration %d/%d' % (nnum, nden)) + den = min (x for x in s.typeMap.keys () if x > den) + xmltype = str (s.typeMap [den]) # xml needs the note type in addition to duration + acc, step, oct = '', 'C', '0' # abc-notated pitch elements (accidental, pitch step, octave) + alter = '' # xml alteration + if n.name == 'rest': + if 'x' in n.t or 'X' in n.t: nt.set ('print-object', 'no') + rest = E.Element ('rest') + addElem (nt, rest, lev + 1) + else: + p = n.pitch.t # get 
pitch elements from parsed tokens + if len (p) == 3: acc, step, oct = p + else: step, oct = p + pitch, alter = s.mkPitch (acc, step, oct, lev + 1) + addElem (nt, pitch, lev + 1) + if s.ntup >= 0: # modify duration for tuplet notes + dvs = dvs * s.tmden / s.tmnum + if dvs: addElemT (nt, 'duration', str (dvs), lev + 1) # skip when dvs == 0, requirement of musicXML + inst = E.Element ('instrument', id='I-'+s.vid) # instrument id for midi + if s.midprg != ['', '']: addElem (nt, inst, lev + 1) # only add when %%midi was present + addElemT (nt, 'voice', '1', lev + 1) # default voice, for merging later + addElemT (nt, 'type', xmltype, lev + 1) # add note type + for i in range (ndot): # add dots + dot = E.Element ('dot') + addElem (nt, dot, lev + 1) + ptup = (step, oct) # pitch tuple without alteration to check for ties + tstop = ptup in s.ties and s.ties[ptup][2] == s.overlayVnum # open tie on this pitch tuple in this overlay + if acc and not tstop: addElemT (nt, 'accidental', s.accTab [acc], lev + 1) # only add accidental if note not tied + tupnotation = '' # start/stop notation element for tuplets + if s.ntup >= 0: # add time modification element for tuplet notes + tmod = mkTmod (s.tmnum, s.tmden, lev + 1) + addElem (nt, tmod, lev + 1) + if s.ntup > 0 and not s.tupnts: tupnotation = 'start' + s.tupnts.append ((rdvs, tmod)) # remember all tuplet modifiers with corresp. 
durations + if s.ntup == 0: # last tuplet note (and possible chord notes there after) + if rdvs: tupnotation = 'stop' # only insert notation in the real note (rdvs > 0) + s.cmpNormType (rdvs, lev + 1) # compute and/or add normal-type elements (-> s.ntype) + gstaff = s.gStaffNums.get (s.vid, 0) # staff number of the current voice + if gstaff: addElemT (nt, 'staff', str (gstaff), lev + 1) + s.doBeams (n, nt, den, lev + 1) + s.doNotations (n, ptup, alter, tupnotation, tstop, nt, lev + 1) + if n.objs: s.doLyr (n, nt, lev + 1) + return nt + + def cmpNormType (s, rdvs, lev): # compute the normal-type of a tuplet (only needed for Finale) + if rdvs: # the last real tuplet note (chord notes can still follow afterwards with rdvs == 0) + durs = [dur for dur, tmod in s.tupnts if dur > 0] + ndur = sum (durs) / s.tmnum # duration of the normal type + s.irrtup = any ((dur != ndur) for dur in durs) # irregular tuplet + tix = 16 * s.divisions / ndur # index in typeMap of normal-type duration + if tix in s.typeMap: + s.ntype = str (s.typeMap [tix]) # the normal-type + else: s.irrtup = 0 # give up, no normal type possible + if s.irrtup: # only add normal-type for irregular tuplets + for dur, tmod in s.tupnts: # add normal-type to all modifiers + addElemT (tmod, 'normal-type', s.ntype, lev + 1) + s.tupnts = [] # reset the tuplet buffer + + def doNotations (s, n, ptup, alter, tupnotation, tstop, nt, lev): + slurs = getattr (n, 'slurs', 0) # slur ends + pts = getattr (n, 'pitches', []) # all chord notes available in the first note + if pts: # make list of pitches in chord: [(pitch, octave), ..] 
+ if type (pts.pitch) == pObj: pts = [pts.pitch] # chord with one note + else: pts = [tuple (p.t[-2:]) for p in pts.pitch] # normal chord + for pt, (tie_alter, nts, vnum) in s.ties.items (): # scan all open ties and delete illegal ones + if vnum != s.overlayVnum: continue # tie belongs to different overlay + if pts and pt in pts: continue # pitch tuple of tie exists in chord + if getattr (n, 'chord', 0): continue # skip chord notes + if pt == ptup: continue # skip correct single note tie + if getattr (n, 'grace', 0): continue # skip grace notes + info ('tie between different pitches: %s%s converted to slur' % pt) + del s.ties [pt] # remove the note from pending ties + e = [t for t in nts.findall ('tied') if t.get ('type') == 'start'][0] # get the tie start element + e.tag = 'slur' # convert tie into slur + slurnum = len (s.slurstack) + 1 + s.slurstack.append (slurnum) + e.set ('number', str (slurnum)) + if slurs: slurs.t.append (')') # close slur on this note + else: slurs = pObj ('slurs', [')']) + tstart = getattr (n, 'tie', 0) # start a new tie + decos = s.nextdecos # decorations encountered so far + ndeco = getattr (n, 'deco', 0) # possible decorations of notes of a chord + if ndeco: # add decorations, translate used defined symbols + decos += [s.usrSyms.get (d, d).strip ('!+') for d in ndeco.t] + s.nextdecos = [] + if not (tstop or tstart or decos or slurs or s.slurbeg or tupnotation): return nt + nots = E.Element ('notations') # notation element needed + if tupnotation: # add tuplet type + tup = E.Element ('tuplet', type=tupnotation) + if tupnotation == 'start': tup.set ('bracket', 'yes') + addElem (nots, tup, lev + 1) + if tstop: # stop tie + del s.ties[ptup] # remove flag + tie = E.Element ('tied', type='stop') + addElem (nots, tie, lev + 1) + if tstart: # start a tie + s.ties[ptup] = (alter, nots, s.overlayVnum) # remember pitch tuple to stop tie and apply same alteration + tie = E.Element ('tied', type='start') + addElem (nots, tie, lev + 1) + if decos: # 
look for slurs and decorations + arts = [] # collect articulations + for d in decos: # do all slurs and decos + if d == '(': s.slurbeg += 1; continue # slurs made in while loop at the end + elif d == 'fermata' or d == 'H': + ntn = E.Element ('fermata', type='upright') + elif d == 'arpeggio': + ntn = E.Element ('arpeggiate', number='1') + else: arts.append (d); continue + addElem (nots, ntn, lev + 1) + if arts: # do only note articulations and collect staff annotations in xmldecos + rest = s.doArticulations (nots, arts, lev + 1) + if rest: info ('unhandled note decorations: %s' % rest) + while s.slurbeg > 0: + s.slurbeg -= 1 + slurnum = len (s.slurstack) + 1 + s.slurstack.append (slurnum) + ntn = E.Element ('slur', number='%d' % slurnum, type='start') + addElem (nots, ntn, lev + 1) + if slurs: # these are only slur endings + for d in slurs.t: + if not s.slurstack: break # no more open slurs + slurnum = s.slurstack.pop () + slur = E.Element ('slur', number='%d' % slurnum, type='stop') + addElem (nots, slur, lev + 1) + if nots.getchildren() != []: # only add notations if not empty + addElem (nt, nots, lev) + + def doArticulations (s, nots, arts, lev): + decos = [] + for a in arts: + if a in s.artMap: + art = E.Element ('articulations') + addElem (nots, art, lev) + addElem (art, E.Element (s.artMap[a]), lev + 1) + elif a in s.ornMap: + orn = E.Element ('ornaments') + addElem (nots, orn, lev) + addElem (orn, E.Element (s.ornMap[a]), lev + 1) + elif a in s.tecMap: + tec = E.Element ('technical') + addElem (nots, tec, lev) + addElem (tec, E.Element (s.tecMap[a]), lev + 1) + else: decos.append (a) # return staff annotations + return decos + + def doLyr (s, n, nt, lev): + for i, lyrobj in enumerate (n.objs): + if lyrobj.name != 'syl': continue + dash = len (lyrobj.t) == 2 + if dash: + if i in s.lyrdash: type = 'middle' + else: type = 'begin'; s.lyrdash [i] = 1 + else: + if i in s.lyrdash: type = 'end'; del s.lyrdash [i] + else: type = 'single' + lyrel = E.Element ('lyric', 
number = str (i + 1)) + addElem (nt, lyrel, lev) + addElemT (lyrel, 'syllabic', type, lev + 1) + addElemT (lyrel, 'text', lyrobj.t[0].replace ('~',' '), lev + 1) + + def doBeams (s, n, nt, den, lev): + if hasattr (n, 'chord') or hasattr (n, 'grace'): + s.grcbbrk = s.grcbbrk or n.bbrk.t[0] # remember if there was any bbrk in or before a grace sequence + return + bbrk = s.grcbbrk or n.bbrk.t[0] or den < 32 + s.grcbbrk = False + if not s.prevNote: pbm = None + else: pbm = s.prevNote.find ('beam') + bm = E.Element ('beam', number='1') + bm.text = 'begin' + if pbm != None: + if bbrk: + if pbm.text == 'begin': + s.prevNote.remove (pbm) + elif pbm.text == 'continue': + pbm.text = 'end' + s.prevNote = None + else: bm.text = 'continue' + if den >= 32 and n.name != 'rest': + addElem (nt, bm, lev) + s.prevNote = nt + + def stopBeams (s): + if not s.prevNote: return + pbm = s.prevNote.find ('beam') + if pbm != None: + if pbm.text == 'begin': + s.prevNote.remove (pbm) + elif pbm.text == 'continue': + pbm.text = 'end' + s.prevNote = None + + def staffDecos (s, decos, maat, lev, bardecos=0): + gstaff = s.gStaffNums.get (s.vid, 0) # staff number of the current voice + for d in decos: + d = s.usrSyms.get (d, d).strip ('!+') # try to replace user defined symbol + if d in s.dynaMap: + dynel = E.Element ('dynamics') + addDirection (maat, dynel, lev, gstaff, [E.Element (d)], 'below', s.gcue_on) + elif d in s.wedgeMap: # wedge + if ')' in d: type = 'stop' + else: type = 'crescendo' if '<' in d or 'crescendo' in d else 'diminuendo' + addDirection (maat, E.Element ('wedge', type=type), lev, gstaff) + elif d in ['coda', 'segno']: + if bardecos: s.bardecos.append (d) # postpone to begin next measure + else: + text, attr, val = s.capoMap [d] + dir = addDirection (maat, E.Element (text), lev, gstaff, placement='above') + sound = E.Element ('sound'); sound.set (attr, val) + addElem (dir, sound, lev + 1) + elif d in s.capoMap: + text, attr, val = s.capoMap [d] + words = E.Element ('words'); 
words.text = text + dir = addDirection (maat, words, lev, gstaff, placement='above') + sound = E.Element ('sound'); sound.set (attr, val) + addElem (dir, sound, lev + 1) + elif d == '(': s.slurbeg += 1 # start slur on next note + else: s.nextdecos.append (d) # keep annotation for the next note + + def doFields (s, maat, fieldmap, lev): + def doClef (): + clef, gtrans = 0, 0 + clefn = re.search (r'alto1|alto2|alto4|alto|tenor|bass3|bass|treble|perc|none', field) + clefm = re.search (r"(?:^m=| m=|middle=)([A-Ga-g])([,']*)", field) + trans_oct2 = re.search (r'octave=([-+]\d)', field) + trans = re.search (r'(?:^t=| t=|transpose=)(-?[\d]+)', field) + trans_oct = re.search (r'([+-^_])(8|15)', field) + cue_onoff = re.search (r'cue=(on|off)', field) + if clefn: + clef = clefn.group () + if clefm: + note, octstr = clefm.groups () + nUp = note.upper () + octnum = (4 if nUp == note else 5) + (len (octstr) if "'" in octstr else -len (octstr)) + gtrans = (3 if nUp in 'AFD' else 4) - octnum + if clef not in ['perc', 'none']: clef = s.clefLineMap [nUp] + if clef: + s.gtrans = gtrans # only change global tranposition when a clef is really defined + sign, line = s.clefMap [clef] + if not sign: return + c = E.Element ('clef') + gstaff = s.gStaffNums.get (s.vid, 0) # the current staff number + if gstaff: c.set ('number', str (gstaff)) # only add staff number when defined + addElemT (c, 'sign', sign, lev + 2) + if line: addElemT (c, 'line', line, lev + 2) + if trans_oct: + n = trans_oct.group (1) in '-_' and -1 or 1 + if trans_oct.group (2) == '15': n *= 2 # 8 => 1 octave, 15 => 2 octaves + addElemT (c, 'clef-octave-change', str (n), lev + 2) # transpose print out + if trans_oct.group (1) in '+-': s.gtrans += n # also transpose all pitches with one octave + if trans_oct2: + n = int (trans_oct2.group (1)) + s.gtrans += n + atts.append ((7, c)) + if trans != None: # add transposition in semitones + e = E.Element ('transpose') + addElemT (e, 'chromatic', str (trans.group (1)), lev + 3) + 
atts.append ((9, e)) + if cue_onoff: s.gcue_on = cue_onoff.group (1) == 'on' + atts = [] # collect xml attribute elements [(order-number, xml-element), ..] + for ftype, field in fieldmap.items (): + if not field: # skip empty fields + continue + if ftype == 'Div': # not an abc field, but handled as if + d = E.Element ('divisions') + d.text = field + atts.append ((1, d)) + elif ftype == 'gstaff': # make grand staff + e = E.Element ('staves') + e.text = str (field) + atts.append ((4, e)) + elif ftype == 'M': + if field == 'none': continue + if field == 'C': field = '4/4' + elif field == 'C|': field = '2/2' + t = E.Element ('time') + if '/' not in field: + info ('M:%s not recognized, 4/4 assumed' % field) + field = '4/4' + beats, btype = field.split ('/')[:2] + try: s.mdur = simplify (eval (beats), int (btype)) # measure duration for Z and X rests (eval allows M:2+3/4) + except: + info ('error in M:%s, 4/4 assumed' % field) + s.mdur = (4,4) + beats, btype = '4','4' + addElemT (t, 'beats', beats, lev + 2) + addElemT (t, 'beat-type', btype, lev + 2) + atts.append ((3, t)) + elif ftype == 'K': + accs = ['F','C','G','D','A','E','B'] # == s.sharpness [7:14] + mode = '' + key = re.match (r'\s*([A-G][#b]?)\s*([a-zA-Z]*)', field) + alts = re.search (r'\s((\s?[=^_][A-Ga-g])+)', ' ' + field) # avoid matching middle=G and m=G + if key: + key, mode = key.groups () + mode = mode.lower ()[:3] # only first three chars, no case + if mode not in s.offTab: mode = 'maj' + fifths = s.sharpness.index (key) - s.offTab [mode] + if fifths >= 0: s.keyAlts = dict (zip (accs[:fifths], fifths * ['1'])) + else: s.keyAlts = dict (zip (accs[fifths:], -fifths * ['-1'])) + elif field.startswith ('none') or field == '': # the default key + fifths = 0 + mode = 'maj' + if alts: + alts = re.findall (r'[=^_][A-Ga-g]', alts.group(1)) # list of explicit alterations + alts = [(x[1], s.alterTab [x[0]]) for x in alts] # [step, alter] + for step, alter in alts: # correct permanent alterations for this key + 
s.keyAlts [step.upper ()] = alter + k = E.Element ('key') + koctave = [] + lowerCaseSteps = [step.upper () for step, alter in alts if step.islower ()] + for step, alter in s.keyAlts.items (): + if alter == '0': # skip neutrals + del s.keyAlts [step.upper ()] # otherwise you get neutral signs on normal notes + continue + addElemT (k, 'key-step', step.upper (), lev + 2) + addElemT (k, 'key-alter', alter, lev + 2) + koctave.append ('5' if step in lowerCaseSteps else '4') + if koctave: # only key signature if not empty + for oct in koctave: + e = E.Element ('key-octave', number=oct) + addElem (k, e, lev + 2) + atts.append ((2, k)) + elif mode: + k = E.Element ('key') + addElemT (k, 'fifths', str (fifths), lev + 2) + addElemT (k, 'mode', s.modTab [mode], lev + 2) + atts.append ((2, k)) + doClef () + elif ftype == 'L': + s.unitLcur = map (int, field.split ('/')) + if len (s.unitLcur) == 1 or s.unitLcur[1] not in s.typeMap: + info ('L:%s is not allowed, 1/8 assumed' % field) + s.unitLcur = 1,8 + elif ftype == 'V': + doClef () + elif ftype == 'I': + xs = s.doField_I (ftype, field) + if xs and len (xs) == 1: # when "%%MIDI transpose" matched insert 'attribute/transpose/chromatic' + e = E.Element ('transpose') + addElemT (e, 'chromatic', xs[0], lev + 2) # xs[0] == signed number string given after transpose + atts.append ((9, e)) + if xs and len (xs) == 2: # repeated occurrence of [I:MIDI] -> instrument change + midchan, midprog = xs + snd = E.Element ('sound') + mi = E.Element ('midi-instrument', id='I-' + s.vid) + addElem (maat, snd, lev) + addElem (snd, mi, lev + 1) + if midchan: addElemT (mi, 'midi-channel', midchan, lev + 2) + if midprog: addElemT (mi, 'midi-program', str (int (midprog) + 1), lev + 2) # compatible with abc2midi + + elif ftype == 'Q': + s.doTempo (maat, field, lev) + elif ftype in 'TCOAZNGHRBDFSU': + info ('**illegal header field in body: %s, content: %s' % (ftype, field)) + else: + info ('unhandled field: %s, content: %s' % (ftype, field)) + + if atts: + 
att = E.Element ('attributes') # insert sub elements in the order required by musicXML + addElem (maat, att, lev) + for _, att_elem in sorted (atts): # ordering ! + addElem (att, att_elem, lev + 1) + + def doTempo (s, maat, field, lev): + gstaff = s.gStaffNums.get (s.vid, 0) # staff number of the current voice + t = re.search (r'(\d)/(\d\d?)\s*=\s*([.\d]+)|([.\d]+)', field) + if not t: return + try: + if t.group (4): + num, den, upm = 1, s.unitLcur[1] , float (t.group (4)) + else: + num, den, upm = int (t.group (1)), int (t.group (2)), float (t.group (3)) + except: return # float or int conversion failure + if num != 1: info ('in Q: numerator > 1 in %d/%d not supported' % (num, den)) + qpm = 4. * num * upm / den + metro = E.Element ('metronome') + u = E.Element ('beat-unit'); u.text = s.typeMap [4 * den] + pm = E.Element ('per-minute'); pm.text = '%.2f' % upm + dir = addDirection (maat, metro, lev, gstaff, [u, pm], placement='above') + sound = E.Element ('sound'); sound.set ('tempo', '%.2f' % qpm) + addElem (dir, sound, lev + 1) + + def mkBarline (s, maat, loc, lev, style='', dir='', ending=''): + b = E.Element ('barline', location=loc) + if style: + addElemT (b, 'bar-style', style, lev + 1) + if s.curVolta: # first stop a current volta + end = E.Element ('ending', number=s.curVolta, type='stop') + s.curVolta = '' + if loc == 'left': # stop should always go to a right barline + bp = E.Element ('barline', location='right') + addElem (bp, end, lev + 1) + addElem (s.prevmsre, bp, lev) # prevmsre has no right barline! 
(ending would have stopped there) + else: + addElem (b, end, lev + 1) + if ending: + ending = ending.replace ('-',',') # MusicXML only accepts comma's + endtxt = '' + if ending.startswith ('"'): # ending is a quoted string + endtxt = ending.strip ('"') + ending = '33' # any number that is not likely to occur elsewhere + end = E.Element ('ending', number=ending, type='start') + if endtxt: end.text = endtxt # text appears in score in stead of number attribute + addElem (b, end, lev + 1) + s.curVolta = ending + if dir: + r = E.Element ('repeat', direction=dir) + addElem (b, r, lev + 1) + addElem (maat, b, lev) + + def doChordSym (s, maat, sym, lev): + alterMap = {'#':'1','=':'0','b':'-1'} + rnt = sym.root.t + chord = E.Element ('harmony') + addElem (maat, chord, lev) + root = E.Element ('root') + addElem (chord, root, lev + 1) + addElemT (root, 'root-step', rnt[0], lev + 2) + if len (rnt) == 2: addElemT (root, 'root-alter', alterMap [rnt[1]], lev + 2) + kind = s.chordTab.get (sym.kind.t[0], 'major') + addElemT (chord, 'kind', kind, lev + 1) + degs = getattr (sym, 'degree', '') + if degs: + if type (degs) != types.ListType: degs = [degs] + for deg in degs: + deg = deg.t[0] + if deg[0] == '#': alter = '1'; deg = deg[1:] + elif deg[0] == 'b': alter = '-1'; deg = deg[1:] + else: alter = '0'; deg = deg + degree = E.Element ('degree') + addElem (chord, degree, lev + 1) + addElemT (degree, 'degree-value', deg, lev + 2) + addElemT (degree, 'degree-alter', alter, lev + 2) + addElemT (degree, 'degree-type', 'add', lev + 2) + + def mkMeasure (s, i, t, lev, fieldmap={}): + s.msreAlts = {} + s.ntup = -1 + s.acciatura = 0 # next grace element gets acciatura attribute + overlay = 0 + maat = E.Element ('measure', number = str(i)) + if fieldmap: s.doFields (maat, fieldmap, lev + 1) + if s.linebrk: # there was a line break in the previous measure + e = E.Element ('print') + e.set ('new-system', 'yes') + addElem (maat, e, lev + 1) + s.linebrk = 0 + if s.bardecos: # output coda and segno 
attached to the previous right barline + s.staffDecos (s.bardecos, maat, lev + 1) + s.bardecos = [] + for it, x in enumerate (t): + if x.name == 'note' or x.name == 'rest': + note = s.mkNote (x, lev + 1) + addElem (maat, note, lev + 1) + elif x.name == 'lbar': + bar = x.t[0] + if bar == '|': pass # skip redundant bar + elif ':' in bar: # forward repeat + volta = x.t[1] if len (x.t) == 2 else '' + s.mkBarline (maat, 'left', lev + 1, style='heavy-light', dir='forward', ending=volta) + else: # bar must be a volta number + s.mkBarline (maat, 'left', lev + 1, ending=bar) + elif x.name == 'rbar': + if hasattr (x, 'deco'): # MuseScore does not support this -> emergency solution + s.staffDecos (x.deco.t, maat, lev + 1, bardecos=1) # coda, segno -> next measure + bar = x.t[0] + if bar == '.|': + s.mkBarline (maat, 'right', lev + 1, style='dotted') + elif ':' in bar: # backward repeat + s.mkBarline (maat, 'right', lev + 1, style='light-heavy', dir='backward') + elif bar == '||': + s.mkBarline (maat, 'right', lev + 1, style='light-light') + elif bar == '[|]' or bar == '[]': + s.mkBarline (maat, 'right', lev + 1, style='none') + elif '[' in bar or ']' in bar: + s.mkBarline (maat, 'right', lev + 1, style='light-heavy') + elif bar[0] == '&': overlay = 1 + elif x.name == 'tup': + if len (x.t) == 3: n, into, nts = x.t + else: n, into, nts = x.t[0], 0, 0 + if into == 0: into = 3 if n in [2,4,8] else 2 + if nts == 0: nts = n + s.tmnum, s.tmden, s.ntup = n, into, nts + elif x.name == 'deco': + s.staffDecos (x.t, maat, lev + 1) # output staff decos, postpone note decos to next note + elif x.name == 'text': + pos, text = x.t[:2] + place = 'above' if pos == '^' else 'below' + words = E.Element ('words') + words.text = text + gstaff = s.gStaffNums.get (s.vid, 0) # staff number of the current voice + addDirection (maat, words, lev + 1, gstaff, placement=place) + elif x.name == 'inline': + fieldtype, fieldval = x.t[:2] + s.doFields (maat, {fieldtype:fieldval}, lev + 1) + elif x.name == 
'accia': s.acciatura = 1 + elif x.name == 'linebrk': + s.supports_tag = 1 + if it > 0 and t[it -1].name == 'lbar': # we are at start of measure + e = E.Element ('print') # output linebreak now + e.set ('new-system', 'yes') + addElem (maat, e, lev + 1) + else: + s.linebrk = 1 # output linebreak at start of next measure + elif x.name == 'chordsym': + s.doChordSym (maat, x, lev + 1) + s.stopBeams () + s.prevmsre = maat + return maat, overlay + + def mkPart (s, maten, id, lev, attrs, nstaves): + s.slurstack = [] + s.unitLcur = s.unitL # set the default unit length at begin of each voice + s.curVolta = '' + s.lyrdash = {} + s.linebrk = 0 + s.midprg = ['', ''] # MIDI channel nr, program nr for the current part + s.gcue_on = 0 # reset cue note marker for each new voice + s.gtrans = 0 # reset octave transposition (by clef) + part = E.Element ('part', id=id) + s.overlayVnum = 0 # overlay voice number to relate ties that extend from one overlayed measure to the next + gstaff = s.gStaffNums.get (s.vid, 0) # staff number of the current voice + attrs_cpy = attrs.copy () # don't change attrs itself in next line + if gstaff == 1: attrs_cpy ['gstaff'] = nstaves # make a grand staff + msre, overlay = s.mkMeasure (1, maten[0], lev + 1, attrs_cpy) + addElem (part, msre, lev + 1) + for i, maat in enumerate (maten[1:]): + s.overlayVnum = s.overlayVnum + 1 if overlay else 0 + msre, next_overlay = s.mkMeasure (i+2, maat, lev + 1) + if overlay: mergePartMeasure (part, msre, s.overlayVnum) + else: addElem (part, msre, lev + 1) + overlay = next_overlay + return part + + def mkScorePart (s, id, vids_p, partAttr, lev): + naam, subnm, midprg = partAttr [id] + sp = E.Element ('score-part', id='P'+id) + nm = E.Element ('part-name') + nm.text = naam + addElem (sp, nm, lev + 1) + snm = E.Element ('part-abbreviation') + snm.text = subnm + if subnm: addElem (sp, snm, lev + 1) # only add if subname was given + if s.staves: instr_vids = [vids for vids in s.staves if vids[0] == id][0] + else: 
instr_vids = [id] + inst = [] + for id in instr_vids: + if id not in partAttr: continue # error in %%score -> instr_vids may have non existing id's + naam, subnm, midprg = partAttr [id] + midchan, midprog = midprg + if not midchan and not midprog: continue # only add if program nr or channel was given + si = E.Element ('score-instrument', id='I-'+id) + addElemT (si, 'instrument-name', naam, lev + 2) + mi = E.Element ('midi-instrument', id='I-'+id) + if midchan: addElemT (mi, 'midi-channel', midchan, lev + 2) + if midprog: addElemT (mi, 'midi-program', str (int (midprog) + 1), lev + 2) # compatible with abc2midi + inst.append ((si, mi)) + for si, mi in inst: addElem (sp, si, lev + 1) + for si, mi in inst: addElem (sp, mi, lev + 1) + return sp, len (inst) + + def mkPartlist (s, vids, partAttr, lev): + def addPartGroup (sym, num): + pg = E.Element ('part-group', number=str (num), type='start') + addElem (partlist, pg, lev + 1) + addElemT (pg, 'group-symbol', sym, lev + 2) + addElemT (pg, 'group-barline', 'yes', lev + 2) + partlist = E.Element ('part-list') + g_num = 0 # xml group number + nInstrs = [] # number of instruments in each part + for g in (s.groups or vids): # brace/bracket or abc_voice_id + if g == '[': g_num += 1; addPartGroup ('bracket', g_num) + elif g == '{': g_num += 1; addPartGroup ('brace', g_num) + elif g in '}]': + pg = E.Element ('part-group', number=str (g_num), type='stop') + addElem (partlist, pg, lev + 1) + g_num -= 1 + else: # g = abc_voice_id + if g not in vids: continue # error in %%score + sp, nInst = s.mkScorePart (g, vids, partAttr, lev + 1) + addElem (partlist, sp, lev + 1) + nInstrs.append (nInst) + return partlist, nInstrs + + def doField_I (s, type, x): + def readPfmt (x, n): # read ABC page formatting constant + if not s.pageFmtAbc: s.pageFmtAbc = s.pageFmtDef # set the default values on first change + ro = re.search (r'[^.\d]*([\d.]+)\s*(cm|in|pt)?', x) # float followed by unit + if ro: + x, unit = ro.groups () # unit == None when 
not present + u = {'cm':10., 'in':25.4, 'pt':25.4/72} [unit] if unit else 1. + s.pageFmtAbc [n] = float (x) * u # convert ABC values to millimeters + else: info ('error in page format: %s' % x) + + if x.startswith ('score') or x.startswith ('staves'): + s.staveDefs += [x] # collect all voice mappings + elif x.startswith ('staffwidth'): info ('skipped I-field: %s' % x) + elif x.startswith ('staff'): # set new staff number of the current voice + r1 = re.search (r'staff *([+-]?)(\d)', x) + if r1: + sign = r1.group (1) + num = int (r1.group (2)) + gstaff = s.gStaffNums.get (s.vid, 0) # staff number of the current voice + if sign: # relative staff number + num = (sign == '-') and gstaff - num or gstaff + num + else: # absolute abc staff number + try: vabc = s.staves [num - 1][0] # vid of (first voice of) abc-staff num + except: vabc = 0; info ('abc staff %s does not exist' % num) + num = s.gStaffNumsOrg.get (vabc, 0) # xml staff number of abc-staff num + if gstaff and num > 0 and num <= s.gNstaves [s.vid]: + s.gStaffNums [s.vid] = num + else: info ('could not relocate to staff: %s' % r1.group ()) + else: info ('not a valid staff redirection: %s' % x) + elif x.startswith ('scale'): readPfmt (x, 0) + elif x.startswith ('pageheight'): readPfmt (x, 1) + elif x.startswith ('pagewidth'): readPfmt (x, 2) + elif x.startswith ('leftmargin'): readPfmt (x, 3) + elif x.startswith ('rightmargin'): readPfmt (x, 4) + elif x.startswith ('topmargin'): readPfmt (x, 5) + elif x.startswith ('botmargin'): readPfmt (x, 6) + elif x.startswith ('MIDI'): + r1 = re.search (r'program *(\d*) +(\d+)', x) + r2 = re.search (r'channel\D*(\d+)', x) + if r1: ch, prg = r1.groups () # channel nr or '', program nr + if r2: ch, prg = r2.group (1), '' # channel nr only + if r1 or r2: + if s.midprg[1] == '': # no instrument defined yet + s.midprg[1] = prg + if ch: s.midprg[0] = ch + elif ch and s.midprg[0] == '': # no channel defined yet + s.midprg[0] = ch + else: # repeated midi def -> insert instument 
change + return [ch, prg] + r = re.search (r'transpose[^-\d]*(-?\d+)', x) + if r: return [r.group (1)] + else: info ('skipped I-field: %s' % x) + + def parseStaveDef (s, vdefs): + if not s.staveDefs: return vdefs + for x in s.staveDefs [1:]: info ('%%%%%s dropped, multiple stave mappings not supported' % x) + x = s.staveDefs [0] # only the first %%score is honoured + score = abc_scoredef.parseString (x) [0] + f = lambda x: type (x) == types.UnicodeType and [x] or x + s.staves = map (f, mkStaves (score, vdefs)) + s.grands = map (f, mkGrand (score, vdefs)) + s.groups = mkGroups (score) + vce_groups = [vids for vids in s.staves if len (vids) > 1] # all voice groups + d = {} # for each voice group: map first voice id -> all merged voice ids + for vgr in vce_groups: d [vgr[0]] = vgr + for gstaff in s.grands: # for all grand staves + if len (gstaff) == 1: continue # skip single parts + for v, stf_num in zip (gstaff, range (1, len (gstaff) + 1)): + for vx in d.get (v, [v]): # allocate staff numbers + s.gStaffNums [vx] = stf_num # to all constituant voices + s.gNstaves [vx] = len (gstaff) # also remember total number of staves + s.gStaffNumsOrg = s.gStaffNums.copy () # keep original allocation for abc -> xml staff map + return vdefs + + def voiceNamesAndMaps (s, ps): # get voice names and mappings + vdefs = {} + for vid, vcedef, vce in ps: # vcedef == emtpy of first pObj == voice definition + pname, psubnm = '', '' # part name and abbreviation + if not vcedef: # simple abc without voice definitions + vdefs [vid] = pname, psubnm, '' + else: # abc with voice definitions + if vid != vcedef.t[1]: info ('voice ids unequal: %s (reg-ex) != %s (grammar)' % (vid, vcedef.t[1])) + rn = re.search (r'(?:name|nm)="([^"]*)"', vcedef.t[2]) + if rn: pname = rn.group (1) + rn = re.search (r'(?:subname|snm|sname)="([^"]*)"', vcedef.t[2]) + if rn: psubnm = rn.group (1) + vdefs [vid] = pname, psubnm, vcedef.t[2] + xs = [pObj.t[1] for maat in vce for pObj in maat if pObj.name == 'inline'] # all 
inline statements in vce + s.staveDefs += [x for x in xs if x.startswith ('score') or x.startswith ('staves')] # filter %%score and %%staves + return vdefs + + def doHeaderField (s, fld, attrmap): + type, value = fld.t[:2] + if not value: # skip empty field + return + if type == 'M': + attrmap [type] = value + elif type == 'L': + try: s.unitL = map (int, fld.t[1].split ('/')) + except: + info ('illegal unit length:%s, 1/8 assumed' % fld.t[1]) + s.unitL = 1,8 + if len (s.unitL) == 1 or s.unitL[1] not in s.typeMap: + info ('L:%s is not allowed, 1/8 assumed' % fld.t[1]) + s.unitL = 1,8 + elif type == 'K': + attrmap[type] = value + elif type == 'T': + if s.title: s.title = s.title + '\n' + value + else: s.title = value + elif type == 'C': + s.creator ['composer'] = s.creator.get ('composer', '') + value + elif type == 'Z': + s.creator ['lyricist'] = s.creator.get ('lyricist', '') + value + elif type == 'U': + sym = fld.t[2].strip ('!+') + s.usrSyms [value] = sym + elif type == 'I': + s.doField_I (type, value) + elif type == 'Q': + attrmap[type] = value + elif type in s.creditTab: s.credits [s.creditTab [type]] = value + else: + info ('skipped header: %s' % fld) + + def mkIdentification (s, score, lev): + if s.title: + addElemT (score, 'movement-title', s.title, lev + 1) + ident = E.Element ('identification') + addElem (score, ident, lev + 1) + if s.creator: + for ctype, cname in s.creator.items (): + c = E.Element ('creator', type=ctype) + c.text = cname + addElem (ident, c, lev + 2) + encoding = E.Element ('encoding') + addElem (ident, encoding, lev + 2) + encoder = E.Element ('encoder') + encoder.text = 'abc2xml version %d' % VERSION + addElem (encoding, encoder, lev + 3) + if s.supports_tag: # avoids interference of auto-flowing and explicit linebreaks + suports = E.Element ('supports', attribute="new-system", element="print", type="yes", value="yes") + addElem (encoding, suports, lev + 3) + encodingDate = E.Element ('encoding-date') + encodingDate.text = str 
(datetime.date.today ()) + addElem (encoding, encodingDate, lev + 3) + + def mkDefaults (s, score, lev): + if s.pageFmtCmd: s.pageFmtAbc = s.pageFmtCmd + if not s.pageFmtAbc: return # do not output the defaults if none is desired + space, h, w, l, r, t, b = s.pageFmtAbc + mils = 4 * space # staff height in millimeters + scale = 40. / mils # tenth's per millimeter + dflts = E.Element ('defaults') + addElem (score, dflts, lev) + scaling = E.Element ('scaling') + addElem (dflts, scaling, lev + 1) + addElemT (scaling, 'millimeters', '%g' % mils, lev + 2) + addElemT (scaling, 'tenths', '40', lev + 2) + layout = E.Element ('page-layout') + addElem (dflts, layout, lev + 1) + addElemT (layout, 'page-height', '%g' % (h * scale), lev + 2) + addElemT (layout, 'page-width', '%g' % (w * scale), lev + 2) + margins = E.Element ('page-margins', type='both') + addElem (layout, margins, lev + 2) + addElemT (margins, 'left-margin', '%g' % (l * scale), lev + 3) + addElemT (margins, 'right-margin', '%g' % (r * scale), lev + 3) + addElemT (margins, 'top-margin', '%g' % (t * scale), lev + 3) + addElemT (margins, 'bottom-margin', '%g' % (b * scale), lev + 3) + + def mkCredits (s, score, lev): + if not s.credits: return + for ctype, ctext in s.credits.items (): + credit = E.Element ('credit', page='1') + addElemT (credit, 'credit-type', ctype, lev + 2) + addElemT (credit, 'credit-words', ctext, lev + 2) + addElem (score, credit, lev) + + def parse (s, abc_string): + abctext = abc_string if type (abc_string) == types.UnicodeType else decodeInput (abc_string) + abctext = abctext.replace ('[I:staff ','[I:staff') # avoid false beam breaks + s.reset () + header, voices = splitHeaderVoices (abctext) + ps = [] + try: + hs = abc_header.parseString (header) if header else '' + for id, vce_lyr in voices: # vce_lyr = [voice-block] where voice-block = (measures, corresponding lyric lines) + vcelyr = [] # list of measures where measure = list of elements (see syntax) + prevLeftBar = None # previous 
voice ended with a left-bar symbol (double repeat) + for voice, lyr in vce_lyr: + vce = abc_voice.parseString (voice).asList () + if not vce: # empty voice, insert an inline field that will be rejected + vce = [[pObj ('inline', ['I', 'empty voice'])]] + if prevLeftBar: + vce[0].insert (0, prevLeftBar) # insert at begin of first measure + prevLeftBar = None + if vce[-1] and vce[-1][-1].name == 'lbar': # last measure ends with an lbar + prevLeftBar = vce[-1][-1] + if len (vce) > 1: # vce should not become empty (-> exception when taking vcelyr [0][0]) + del vce[-1] # lbar was the only element in measure vce[-1] + lyr = lyr.strip () # strip leading \n (because we split on '\nw:...') + if lyr: # no lyrics for this measures-lyrics block + lyr = lyr_block.parseString (lyr).asList () + xs = alignLyr (vce, lyr) # put all syllables into corresponding notes + else: xs = vce + vcelyr += xs + elem1 = vcelyr [0][0] # the first element of the first measure + if elem1.name == 'inline'and elem1.t[0] == 'V': # is a voice definition + voicedef = elem1 + del vcelyr [0][0] # do not read voicedef twice + else: + voicedef = '' + ps.append ((id, voicedef, vcelyr)) + except ParseException, err: + if err.loc > 40: # limit length of error message, compatible with markInputline + err.pstr = err.pstr [err.loc - 40: err.loc + 40] + err.loc = 40 + xs = err.line[err.col-1:] + try: info (err.line.encode ('utf-8'), warn=0) # err.line is a unicode string!! 
+ except: info (err.line.encode ('latin-1'), warn=0) + info ((err.col-1) * '-' + '^', warn=0) + if re.search (r'\[U:[XYZxyz]', xs): + info ('Error: illegal user defined symbol: %s' % xs[1:], warn=0) + elif re.search (r'\[[OAPZNGHRBDFSXTCIU]:', xs): + info ('Error: header-only field %s appears after K:' % xs[1:], warn=0) + else: + info ('Syntax error at column %d' % err.col, warn=0) + raise err + + s.unitL = (1, 8) + s.title = '' + s.creator = {} # {creator type -> name string} + s.credits = {} # {credit type -> string} + score = E.Element ('score-partwise') + attrmap = {'Div': str (s.divisions), 'K':'C treble', 'M':'4/4'} + for res in hs: + if res.name == 'field': + s.doHeaderField (res, attrmap) + else: + info ('unexpected header item: %s' % res) + + vdefs = s.voiceNamesAndMaps (ps) + vdefs = s.parseStaveDef (vdefs) + + lev = 0 + vids, parts, partAttr = [], [], {} + for vid, _, vce in ps: # voice id, voice parse tree + pname, psubnm, voicedef = vdefs [vid] # part name + attrmap ['V'] = voicedef # abc text of first voice definition (after V:vid) or empty + pid = 'P%s' % vid # let part id start with an alpha + s.vid = vid # avoid parameter passing, needed in mkNote for instrument id + part = s.mkPart (vce, pid, lev + 1, attrmap, s.gNstaves.get (vid, 0)) + if 'Q' in attrmap: del attrmap ['Q'] # header tempo only in first part + parts.append (part) + vids.append (vid) + partAttr [vid] = (pname, psubnm, s.midprg) + parts, vidsnew = mergeParts (parts, vids, s.staves) # merge parts into staves as indicated by %%score + parts, _ = mergeParts (parts, vidsnew, s.grands, 1) # merge grand staves + + s.mkIdentification (score, lev) + s.mkDefaults (score, lev + 1) + s.mkCredits (score, lev) + + partlist, nInstrs = s.mkPartlist (vids, partAttr, lev + 1) + addElem (score, partlist, lev + 1) + for ip, part in enumerate (parts): + if nInstrs [ip] < 2: # no instrument tag needed for one- or no-instrument parts + removeElems (part, 'measure/note', 'instrument') + addElem (score, 
part, lev + 1) + + return score + +def decodeInput (data_string): + try: enc = 'utf-8'; unicode_string = data_string.decode (enc) + except: + try: enc = 'latin-1'; unicode_string = data_string.decode (enc) + except: raise Exception ('data not encoded in utf-8 nor in latin-1') + info ('decoded from %s' % enc) + return unicode_string + +xmlVersion = "" +def fixDoctype (elem, enc): + xs = E.tostring (elem, encoding=enc) + ys = xs.split ('\n') + if enc == 'utf-8': ys.insert (0, xmlVersion) # crooked logic of ElementTree lib + ys.insert (1, '') + return '\n'.join (ys) + +def xml2mxl (pad, fnm, data): # write xml data to compressed .mxl file + from zipfile import ZipFile, ZIP_DEFLATED + fnmext = fnm + '.xml' # file name with extension, relative to the root within the archive + outfile = os.path.join (pad, fnm + '.mxl') + meta = '%s\n\n' % xmlVersion + meta += '\n' % fnmext + meta += '' + f = ZipFile (outfile, 'w', ZIP_DEFLATED) + f.writestr ('META-INF/container.xml', meta) + f.writestr (fnmext, data) + f.close () + info ('%s written' % outfile, warn=0) + +def convert (pad, fnm, abc_string, mxl): + # these globals should be initialised (as in the __main__ secion) before calling convert + global mxm # optimisation 1: keep instance of MusicXml + global abc_header, abc_voice, lyr_block, abc_scoredef # optimisation 2: keep computed grammars + score = mxm.parse (abc_string) + if pad: + data = fixDoctype (score, 'utf-8') + if not mxl or mxl in ['a', 'add']: + outfnm = os.path.join (pad, fnm + '.xml') + outfile = file (outfnm, 'wb') + outfile.write (data) + outfile.close () + info ('%s written' % outfnm, warn=0) + if mxl: xml2mxl (pad, fnm, data) # also write a compressed version + else: + outfile = sys.stdout + outfile.write (fixDoctype (score, 'utf-8')) + outfile.write ('\n') + +#---------------- +# Main Program +#---------------- +if __name__ == '__main__': + from optparse import OptionParser + from glob import glob + import time + global mxm # keep instance of MusicXml + 
global abc_header, abc_voice, lyr_block, abc_scoredef # keep computed grammars + mxm = MusicXml () + + parser = OptionParser (usage='%prog [-h] [-r] [-m SKIP NUM] [-o DIR] [-p PFMT] [-z MODE] [ ...]', version='version %d' % VERSION) + parser.add_option ("-o", action="store", help="store xml files in DIR", default='', metavar='DIR') + parser.add_option ("-m", action="store", help="skip SKIP tunes, then read at most NUM tunes", nargs=2, type='int', default=(0,1), metavar='SKIP NUM') + parser.add_option ("-p", action="store", help="page formatting in PFMT", default='', metavar='PFMT') + parser.add_option ("-z", "--mxl", dest="mxl", help="store as compressed mxl, MODE = a(dd) or r(eplace)", default='', metavar='MODE') + parser.add_option ("-r", action="store_true", help="show whole measure rests in merged staffs", default=False) + options, args = parser.parse_args () + if len (args) == 0: parser.error ('no input file given') + pad = options.o + if options.mxl and options.mxl not in ['a','add', 'r', 'replace']: + parser.error ('MODE should be a(dd) or r(eplace), not: %s' % options.mxl) + if pad: + if not os.path.exists (pad): os.mkdir (pad) + if not os.path.isdir (pad): parser.error ('%s is not a directory' % pad) + if options.p: # set page formatting values + try: # space, page-height, -width, margin-left, -right, -top, -bottom + mxm.pageFmtCmd = map (float, options.p.split (',')) + if len (mxm.pageFmtCmd) != 7: raise Exception ('-p needs 7 values') + except Exception, err: parser.error (err) + mxm.gmwr = options.r # ugly: needs to be globally accessable + + abc_header, abc_voice, lyr_block, abc_scoredef = abc_grammar () # compute grammar only once per file set + fnmext_list = [] + for i in args: fnmext_list += glob (i) + if not fnmext_list: parser.error ('none of the input files exist') + t_start = time.time () + for X, fnmext in enumerate (fnmext_list): + fnm, ext = os.path.splitext (fnmext) + if ext.lower () not in ('.abc'): + info ('skipped input file %s, it should 
have extension .abc' % fnmext) + continue + if os.path.isdir (fnmext): + info ('skipped directory %s. Only files are accepted' % fnmext) + continue + + fobj = open (fnmext, 'rb') + encoded_data = fobj.read () + fobj.close () + fragments = encoded_data.split ('X:') + preamble = fragments [0] # tunes can be preceeded by formatting instructions + tunes = fragments[1:] + if not tunes and preamble: tunes, preamble = ['1\n' + preamble], '' # tune without X: + skip, num = options.m # skip tunes, then read at most num tunes + numtunes = min ([len (tunes), num]) # number of tunes to be converted + for itune, tune in enumerate (tunes): + if itune < skip: continue + if itune >= skip + num: break + tune = preamble + 'X:' + tune # restore preamble before each tune + fnmNum = '%s%02d' % (fnm, itune + 1) if numtunes > 1 else fnm + try: # convert string abctext -> file pad/fnmNum.xml + convert (pad, fnmNum, tune, options.mxl) + except ParseException, err: pass # output already printed + except Exception, err: info ('an exception occurred.\n%s' % err) + info ('done in %.2f secs' % (time.time () - t_start)) diff --git a/abc2xml/pyparsing.py b/abc2xml/pyparsing.py new file mode 100644 index 0000000..9be97dc --- /dev/null +++ b/abc2xml/pyparsing.py @@ -0,0 +1,3749 @@ +# module pyparsing.py +# +# Copyright (c) 2003-2011 Paul T. McGuire +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +#from __future__ import generators + +__doc__ = \ +""" +pyparsing module - Classes and methods to define and execute parsing grammars + +The pyparsing module is an alternative approach to creating and executing simple grammars, +vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you +don't need to learn a new syntax for defining grammars or matching expressions - the parsing module +provides a library of classes that you use to construct the grammar directly in Python. + +Here is a program to parse "Hello, World!" (or any greeting of the form C{", !"}):: + + from pyparsing import Word, alphas + + # define grammar of a greeting + greet = Word( alphas ) + "," + Word( alphas ) + "!" + + hello = "Hello, World!" + print hello, "->", greet.parseString( hello ) + +The program outputs the following:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +The Python representation of the grammar is quite readable, owing to the self-explanatory +class names, and the use of '+', '|' and '^' operators. + +The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an +object with named attributes. + +The pyparsing module handles some of the problems that are typically vexing when writing text parsers: + - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 
+ - quoted strings + - embedded comments +""" + +__version__ = "1.5.6" +__versionTime__ = "26 June 2011 10:53" +__author__ = "Paul McGuire " + +import string +from weakref import ref as wkref +import copy +import sys +import warnings +import re +import sre_constants +#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) + +__all__ = [ +'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', +'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', +'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', +'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', +'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', +'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', +'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', +'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', +'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', +'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', +'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', +'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', +'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', +'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', +'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', +'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', +'indentedBlock', 'originalTextFor', +] + +""" +Detect if we are running version 3.X and make appropriate changes +Robert A. 
# Build the list of single-argument builtins that can be used directly as
# parse actions.  The module that exposes the builtins was renamed between
# Python 2 (``__builtin__``) and Python 3 (``builtins``); import whichever
# exists, but keep the historical module-level name ``__builtin__`` so any
# other code in this file that refers to it keeps working.
try:
    import __builtin__
except ImportError:
    import builtins as __builtin__

singleArgBuiltins = []
for fname in "sum len enumerate sorted reversed list tuple set any all".split():
    try:
        singleArgBuiltins.append(getattr(__builtin__, fname))
    except AttributeError:
        # This builtin does not exist in the running interpreter; skip it.
        continue
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Attributes set by the constructor:
     - loc - character offset at which the problem was detected
     - msg - the error message
     - pstr - the string being parsed ("" when only a message was given)
     - parserElement - the parser element passed as C{elem}, or None

    Attributes computed on demand (see C{__getattr__}):
     - lineno, col (alias column), line
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the first argument is the message itself
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

        Any other name raises C{AttributeError}.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           An empty C{markerString} returns the line unmarked.  The result
           is stripped of leading and trailing whitespace.
        """
        line_str = self.line
        line_column = self.column - 1   # columns are 1-based, slices 0-based
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # The method is spelled markInputline (lower-case 'l'); the previous
        # spelling "markInputLine" advertised an attribute that does not exist.
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()
class RecursiveGrammarException(Exception):
    """Raised by C{validate()} when a grammar could be improperly recursive.

    The list of parse elements handed to the constructor is kept in
    C{parseElementTrace} and shown in the string form of the exception.
    """

    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
isinstance(toklist,ParseResults): + self[name] = _ParseResultsWithOffset(toklist.copy(),0) + else: + self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) + self[name].__name = name + else: + try: + self[name] = toklist[0] + except (KeyError,TypeError,IndexError): + self[name] = toklist + + def __getitem__( self, i ): + if isinstance( i, (int,slice) ): + return self.__toklist[i] + else: + if i not in self.__accumNames: + return self.__tokdict[i][-1][0] + else: + return ParseResults([ v[0] for v in self.__tokdict[i] ]) + + def __setitem__( self, k, v, isinstance=isinstance ): + if isinstance(v,_ParseResultsWithOffset): + self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] + sub = v[0] + elif isinstance(k,int): + self.__toklist[k] = v + sub = v + else: + self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] + sub = v + if isinstance(sub,ParseResults): + sub.__parent = wkref(self) + + def __delitem__( self, i ): + if isinstance(i,(int,slice)): + mylen = len( self.__toklist ) + del self.__toklist[i] + + # convert int to slice + if isinstance(i, int): + if i < 0: + i += mylen + i = slice(i, i+1) + # get removed indices + removed = list(range(*i.indices(mylen))) + removed.reverse() + # fixup indices in token dictionary + for name in self.__tokdict: + occurrences = self.__tokdict[name] + for j in removed: + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) + else: + del self.__tokdict[i] + + def __contains__( self, k ): + return k in self.__tokdict + + def __len__( self ): return len( self.__toklist ) + def __bool__(self): return len( self.__toklist ) > 0 + __nonzero__ = __bool__ + def __iter__( self ): return iter( self.__toklist ) + def __reversed__( self ): return iter( self.__toklist[::-1] ) + def keys( self ): + """Returns all named result keys.""" + return self.__tokdict.keys() + + def pop( self, index=-1 ): + """Removes and returns item at 
def get(self, key, defaultValue=None):
    """Look up a named result.

    Returns C{self[key]} when C{key} is present; otherwise returns
    C{defaultValue}, which is C{None} unless the caller supplies one.
    """
    if key not in self:
        return defaultValue
    return self[key]
isinstance(other,int) and other == 0: + return self.copy() + + def __repr__( self ): + return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) + + def __str__( self ): + out = "[" + sep = "" + for i in self.__toklist: + if isinstance(i, ParseResults): + out += sep + _ustr(i) + else: + out += sep + repr(i) + sep = ", " + out += "]" + return out + + def _asStringList( self, sep='' ): + out = [] + for item in self.__toklist: + if out and sep: + out.append(sep) + if isinstance( item, ParseResults ): + out += item._asStringList() + else: + out.append( _ustr(item) ) + return out + + def asList( self ): + """Returns the parse results as a nested list of matching tokens, all converted to strings.""" + out = [] + for res in self.__toklist: + if isinstance(res,ParseResults): + out.append( res.asList() ) + else: + out.append( res ) + return out + + def asDict( self ): + """Returns the named parse results as dictionary.""" + return dict( self.items() ) + + def copy( self ): + """Returns a new copy of a C{ParseResults} object.""" + ret = ParseResults( self.__toklist ) + ret.__tokdict = self.__tokdict.copy() + ret.__parent = self.__parent + ret.__accumNames.update( self.__accumNames ) + ret.__name = self.__name + return ret + + def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): + """Returns the parse results as XML. 
Tags are created for tokens and lists that have defined results names.""" + nl = "\n" + out = [] + namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() + for v in vlist ] ) + nextLevelIndent = indent + " " + + # collapse out indents if formatting is not desired + if not formatted: + indent = "" + nextLevelIndent = "" + nl = "" + + selfTag = None + if doctag is not None: + selfTag = doctag + else: + if self.__name: + selfTag = self.__name + + if not selfTag: + if namedItemsOnly: + return "" + else: + selfTag = "ITEM" + + out += [ nl, indent, "<", selfTag, ">" ] + + worklist = self.__toklist + for i,res in enumerate(worklist): + if isinstance(res,ParseResults): + if i in namedItems: + out += [ res.asXML(namedItems[i], + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] + else: + out += [ res.asXML(None, + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] + else: + # individual token, see if there is a name for it + resTag = None + if i in namedItems: + resTag = namedItems[i] + if not resTag: + if namedItemsOnly: + continue + else: + resTag = "ITEM" + xmlBodyText = _xml_escape(_ustr(res)) + out += [ nl, nextLevelIndent, "<", resTag, ">", + xmlBodyText, + "" ] + + out += [ nl, indent, "" ] + return "".join(out) + + def __lookup(self,sub): + for k,vlist in self.__tokdict.items(): + for v,loc in vlist: + if sub is v: + return k + return None + + def getName(self): + """Returns the results name for this token expression.""" + if self.__name: + return self.__name + elif self.__parent: + par = self.__parent() + if par: + return par.__lookup(self) + else: + return None + elif (len(self) == 1 and + len(self.__tokdict) == 1 and + self.__tokdict.values()[0][0][1] in (0,-1)): + return self.__tokdict.keys()[0] + else: + return None + + def dump(self,indent='',depth=0): + """Diagnostic method for listing out the contents of a C{ParseResults}. 
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See C{ParserElement.parseString} for
    more information on parsing strings containing tabs, and suggested methods
    to maintain a consistent view of the parsed string, the parse location,
    and line and column positions within the parsed string.
    """
    # A location sitting exactly on a newline character is reported as column 1.
    if loc < len(strg) and strg[loc] == "\n":
        return 1
    # rfind() yields -1 on the first line, which makes offset 0 column 1.
    return loc - strg.rfind("\n", 0, loc)

def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See C{ParserElement.parseString} for
    more information on parsing strings containing tabs, and suggested methods
    to maintain a consistent view of the parsed string, the parse location,
    and line and column positions within the parsed string.
    """
    return strg.count("\n", 0, loc) + 1
def _trim_arity(func, maxargs=2):
    """Decorator to trim function calls to match the arity of the target.

    The returned wrapper is always called with the full argument list
    (for parse actions: C{s, loc, toks}).  It drops leading arguments one
    at a time until C{func} accepts the call, and remembers how many were
    dropped so later calls go straight to the working form.

    A single implementation serves Python 2 and 3.  The counter lives in a
    one-element list because the closure must update it and C{nonlocal}
    is not available on Python 2.

    NOTE: a C{TypeError} raised from inside C{func} itself is
    indistinguishable from an arity mismatch and also triggers a retry
    with fewer arguments; once C{maxargs + 1} leading arguments have been
    dropped the error is re-raised.
    """
    limit = [0]     # number of leading arguments currently being dropped
    def wrapper(*args):
        while 1:
            try:
                return func(*args[limit[0]:])
            except TypeError:
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise
    return wrapper
def setName(self, name):
    """Give this expression a name for use in debugging.

    Stores C{name}, rebuilds the "Expected <name>" error message, and, if
    the element already has an C{exception} attribute, updates that
    exception's message as well.  Returns C{self} so calls can be chained.
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    if not hasattr(self, "exception"):
        return self
    self.exception.msg = self.errmsg
    return self
+ """ + newself = self.copy() + if name.endswith("*"): + name = name[:-1] + listAllMatches=True + newself.resultsName = name + newself.modalResults = not listAllMatches + return newself + + def setBreak(self,breakFlag = True): + """Method to invoke the Python pdb debugger when this element is + about to be parsed. Set C{breakFlag} to True to enable, False to + disable. + """ + if breakFlag: + _parseMethod = self._parse + def breaker(instring, loc, doActions=True, callPreParse=True): + import pdb + pdb.set_trace() + return _parseMethod( instring, loc, doActions, callPreParse ) + breaker._originalParseMethod = _parseMethod + self._parse = breaker + else: + if hasattr(self._parse,"_originalParseMethod"): + self._parse = self._parse._originalParseMethod + return self + + def setParseAction( self, *fns, **kwargs ): + """Define action to perform when successfully matching parse element definition. + Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, + C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a ParseResults object + If the functions in fns modify the tokens, they can return them as the return + value from fn, and the modified list of tokens will replace the original. + Otherwise, fn does not need to return any value. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. 
def preParse(self, instring, loc):
    """Advance C{loc} past anything that should be skipped before matching.

    Ignorable expressions are skipped first (only when C{self.ignoreExprs}
    is non-empty), then, if C{self.skipWhitespace} is set, any run of
    characters found in C{self.whiteChars}.  Returns the new location.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    pos = loc
    while pos < end and instring[pos] in skippable:
        pos += 1
    return pos
self ) + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = preloc + try: + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise ParseException( instring, len(instring), self.errmsg, self ) + except ParseBaseException: + #~ print ("Exception raised:", err) + err = None + if self.debugActions[2]: + err = sys.exc_info()[1] + self.debugActions[2]( instring, tokensStart, self, err ) + if self.failAction: + if err is None: + err = sys.exc_info()[1] + self.failAction( instring, tokensStart, self, err ) + raise + else: + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = preloc + if self.mayIndexError or loc >= len(instring): + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise ParseException( instring, len(instring), self.errmsg, self ) + else: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + + tokens = self.postParse( instring, loc, tokens ) + + retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) + if self.parseAction and (doActions or self.callDuringTry): + if debugging: + try: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) + except ParseBaseException: + #~ print "Exception raised in user parse action:", err + if (self.debugActions[2] ): + err = sys.exc_info()[1] + self.debugActions[2]( instring, tokensStart, self, err ) + raise + else: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + 
def tryParse(self, instring, loc):
    """Attempt a match at C{loc} without running parse actions.

    Returns the location just past the match.  A C{ParseFatalException}
    raised during the attempt is converted into an ordinary
    C{ParseException} carrying this element's error message.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
def parseString( self, instring, parseAll=False ):
    """Parse C{instring} from position 0 with this expression and return
    the matched tokens.

    This is the call client code normally makes once the grammar has
    been fully assembled.

    With C{parseAll=True} the whole input must be consumed: after the
    match, an C{Empty() + StringEnd()} expression is parsed at the end
    location, which is equivalent to terminating the grammar with
    C{StringEnd()}.

    Note: unless C{keepTabs} is set on this expression (see
    C{parseWithTabs}), C{expandtabs()} is applied to the input before
    parsing, so that column numbers seen by parse actions are correct.
    If the input contains tabs and a parse action uses its C{loc}
    argument to index into the parsed string, keep the two views
    consistent by doing one of:
     - call C{parseWithTabs} on the grammar before C{parseString}
     - give the parse action the full C{(s,loc,toks)} signature and
       index into its C{s} argument rather than your own copy
     - expand the tabs yourself before calling C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        endLoc, results = self._parse( instring, 0 )
        if parseAll:
            endLoc = self.preParse( instring, endLoc )
            endOfInput = Empty() + StringEnd()
            endOfInput._parse( instring, endLoc )
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise sys.exc_info()[1]
    return results
def transformString( self, instring ):
    """Return a copy of C{instring} in which every match of this
    expression is replaced by the tokens produced for that match.

    Built on C{scanString}: attach a parse action that rewrites the
    token list, then call C{transformString()} on the target text.
    Text between matches is copied through unchanged; a match whose
    tokens are empty contributes nothing.

    Side effect: C{self.keepTabs} is set to True and left that way, so
    that the locations reported by C{scanString} index directly into
    the caller's string.
    """
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # untouched text lying between the previous match and this one
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance( toks, ParseResults ):
                    pieces.extend( toks.asList() )
                elif isinstance( toks, list ):
                    pieces.extend( toks )
                else:
                    pieces.append( toks )
            prevEnd = end
        pieces.append( instring[prevEnd:] )
        kept = [ piece for piece in pieces if piece ]
        return "".join( map( _ustr, _flatten( kept ) ) )
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise sys.exc_info()[1]
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
       C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
       tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
       may also include C{None} as in:
        - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + ZeroOrMore(expr)}
          (read as "at least n instances of C{expr}")
        - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
        - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
        - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}

       Note that C{expr*(None,n)} does not raise an exception if
       more than n exprs exist in the input stream; that is,
       C{expr*(None,n)} does not enforce a maximum number of expr
       occurrences.  If this behavior is desired, then write
       C{expr*(None,n) + ~expr}

       Raises C{TypeError} when C{other} is neither an int nor a tuple of
       ints/C{None}, and C{ValueError} for a negative count, a maximum
       smaller than the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so (n,) reads as (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # interpolate the type names; passing them as extra exception
            # args would leave the '%s' placeholders in the message
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: the (k+1)th repetition is tried only if the kth matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def setWhitespaceChars( self, chars ):
    """Use C{chars} as this element's whitespace characters in place of
       the defaults.

       Whitespace skipping is switched on and C{copyDefaultWhiteChars}
       is cleared.  Returns C{self} so calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.

       C{file_or_filename} may be any object with a C{read()} method, in
       which case its contents are used and the object is left open for
       the caller to close.  Anything else is treated as a filename: the
       file is opened in binary mode, read completely, and closed again
       before parsing starts (also when reading fails).

       C{parseAll} is passed through to C{parseString}, whose result is
       returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method - assume we were handed a path
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle even if read() raises
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
class Keyword(Token):
    """Token that matches a given string only where it stands alone as a
       keyword: the characters immediately before and after the match must
       not be identifier characters.  Compare with C{Literal}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Besides the keyword string, the constructor takes two optional arguments:
       C{identChars}, the characters regarded as identifier characters
       (by default all alphanumerics plus "_" and "$"), and C{caseless},
       which turns on case-insensitive matching (default C{False}).
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen == 0:
            # NOTE(review): firstMatchChar stays unset for an empty keyword
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        else:
            self.firstMatchChar = matchString[0]
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case, including the identifier chars
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        endLoc = loc + self.matchLen
        idChars = self.identChars
        lastStart = len(instring) - self.matchLen
        if self.caseless:
            matched = ( instring[ loc:endLoc ].upper() == self.caselessmatch and
                        (loc >= lastStart or instring[endLoc].upper() not in idChars) and
                        (loc == 0 or instring[loc-1].upper() not in idChars) )
        else:
            # first-character test comes first as a cheap rejection
            matched = ( instring[loc] == self.firstMatchChar and
                        (self.matchLen==1 or instring.startswith(self.match,loc)) and
                        (loc >= lastStart or instring[endLoc] not in idChars) and
                        (loc == 0 or instring[loc-1] not in idChars) )
        if matched:
            return endLoc, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        # the copy gets the class-wide default identifier characters
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessKeyword(Keyword):
    """Case-insensitive C{Keyword}: constructs the base class with
       C{caseless=True}.  A successful match returns the keyword as it
       was given to the constructor, not as it appears in the input.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        # Same three conditions as the caseless branch of Keyword.parseImpl:
        # the text matches, and neither the following nor the preceding
        # character is an identifier character.  Without the last check the
        # keyword would also match in the middle of a longer identifier.
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+ """ + def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): + super(Word,self).__init__() + if excludeChars: + initChars = ''.join([c for c in initChars if c not in excludeChars]) + if bodyChars: + bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) + self.initCharsOrig = initChars + self.initChars = set(initChars) + if bodyChars : + self.bodyCharsOrig = bodyChars + self.bodyChars = set(bodyChars) + else: + self.bodyCharsOrig = initChars + self.bodyChars = set(initChars) + + self.maxSpecified = max > 0 + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.asKeyword = asKeyword + + if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): + if self.bodyCharsOrig == self.initCharsOrig: + self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) + elif len(self.bodyCharsOrig) == 1: + self.reString = "%s[%s]*" % \ + (re.escape(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + else: + self.reString = "[%s][%s]*" % \ + (_escapeRegexRangeChars(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + if self.asKeyword: + self.reString = r"\b"+self.reString+r"\b" + try: + self.re = re.compile( self.reString ) + except: + self.re = None + + def parseImpl( self, instring, loc, doActions=True ): + if self.re: + result = self.re.match(instring,loc) + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + return loc, result.group() + + if not(instring[ loc ] in self.initChars): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = 
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       A precompiled pattern object is accepted as well.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

           If C{pattern} is already a compiled pattern object it is used
           directly and C{flags} is only recorded, not applied.  Any other
           type raises C{ValueError}; an invalid pattern string re-raises
           the C{re} error after issuing a C{SyntaxWarning}.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the regex source text; str() of a compiled pattern yields
            # the object's repr, not the expression
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        # expose named groups as named results on the returned tokens
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        # prefer the name reported by the base class; fall back to a
        # description built from the pattern when that lookup fails
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
+ """ + def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): + """ + Defined with the following parameters: + - quoteChar - string of one or more characters defining the quote delimiting string + - escChar - character to escape quotes, typically backslash (default=None) + - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) + - multiline - boolean indicating whether quotes can span multiple lines (default=False) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) + """ + super(QuotedString,self).__init__() + + # remove white space from quote chars - wont work anyway + quoteChar = quoteChar.strip() + if len(quoteChar) == 0: + warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + if endQuoteChar is None: + endQuoteChar = quoteChar + else: + endQuoteChar = endQuoteChar.strip() + if len(endQuoteChar) == 0: + warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + self.quoteChar = quoteChar + self.quoteCharLen = len(quoteChar) + self.firstQuoteChar = quoteChar[0] + self.endQuoteChar = endQuoteChar + self.endQuoteCharLen = len(endQuoteChar) + self.escChar = escChar + self.escQuote = escQuote + self.unquoteResults = unquoteResults + + if multiline: + self.flags = re.MULTILINE | re.DOTALL + self.pattern = r'%s(?:[^%s%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + else: + self.flags = 0 + self.pattern = r'%s(?:[^%s\n\r%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and 
class CharsNotIn(Token):
    """Token matching a run of characters none of which is in a given set.
       Constructed from a string of the excluded characters plus an
       optional minimum, maximum, and/or exact length.  C{min} defaults to
       1 and may not be smaller; C{max} and C{exact} default to 0, which
       means no maximum or exact length restriction.  Leading whitespace
       is not skipped by this token.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        # never scan past maxLen characters or the end of the input
        limit = min( begin+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            # show at most the first four excluded characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
Define with a string containing the whitespace characters to be + matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, + as defined for the C{Word} class.""" + whiteStrs = { + " " : "", + "\t": "", + "\n": "", + "\r": "", + "\f": "", + } + def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): + super(White,self).__init__() + self.matchWhite = ws + self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) + #~ self.leaveWhitespace() + self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) + self.mayReturnEmpty = True + self.errmsg = "Expected " + self.name + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + def parseImpl( self, instring, loc, doActions=True ): + if not(instring[ loc ] in self.matchWhite): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + start = loc + loc += 1 + maxloc = start + self.maxLen + maxloc = min( maxloc, len(instring) ) + while loc < maxloc and instring[loc] in self.matchWhite: + loc += 1 + + if loc - start < self.minLen: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + return loc, instring[start:loc] + + +class _PositionToken(Token): + def __init__( self ): + super(_PositionToken,self).__init__() + self.name=self.__class__.__name__ + self.mayReturnEmpty = True + self.mayIndexError = False + +class GoToColumn(_PositionToken): + """Token to advance to a specific column of input text; useful for tabular report scraping.""" + def __init__( self, colno ): + super(GoToColumn,self).__init__() + self.col = colno + + def preParse( self, instring, loc ): + if col(loc,instring) != self.col: + instrlen = len(instring) + if self.ignoreExprs: + loc = self._skipIgnorables( instring, loc ) + 
while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + thiscol = col( loc, instring ) + if thiscol > self.col: + raise ParseException( instring, loc, "Text not in expected column", self ) + newloc = loc + self.col - thiscol + ret = instring[ loc: newloc ] + return newloc, ret + +class LineStart(_PositionToken): + """Matches if current position is at the beginning of a line within the parse string""" + def __init__( self ): + super(LineStart,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected start of line" + + def preParse( self, instring, loc ): + preloc = super(LineStart,self).preParse(instring,loc) + if instring[preloc] == "\n": + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + if not( loc==0 or + (loc == self.preParse( instring, 0 )) or + (instring[loc-1] == "\n") ): #col(loc, instring) != 1: + #~ raise ParseException( instring, loc, "Expected start of line" ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class LineEnd(_PositionToken): + """Matches if current position is at the end of a line within the parse string""" + def __init__( self ): + super(LineEnd,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected end of line" + + def parseImpl( self, instring, loc, doActions=True ): + if loc len(instring): + return loc, [] + else: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class WordStart(_PositionToken): + """Matches if the current position is at the beginning of a Word, and + is not preceded by any character in a given set of C{wordChars} + (default=C{printables}). To emulate the C{\b} behavior of regular expressions, + use C{WordStart(alphanums)}. 
C{WordStart} will also match at the beginning of + the string being parsed, or at the beginning of a line. + """ + def __init__(self, wordChars = printables): + super(WordStart,self).__init__() + self.wordChars = set(wordChars) + self.errmsg = "Not at the start of a word" + + def parseImpl(self, instring, loc, doActions=True ): + if loc != 0: + if (instring[loc-1] in self.wordChars or + instring[loc] not in self.wordChars): + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class WordEnd(_PositionToken): + """Matches if the current position is at the end of a Word, and + is not followed by any character in a given set of C{wordChars} + (default=C{printables}). To emulate the C{\b} behavior of regular expressions, + use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of + the string being parsed, or at the end of a line. + """ + def __init__(self, wordChars = printables): + super(WordEnd,self).__init__() + self.wordChars = set(wordChars) + self.skipWhitespace = False + self.errmsg = "Not at the end of a word" + + def parseImpl(self, instring, loc, doActions=True ): + instrlen = len(instring) + if instrlen>0 and loc maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + else: + if loc2 > maxMatchLoc: + maxMatchLoc = loc2 + maxMatchExp = e + + if maxMatchLoc < 0: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + return maxMatchExp._parse( instring, loc, doActions ) + + def __ixor__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #Or( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in 
self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class MatchFirst(ParseExpression): + """Requires that at least one C{ParseExpression} is found. + If two expressions match, the first one listed is the one that will match. + May be constructed using the C{'|'} operator. + """ + def __init__( self, exprs, savelist = False ): + super(MatchFirst,self).__init__(exprs, savelist) + if exprs: + self.mayReturnEmpty = False + for e in self.exprs: + if e.mayReturnEmpty: + self.mayReturnEmpty = True + break + else: + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + maxExcLoc = -1 + maxException = None + for e in self.exprs: + try: + ret = e._parse( instring, loc, doActions ) + return ret + except ParseException, err: + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + + # only got here if no expression matched, raise exception for match that made it the furthest + else: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + def __ior__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #MatchFirst( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class Each(ParseExpression): + """Requires all given C{ParseExpression}s to be found, but in 
any order. + Expressions may be separated by whitespace. + May be constructed using the C{'&'} operator. + """ + def __init__( self, exprs, savelist = True ): + super(Each,self).__init__(exprs, savelist) + self.mayReturnEmpty = True + for e in self.exprs: + if not e.mayReturnEmpty: + self.mayReturnEmpty = False + break + self.skipWhitespace = True + self.initExprGroups = True + + def parseImpl( self, instring, loc, doActions=True ): + if self.initExprGroups: + opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] + opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ] + self.optionals = opt1 + opt2 + self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] + self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] + self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] + self.required += self.multirequired + self.initExprGroups = False + tmpLoc = loc + tmpReqd = self.required[:] + tmpOpt = self.optionals[:] + matchOrder = [] + + keepMatching = True + while keepMatching: + tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired + failed = [] + for e in tmpExprs: + try: + tmpLoc = e.tryParse( instring, tmpLoc ) + except ParseException: + failed.append(e) + else: + matchOrder.append(e) + if e in tmpReqd: + tmpReqd.remove(e) + elif e in tmpOpt: + tmpOpt.remove(e) + if len(failed) == len(tmpExprs): + keepMatching = False + + if tmpReqd: + missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) + raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) + + # add any unmatched Optionals, in case they have default values defined + matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] + + resultlist = [] + for e in matchOrder: + loc,results = e._parse(instring,loc,doActions) + resultlist.append(results) + + finalResults = ParseResults([]) + for r in resultlist: + dups = {} + for k in 
r.keys(): + if k in finalResults.keys(): + tmp = ParseResults(finalResults[k]) + tmp += ParseResults(r[k]) + dups[k] = tmp + finalResults += ParseResults(r) + for k,v in dups.items(): + finalResults[k] = v + return loc, finalResults + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class ParseElementEnhance(ParserElement): + """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.""" + def __init__( self, expr, savelist=False ): + super(ParseElementEnhance,self).__init__(savelist) + if isinstance( expr, basestring ): + expr = Literal(expr) + self.expr = expr + self.strRepr = None + if expr is not None: + self.mayIndexError = expr.mayIndexError + self.mayReturnEmpty = expr.mayReturnEmpty + self.setWhitespaceChars( expr.whiteChars ) + self.skipWhitespace = expr.skipWhitespace + self.saveAsList = expr.saveAsList + self.callPreparse = expr.callPreparse + self.ignoreExprs.extend(expr.ignoreExprs) + + def parseImpl( self, instring, loc, doActions=True ): + if self.expr is not None: + return self.expr._parse( instring, loc, doActions, callPreParse=False ) + else: + raise ParseException("",loc,self.errmsg,self) + + def leaveWhitespace( self ): + self.skipWhitespace = False + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.leaveWhitespace() + return self + + def ignore( self, other ): + if isinstance( other, Suppress ): + if other not in self.ignoreExprs: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + else: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + 
return self + + def streamline( self ): + super(ParseElementEnhance,self).streamline() + if self.expr is not None: + self.expr.streamline() + return self + + def checkRecursion( self, parseElementList ): + if self in parseElementList: + raise RecursiveGrammarException( parseElementList+[self] ) + subRecCheckList = parseElementList[:] + [ self ] + if self.expr is not None: + self.expr.checkRecursion( subRecCheckList ) + + def validate( self, validateTrace=[] ): + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion( [] ) + + def __str__( self ): + try: + return super(ParseElementEnhance,self).__str__() + except: + pass + + if self.strRepr is None and self.expr is not None: + self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) + return self.strRepr + + +class FollowedBy(ParseElementEnhance): + """Lookahead matching of the given parse expression. C{FollowedBy} + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression matches at the current + position. C{FollowedBy} always returns a null token list.""" + def __init__( self, expr ): + super(FollowedBy,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + self.expr.tryParse( instring, loc ) + return loc, [] + + +class NotAny(ParseElementEnhance): + """Lookahead to disallow matching with the given parse expression. C{NotAny} + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression does *not* match at the current + position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny} + always returns a null token list. 
May be constructed using the '~' operator.""" + def __init__( self, expr ): + super(NotAny,self).__init__(expr) + #~ self.leaveWhitespace() + self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs + self.mayReturnEmpty = True + self.errmsg = "Found unwanted token, "+_ustr(self.expr) + + def parseImpl( self, instring, loc, doActions=True ): + try: + self.expr.tryParse( instring, loc ) + except (ParseException,IndexError): + pass + else: + #~ raise ParseException(instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "~{" + _ustr(self.expr) + "}" + + return self.strRepr + + +class ZeroOrMore(ParseElementEnhance): + """Optional repetition of zero or more of the given expression.""" + def __init__( self, expr ): + super(ZeroOrMore,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + tokens = [] + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]..." 
+ + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + + +class OneOrMore(ParseElementEnhance): + """Repetition of one or more of the given expression.""" + def parseImpl( self, instring, loc, doActions=True ): + # must be at least one + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + try: + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + _ustr(self.expr) + "}..." + + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(OneOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + +class _NullToken(object): + def __bool__(self): + return False + __nonzero__ = __bool__ + def __str__(self): + return "" + +_optionalNotMatched = _NullToken() +class Optional(ParseElementEnhance): + """Optional matching of the given expression. + A default return string can also be specified, if the optional expression + is not found. 
+ """ + def __init__( self, exprs, default=_optionalNotMatched ): + super(Optional,self).__init__( exprs, savelist=False ) + self.defaultValue = default + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + except (ParseException,IndexError): + if self.defaultValue is not _optionalNotMatched: + if self.expr.resultsName: + tokens = ParseResults([ self.defaultValue ]) + tokens[self.expr.resultsName] = self.defaultValue + else: + tokens = [ self.defaultValue ] + else: + tokens = [] + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]" + + return self.strRepr + + +class SkipTo(ParseElementEnhance): + """Token for skipping over all undefined text until the matched expression is found. + If C{include} is set to true, the matched expression is also parsed (the skipped text + and matched expression are returned as a 2-element list). The C{ignore} + argument is used to define grammars (typically quoted strings and comments) that + might contain false matches. 
+ """ + def __init__( self, other, include=False, ignore=None, failOn=None ): + super( SkipTo, self ).__init__( other ) + self.ignoreExpr = ignore + self.mayReturnEmpty = True + self.mayIndexError = False + self.includeMatch = include + self.asList = False + if failOn is not None and isinstance(failOn, basestring): + self.failOn = Literal(failOn) + else: + self.failOn = failOn + self.errmsg = "No match found for "+_ustr(self.expr) + + def parseImpl( self, instring, loc, doActions=True ): + startLoc = loc + instrlen = len(instring) + expr = self.expr + failParse = False + while loc <= instrlen: + try: + if self.failOn: + try: + self.failOn.tryParse(instring, loc) + except ParseBaseException: + pass + else: + failParse = True + raise ParseException(instring, loc, "Found expression " + str(self.failOn)) + failParse = False + if self.ignoreExpr is not None: + while 1: + try: + loc = self.ignoreExpr.tryParse(instring,loc) + # print "found ignoreExpr, advance to", loc + except ParseBaseException: + break + expr._parse( instring, loc, doActions=False, callPreParse=False ) + skipText = instring[startLoc:loc] + if self.includeMatch: + loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) + if mat: + skipRes = ParseResults( skipText ) + skipRes += mat + return loc, [ skipRes ] + else: + return loc, [ skipText ] + else: + return loc, [ skipText ] + except (ParseException,IndexError): + if failParse: + raise + else: + loc += 1 + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class Forward(ParseElementEnhance): + """Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. + + Note: take care when assigning to C{Forward} not to overlook precedence of operators. 
+ Specifically, '|' has a lower precedence than '<<', so that:: + fwdExpr << a | b | c + will actually be evaluated as:: + (fwdExpr << a) | b | c + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the C{Forward}:: + fwdExpr << (a | b | c) + """ + def __init__( self, other=None ): + super(Forward,self).__init__( other, savelist=False ) + + def __lshift__( self, other ): + if isinstance( other, basestring ): + other = Literal(other) + self.expr = other + self.mayReturnEmpty = other.mayReturnEmpty + self.strRepr = None + self.mayIndexError = self.expr.mayIndexError + self.mayReturnEmpty = self.expr.mayReturnEmpty + self.setWhitespaceChars( self.expr.whiteChars ) + self.skipWhitespace = self.expr.skipWhitespace + self.saveAsList = self.expr.saveAsList + self.ignoreExprs.extend(self.expr.ignoreExprs) + return None + + def leaveWhitespace( self ): + self.skipWhitespace = False + return self + + def streamline( self ): + if not self.streamlined: + self.streamlined = True + if self.expr is not None: + self.expr.streamline() + return self + + def validate( self, validateTrace=[] ): + if self not in validateTrace: + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion([]) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + self._revertClass = self.__class__ + self.__class__ = _ForwardNoRecurse + try: + if self.expr is not None: + retString = _ustr(self.expr) + else: + retString = "None" + finally: + self.__class__ = self._revertClass + return self.__class__.__name__ + ": " + retString + + def copy(self): + if self.expr is not None: + return super(Forward,self).copy() + else: + ret = Forward() + ret << self + return ret + +class _ForwardNoRecurse(Forward): + def __str__( self ): + return "..." 
+ +class TokenConverter(ParseElementEnhance): + """Abstract subclass of C{ParseExpression}, for converting parsed results.""" + def __init__( self, expr, savelist=False ): + super(TokenConverter,self).__init__( expr )#, savelist ) + self.saveAsList = False + +class Upcase(TokenConverter): + """Converter to upper case all matching tokens.""" + def __init__(self, *args): + super(Upcase,self).__init__(*args) + warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", + DeprecationWarning,stacklevel=2) + + def postParse( self, instring, loc, tokenlist ): + return list(map( string.upper, tokenlist )) + + +class Combine(TokenConverter): + """Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the input string; + this can be disabled by specifying C{'adjacent=False'} in the constructor. + """ + def __init__( self, expr, joinString="", adjacent=True ): + super(Combine,self).__init__( expr ) + # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself + if adjacent: + self.leaveWhitespace() + self.adjacent = adjacent + self.skipWhitespace = True + self.joinString = joinString + self.callPreparse = True + + def ignore( self, other ): + if self.adjacent: + ParserElement.ignore(self, other) + else: + super( Combine, self).ignore( other ) + return self + + def postParse( self, instring, loc, tokenlist ): + retToks = tokenlist.copy() + del retToks[:] + retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) + + if self.resultsName and len(retToks.keys())>0: + return [ retToks ] + else: + return retToks + +class Group(TokenConverter): + """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" + def __init__( self, expr ): + super(Group,self).__init__( expr ) + self.saveAsList = True + + def postParse( self, instring, 
loc, tokenlist ): + return [ tokenlist ] + +class Dict(TokenConverter): + """Converter to return a repetitive expression as a list, but also as a dictionary. + Each element can also be referenced using the first token in the expression as its key. + Useful for tabular report scraping when the first column can be used as a item key. + """ + def __init__( self, exprs ): + super(Dict,self).__init__( exprs ) + self.saveAsList = True + + def postParse( self, instring, loc, tokenlist ): + for i,tok in enumerate(tokenlist): + if len(tok) == 0: + continue + ikey = tok[0] + if isinstance(ikey,int): + ikey = _ustr(tok[0]).strip() + if len(tok)==1: + tokenlist[ikey] = _ParseResultsWithOffset("",i) + elif len(tok)==2 and not isinstance(tok[1],ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) + else: + dictvalue = tok.copy() #ParseResults(i) + del dictvalue[0] + if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) + else: + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) + + if self.resultsName: + return [ tokenlist ] + else: + return tokenlist + + +class Suppress(TokenConverter): + """Converter for ignoring the results of a parsed expression.""" + def postParse( self, instring, loc, tokenlist ): + return [] + + def suppress( self ): + return self + + +class OnlyOnce(object): + """Wrapper for parse actions, to ensure they are only called once.""" + def __init__(self, methodCall): + self.callable = _trim_arity(methodCall) + self.called = False + def __call__(self,s,l,t): + if not self.called: + results = self.callable(s,l,t) + self.called = True + return results + raise ParseException(s,l,"") + def reset(self): + self.called = False + +def traceParseAction(f): + """Decorator for debugging parse actions.""" + f = _trim_arity(f) + def z(*paArgs): + thisFunc = f.func_name + s,l,t = paArgs[-3:] + if len(paArgs)>3: + thisFunc = paArgs[0].__class__.__name__ + '.' 
+ thisFunc + sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) + try: + ret = f(*paArgs) + except Exception: + exc = sys.exc_info()[1] + sys.stderr.write( "<", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) + try: + if len(symbols)==len("".join(symbols)): + return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) + else: + return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) + except: + warnings.warn("Exception creating Regex for oneOf, building MatchFirst", + SyntaxWarning, stacklevel=2) + + + # last resort, just use MatchFirst + return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) + +def dictOf( key, value ): + """Helper to easily and clearly define a dictionary by specifying the respective patterns + for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens + in the proper order. The key pattern can include delimiting markers or punctuation, + as long as they are suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the C{Dict} results can include named token + fields. + """ + return Dict( ZeroOrMore( Group ( key + value ) ) ) + +def originalTextFor(expr, asString=True): + """Helper to return the original, untokenized text for a given expression. Useful to + restore the parsed fields of an HTML start tag into the raw tag text itself, or to + revert separate tokens with intervening whitespace back to the original matching + input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not + require the inspect module to chase up the call stack. By default, returns a + string containing the original parsed text. + + If the optional C{asString} argument is passed as C{False}, then the return value is a + C{ParseResults} containing any results names that were originally matched, and a + single token containing the original matched text from the input string. 
So if + the expression passed to C{L{originalTextFor}} contains expressions with defined + results names, you must set C{asString} to C{False} if you want to preserve those + results name values.""" + locMarker = Empty().setParseAction(lambda s,loc,t: loc) + endlocMarker = locMarker.copy() + endlocMarker.callPreparse = False + matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") + if asString: + extractText = lambda s,l,t: s[t._original_start:t._original_end] + else: + def extractText(s,l,t): + del t[:] + t.insert(0, s[t._original_start:t._original_end]) + del t["_original_start"] + del t["_original_end"] + matchExpr.setParseAction(extractText) + return matchExpr + +def ungroup(expr): + """Helper to undo pyparsing's default grouping of And expressions, even + if all but one are non-empty.""" + return TokenConverter(expr).setParseAction(lambda t:t[0]) + +# convenience constants for positional expressions +empty = Empty().setName("empty") +lineStart = LineStart().setName("lineStart") +lineEnd = LineEnd().setName("lineEnd") +stringStart = StringStart().setName("stringStart") +stringEnd = StringEnd().setName("stringEnd") + +_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) +_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) +_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16))) +_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) +_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) +_charRange = Group(_singleChar + Suppress("-") + _singleChar) +_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" + +_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) + +def 
srange(s): + r"""Helper to easily define string ranges for use in Word construction. Borrows + syntax from regexp '[]' string range definitions:: + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + The input string must be enclosed in []'s, and the returned string is the expanded + character set joined into a single string. + The values enclosed in the []'s may be:: + a single character + an escaped character with a leading backslash (such as \- or \]) + an escaped hex character with a leading '\x' (\x21, which is a '!' character) + (\0x## is also supported for backwards compatibility) + an escaped octal character with a leading '\0' (\041, which is a '!' character) + a range of any of the above, separated by a dash ('a-z', etc.) + any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) + """ + try: + return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) + except: + return "" + +def matchOnlyAtCol(n): + """Helper method for defining parse actions that require matching at a specific + column in the input text. + """ + def verifyCol(strg,locn,toks): + if col(locn,strg) != n: + raise ParseException(strg,locn,"matched token not at column %d" % n) + return verifyCol + +def replaceWith(replStr): + """Helper method for common parse actions that simply return a literal value. Especially + useful when used with C{transformString()}. + """ + def _replFunc(*args): + return [replStr] + return _replFunc + +def removeQuotes(s,l,t): + """Helper parse action for removing quotation marks from parsed quoted strings. 
+ To use, add this parse action to quoted string using:: + quotedString.setParseAction( removeQuotes ) + """ + return t[0][1:-1] + +def upcaseTokens(s,l,t): + """Helper parse action to convert tokens to upper case.""" + return [ tt.upper() for tt in map(_ustr,t) ] + +def downcaseTokens(s,l,t): + """Helper parse action to convert tokens to lower case.""" + return [ tt.lower() for tt in map(_ustr,t) ] + +def keepOriginalText(s,startLoc,t): + """DEPRECATED - use new helper method C{originalTextFor}. + Helper parse action to preserve original parsed text, + overriding any nested parse actions.""" + try: + endloc = getTokensEndLoc() + except ParseException: + raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") + del t[:] + t += ParseResults(s[startLoc:endloc]) + return t + +def getTokensEndLoc(): + """Method to be called from within a parse action to determine the end + location of the parsed tokens.""" + import inspect + fstack = inspect.stack() + try: + # search up the stack (through intervening argument normalizers) for correct calling routine + for f in fstack[2:]: + if f[3] == "_parseNoCache": + endloc = f[0].f_locals["loc"] + return endloc + else: + raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") + finally: + del fstack + +def _makeTags(tagStr, xml): + """Internal helper to construct opening and closing tag expressions, given a tag name""" + if isinstance(tagStr,basestring): + resname = tagStr + tagStr = Keyword(tagStr, caseless=not xml) + else: + resname = tagStr.name + + tagAttrName = Word(alphas,alphanums+"_-:") + if (xml): + tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) + openTag = Suppress("<") + tagStr("tag") + \ + Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + else: + 
printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) + tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) + openTag = Suppress("<") + tagStr("tag") + \ + Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ + Optional( Suppress("=") + tagAttrValue ) ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + closeTag = Combine(_L("") + + openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) + closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % tagStr) + openTag.tag = resname + closeTag.tag = resname + return openTag, closeTag + +def makeHTMLTags(tagStr): + """Helper to construct opening and closing tag expressions for HTML, given a tag name""" + return _makeTags( tagStr, False ) + +def makeXMLTags(tagStr): + """Helper to construct opening and closing tag expressions for XML, given a tag name""" + return _makeTags( tagStr, True ) + +def withAttribute(*args,**attrDict): + """Helper to create a validating parse action to be used with start tags created + with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag + with a required attribute value, to avoid false matches on common tags such as + C{} or C{
}. + + Call C{withAttribute} with a series of attribute names and values. Specify the list + of filter attributes names and values as: + - keyword arguments, as in C{(align="right")}, or + - as an explicit dict with C{**} operator, when an attribute name is also a Python + reserved word, as in C{**{"class":"Customer", "align":"right"}} + - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) + For attribute names with a namespace prefix, you must use the second form. Attribute + names are matched insensitive to upper/lower case. + + To verify that the attribute exists, but without specifying a value, pass + C{withAttribute.ANY_VALUE} as the value. + """ + if args: + attrs = args[:] + else: + attrs = attrDict.items() + attrs = [(k,v) for k,v in attrs] + def pa(s,l,tokens): + for attrName,attrValue in attrs: + if attrName not in tokens: + raise ParseException(s,l,"no matching attribute " + attrName) + if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % + (attrName, tokens[attrName], attrValue)) + return pa +withAttribute.ANY_VALUE = object() + +opAssoc = _Constants() +opAssoc.LEFT = object() +opAssoc.RIGHT = object() + +def operatorPrecedence( baseExpr, opList ): + """Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary or + binary, left- or right-associative. Parse actions can also be attached + to operator expressions. 
+ + Parameters: + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the + expression grammar; each tuple is of the form + (opExpr, numTerms, rightLeftAssoc, parseAction), where: + - opExpr is the pyparsing expression for the operator; + may also be a string, which will be converted to a Literal; + if numTerms is 3, opExpr is a tuple of two expressions, for the + two operators separating the 3 terms + - numTerms is the number of terms for this operator (must + be 1, 2, or 3) + - rightLeftAssoc is the indicator whether the operator is + right or left associative, using the pyparsing-defined + constants opAssoc.RIGHT and opAssoc.LEFT. + - parseAction is the parse action to be associated with + expressions matching this operator expression (the + parse action tuple member may be omitted) + """ + ret = Forward() + lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) + for i,operDef in enumerate(opList): + opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] + if arity == 3: + if opExpr is None or len(opExpr) != 2: + raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") + opExpr1, opExpr2 = opExpr + thisExpr = Forward()#.setName("expr%d" % i) + if rightLeftAssoc == opAssoc.LEFT: + if arity == 1: + matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) + else: + matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ + Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + elif rightLeftAssoc == opAssoc.RIGHT: + if arity == 1: + # try to 
avoid LR with this extra test + if not isinstance(opExpr, Optional): + opExpr = Optional(opExpr) + matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) + else: + matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ + Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + else: + raise ValueError("operator must indicate right or left associativity") + if pa: + matchExpr.setParseAction( pa ) + thisExpr << ( matchExpr | lastExpr ) + lastExpr = thisExpr + ret << lastExpr + return ret + +dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") +sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") +quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") +unicodeString = Combine(_L('u') + quotedString.copy()) + +def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): + """Helper method for defining nested lists enclosed in opening and closing + delimiters ("(" and ")" are the default). 
+ + Parameters: + - opener - opening character for a nested list (default="("); can also be a pyparsing expression + - closer - closing character for a nested list (default=")"); can also be a pyparsing expression + - content - expression for items within the nested lists (default=None) + - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) + + If an expression is not provided for the content argument, the nested + expression will capture all whitespace-delimited content between delimiters + as a list of separate values. + + Use the C{ignoreExpr} argument to define expressions that may contain + opening or closing characters that should not be treated as opening + or closing characters for nesting, such as quotedString or a comment + expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. + The default is L{quotedString}, but if no expressions are to be ignored, + then pass C{None} for this argument. + """ + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener,basestring) and isinstance(closer,basestring): + if len(opener) == 1 and len(closer)==1: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t:t[0].strip())) + else: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + ~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + raise ValueError("opening and closing arguments must be 
strings if no content expression is given") + ret = Forward() + if ignoreExpr is not None: + ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) + else: + ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) + return ret + +def indentedBlock(blockStatementExpr, indentStack, indent=True): + """Helper method for defining space-delimited indentation blocks, such as + those used to define block statements in Python source code. + + Parameters: + - blockStatementExpr - expression defining syntax of statement that + is repeated within the indented block + - indentStack - list created by caller to manage indentation stack + (multiple statementWithIndentedBlock expressions within a single grammar + should share a common indentStack) + - indent - boolean indicating whether block must be indented beyond the + the current level; set to False for block of left-most statements + (default=True) + + A valid block must contain at least one C{blockStatement}. 
+ """ + def checkPeerIndent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseFatalException(s,l,"illegal nesting") + raise ParseException(s,l,"not a peer entry") + + def checkSubIndent(s,l,t): + curCol = col(l,s) + if curCol > indentStack[-1]: + indentStack.append( curCol ) + else: + raise ParseException(s,l,"not a subentry") + + def checkUnindent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + raise ParseException(s,l,"not an unindent") + indentStack.pop() + + NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) + INDENT = Empty() + Empty().setParseAction(checkSubIndent) + PEER = Empty().setParseAction(checkPeerIndent) + UNDENT = Empty().setParseAction(checkUnindent) + if indent: + smExpr = Group( Optional(NL) + + #~ FollowedBy(blockStatementExpr) + + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) + else: + smExpr = Group( Optional(NL) + + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) +commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() +_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) +replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") + +htmlComment = Regex(r"") +restOfLine = Regex(r".*").leaveWhitespace() +dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") +cppStyleComment = 
Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) + print ("tokens = " + str(tokens)) + print ("tokens.columns = " + str(tokens.columns)) + print ("tokens.tables = " + str(tokens.tables)) + print (tokens.asXML("SQL",True)) + except ParseBaseException: + err = sys.exc_info()[1] + print (teststring + "->") + print (err.line) + print (" "*(err.column-1) + "^") + print (err) + print() + + selectToken = CaselessLiteral( "select" ) + fromToken = CaselessLiteral( "from" ) + + ident = Word( alphas, alphanums + "_$" ) + columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + columnNameList = Group( delimitedList( columnName ) )#.setName("columns") + tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + tableNameList = Group( delimitedList( tableName ) )#.setName("tables") + simpleSQL = ( selectToken + \ + ( '*' | columnNameList ).setResultsName( "columns" ) + \ + fromToken + \ + tableNameList.setResultsName( "tables" ) ) + + test( "SELECT * from XYZZY, ABC" ) + test( "select * from SYS.XYZZY" ) + test( "Select A from Sys.dual" ) + test( "Select AA,BB,CC from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Xelect A, B, C from Sys.dual" ) + test( "Select A, B, C frox Sys.dual" ) + test( "Select" ) + test( "Select ^^^ frox Sys.dual" ) + test( "Select A, B, C from Sys.dual, Table2 " ) diff --git a/dottes.html.tune b/dottes.html.tune index b802d02..57b45c0 100644 --- a/dottes.html.tune +++ b/dottes.html.tune @@ -64,6 +64,8 @@ href="../@MASTERBOOKE@/@TUNE@.ogg">OGG
  • ABC
  • +
  • XML