Source code for pyampact.symbolicUtils

"""
symbolicUtils
=======================

.. autosummary::
   :toctree: generated/

   _escape_cdata
   addMEINote
   addTieBreakers
   kernClefHelper
   combineRests
   combineUnisons
   fromJSON
   _id_gen
   indentMEI
   _kernChordHelper
   kernFooter
   kernHeader
   _kernNoteHelper
   kernNRCHelper
   noteRestHelper
   remove_namespaces
   removeTied
   snapTo
   truncate_and_scale_onsOffsList
   githubURLtoRaw
"""

import json, re, requests
import numpy as np
import pandas as pd
import re as _re
import xml.etree.ElementTree as ET
from fractions import Fraction

# Names exported by a star-import of this module. Underscore-prefixed helpers
# are listed explicitly because `from module import *` would otherwise skip
# them.
__all__ = [
    '_escape_cdata',
    'addMEINote',
    'addTieBreakers',
    'kernClefHelper',
    'combineRests',
    'combineUnisons',
    'fromJSON',
    '_id_gen',
    'idGen',
    'indentMEI',
    '_kernChordHelper',
    'kernFooter',
    'kernHeader',
    '_kernNoteHelper',
    'kernNRCHelper',
    'noteRestHelper',
    'remove_namespaces',
    'removeTied',
    'snapTo',
    'truncate_and_scale_onsOffsList',
    'githubURLtoRaw',
    'duration2MEI',
    '_duration2Kern',
    'meiDeclaration',
    'explode_kern_chords',
]

def _escape_cdata(text):
    """
    Escape character data for XML serialization, leaving CDATA sections intact.

    If `text` is a CDATA section (``<![CDATA[ ... ]]>``, optionally surrounded
    by whitespace) it is returned untouched. Otherwise the characters "&", "<"
    and ">" are replaced by "&amp;", "&lt;" and "&gt;".

    This function is installed over `xml.etree.ElementTree._escape_cdata` at
    module level so that CDATA sections survive serialization unescaped.

    Parameters:
        text (str): The input string to be escaped.

    Returns:
        str: The escaped string, or the original string for a CDATA section.

    Raises:
        TypeError: If the input is not a string.
    """
    try:
        # Test the stripped text so a CDATA section is recognised whether or
        # not it carries leading/trailing whitespace. The original text is
        # what gets returned, so existing padding is preserved.
        stripped = text.strip()
        if stripped.startswith("<![CDATA[") and stripped.endswith("]]>"):
            return text
        # "&" has to be replaced first, otherwise the ampersands introduced by
        # the "<" and ">" entities would be escaped a second time.
        return (text.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;"))
    except (TypeError, AttributeError) as err:
        # Non-string input (e.g. an int) fails with AttributeError on the
        # string methods; report it as the documented TypeError.
        raise TypeError(
            "cannot serialize %r (type %s)" % (text, type(text).__name__)
        ) from err
# Replace ElementTree's private text escaper with the CDATA-aware version
# defined in this module, so CDATA sections are serialized unescaped.
ET._escape_cdata = _escape_cdata

# Maps a duration in quarter notes (rounded to 5 decimal places) to the kern
# duration token. String keys are music21 duration type names and are used for
# grace notes, which have no quarter-note length to look up.
_duration2Kern = {  # keys get rounded to 5 decimal places
    56: '000..',
    48: '000.',
    32: '000',
    28: '00..',
    24: '00.',
    16: '00',
    14: '0..',
    12: '0.',
    8: '0',
    7: '1..',
    6: '1.',
    4: '1',
    3.5: '2..',
    3: '2.',
    2.66667: '3%2',
    2: '2',
    1.75: '4..',
    1.5: '4.',
    1.33333: '3',
    1: '4',
    .875: '8..',
    .75: '8.',
    .66667: '6',
    .5: '8',
    .4375: '16..',
    .375: '16.',
    .33333: '12',
    .25: '16',
    .21875: '32..',
    .1875: '32.',
    .16667: '24',
    .125: '32',
    .10938: '64..',
    .09375: '64.',
    .08333: '48',
    .0625: '64',
    .05469: '128..',
    .04688: '128.',
    .04167: '96',
    .03125: '128',
    .02734: '256..',
    .02344: '256.',
    .02083: '192',
    .01563: '256',
    # round(0.015625, 5) is an exact tie and rounds half-to-even to 0.01562,
    # so this is the key a real 256th note actually produces.
    .01562: '256',
    .01367: '512..',
    .01172: '512.',
    .01042: '384',
    .00781: '512',
    .00684: '1024..',   # 7/1024 of a quarter: double-dotted 1024th
    .00586: '1024.',
    .00521: '768',      # 4/768 rounded to 5 places
    .00391: '1024',
    0: '',
    '128th': '128',     # grace note durations
    '64th': '64',
    '32nd': '32',
    '16th': '16',
    'eighth': '8',
    'quarter': '8'      # make quarter grace notes default to eighth notes too
}

# Maps music21 duration type names to MEI `dur` attribute values.
duration2MEI = {
    'complex': 'complex',   # added this 3/5
    'maxima': 'maxima',
    'longa': 'longa',
    'breve': 'breve',
    'whole': '1',
    'half': '2',
    'quarter': '4',
    'eighth': '8',
    '16th': '16',
    '32nd': '32',
    '64th': '64',
    '128th': '128',
    '256th': '256',
    '512th': '512',
    '1024th': '1024'
}

# Matches any character other than T, t, P, p, D or d.
function_pattern = re.compile('[^TtPpDd]')
# Module-level dictionary, empty at import time.
# NOTE(review): presumably a cache of already imported scores -- confirm
# against the code that fills it.
imported_scores = {}
# Whole-string patterns; presumably used to recognise tinyNotation and
# volpiano input -- confirm against the callers.
tinyNotation_pattern = re.compile("^[-0-9a-zA-Zn _/'#:~.{}=]+$")
volpiano_pattern = re.compile(r'^\d--[a-zA-Z0-9\-\)\?]*$')

# XML declaration and schema processing instruction for MEI output.
meiDeclaration = """<?xml version="1.0" encoding="UTF-8"?>
<?xml-model href="https://music-encoding.org/schema/dev/mei-all.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
"""
def addMEINote(note, parent, syl=None):
    """
    Add a note element to an MEI parent element from a music21 note object.

    Creates a 'note' subelement under `parent` and sets its attributes from
    the note: octave, pitch name, id, dots, duration, grace status and
    accidental. A 'verse'/'syl' pair is added when there is lyric text, and a
    'dynam' child is added for every expression whose classes include
    'Dynamic'.

    Parameters:
        note (music21.note.Note): The note to add. Its octave, step, id,
            duration, pitch, lyric and expressions are read.
        parent (xml.etree.ElementTree.Element): The element that receives the
            new 'note' element.
        syl (str, optional): Syllable text for the 'syl' element. If not
            provided, the note's lyric property is used.

    Returns:
        xml.etree.ElementTree.Element: The new 'note' element.
    """
    note_el = ET.SubElement(parent, 'note', {
        'oct': f'{note.octave}',
        'pname': f'{note.step.lower()}',
        'xml:id': f'{note.id}',
        'dots': f'{note.duration.dots}',
    })
    if note.duration.isGrace:
        note_el.set('grace', 'acc')
    note_el.set('dur', duration2MEI[note.duration.type])

    # One 's' or 'f' per semitone of alteration, 'n' for an unaltered pitch.
    alter = note.pitch.alter or 0
    if alter > 0:
        accid = 's' * int(alter)
    elif alter < 0:
        accid = 'f' * int(-alter)
    else:
        accid = 'n'
    # 'accid' is only written when the accidental is flagged for display;
    # 'accid.ges' is always written.
    if note.pitch.accidental and note.pitch.accidental.displayStatus:
        note_el.set('accid', accid)
    note_el.set('accid.ges', accid)

    # An explicit `syl` argument takes precedence over the note's own lyric.
    lyric = note.lyric if syl is None else syl
    if lyric:
        verse_el = ET.SubElement(
            note_el, 'verse', {'n': '1', 'xml:id': next(idGen)})
        syl_el = ET.SubElement(verse_el, 'syl', {'xml:id': next(idGen)})
        # Only the first line of the lyric text is kept.
        syl_el.text = lyric.strip().split('\n')[0]

    for exp in note.expressions:
        if 'Dynamic' in exp.classes:
            dyn_el = ET.SubElement(note_el, 'dynam', {'xml:id': next(idGen)})
            dyn_el.text = exp.value

    return note_el
def addTieBreakers(partList):
    """
    Add a tie-breaker level to the index of every part, in place.

    Parts that already have a MultiIndex are skipped. For the others, a second
    index level is added: the last entry of a run of equal index values gets
    0, the one before it -1, the one before that -2, and so on.

    :param partList: A list of pandas Series, each representing a part in the
        score.
    :return: None
    """
    for part in partList:
        if isinstance(part.index, pd.MultiIndex):
            continue
        following = part.index.to_series().shift(-1)
        # Walk from the last position to the first so that each entry can be
        # numbered relative to the entry that comes after it.
        breakers = []
        for pos in reversed(range(len(part.index))):
            if part.index[pos] == following.iat[pos]:
                breakers.append(breakers[-1] - 1)
            else:
                breakers.append(0)
        breakers.reverse()
        part.index = pd.MultiIndex.from_arrays((part.index, breakers))
def kernClefHelper(clef):
    """
    Build the humdrum clef token for a music21 clef object.

    The token is `*clef` followed by the clef sign, one '^' per octave of
    upward transposition or one 'v' per octave downward, and the clef line.

    :param clef: A music21 clef object.
    :return: A string representing the humdrum syntax token for the clef.
    """
    shift = clef.octaveChange
    if shift > 0:
        marker = '^' * shift
    elif shift < 0:
        marker = 'v' * abs(shift)
    else:
        marker = ''
    return f'*clef{clef.sign}{marker}{clef.line}'
def combineRests(col):
    """
    Helper function for the `notes` method. Collapse each run of consecutive
    rests in a voice down to its first rest.

    Missing values are dropped first, so rests separated only by NaN count as
    consecutive.

    :param col: A pandas Series representing a voice.
    :return: A pandas Series without NaN values and without the non-first
        rests of each run.
    """
    voice = col.dropna()
    is_rest = voice == 'r'
    follows_rest = voice.shift(1) == 'r'
    # Drop only the entries that are a rest AND come directly after a rest.
    return voice[~(is_rest & follows_rest)]
def combineUnisons(col):
    """
    Helper function for the `notes` method. Collapse each run of consecutive
    identical notes in a voice down to its first note.

    Missing values are dropped first. Rests ('r') are always kept, even when
    they repeat.

    :param col: A pandas Series representing a voice.
    :return: A pandas Series without NaN values and without the non-first
        notes of each unison run.
    """
    voice = col.dropna()
    is_rest = voice == 'r'
    differs_from_previous = voice != voice.shift(1)
    return voice[is_rest | differs_from_previous]
def githubURLtoRaw(string):
    """
    Convert a GitHub URL to its raw.githubusercontent.com equivalent.

    Strings that do not start with 'https://github.com/' are returned
    unchanged. For GitHub URLs the host is swapped and the first '/blob/'
    path segment is removed.

    :param string: A URL or any other string.
    :return: The raw-content URL, or the original string.
    """
    github_prefix = 'https://github.com/'
    if not string.startswith(github_prefix):
        return string
    repo_path = string[len(github_prefix):]
    return 'https://raw.githubusercontent.com/' + repo_path.replace('/blob/', '/', 1)
def fromJSON(json_path):
    """
    Load a JSON or dez file/url into a pandas DataFrame.

    For JSON input, the outermost keys of the JSON object become the index
    values of the DataFrame (as strings) and should be in seconds with decimal
    places allowed. The second-level keys become the columns. For dez input
    (name ending in '.dez', case-insensitive) the records under the 'labels'
    key become the rows, and missing 'start' values are filled with 0.0.

    :param json_path: A URL, a path to a JSON or dez file (str or path-like
        object), or an open file-like object.
    :return: A pandas DataFrame representing the JSON data.

    See Also
    --------
    :meth:`jsonCDATA`
    :meth:`nmats`
    """
    is_url = isinstance(json_path, str) and json_path.startswith(
        ('https://', 'http://'))
    if is_url:
        json_path = githubURLtoRaw(json_path)
        response = requests.get(json_path)
        data = json.loads(response.text)
    elif hasattr(json_path, 'read'):
        # Already-open file object: read it directly instead of re-opening.
        data = json.load(json_path)
    else:
        # JSON text is UTF-8 by specification; do not depend on the locale's
        # default encoding.
        with open(json_path, encoding='utf-8') as json_data:
            data = json.load(json_data)

    # The file type is decided by the name: the string itself, or the `.name`
    # of a path-like / file-like object.
    source_name = (json_path if isinstance(json_path, str)
                   else getattr(json_path, 'name', ''))
    if isinstance(source_name, str) and source_name.lower().endswith('.dez'):
        df = pd.DataFrame.from_records(data['labels'])
        if 'start' in df.columns:
            df['start'] = df['start'].fillna(0.0)
    else:   # .json file
        df = pd.DataFrame(data).T
        df.index = df.index.astype(str)
    return df
def _id_gen(start=1):
    """
    Endless generator of ID strings of the form 'pyAMPACT-{n}'.

    The counter begins at `start` and grows by one with every value that is
    requested. This is not meant to be called directly; use the module-level
    `idGen` instance as in the example below.

    :param start: An integer representing the starting value for the ID
        counter. Default is 1.
    :yield: A string representing the unique ID.

    See Also
    --------
    :meth:`insertAudioAnalysis`
    :meth:`xmlIDs`

    Example
    --------
    .. code-block:: python

        newID = next(idGen)
    """
    counter = start
    while True:
        yield f'pyAMPACT-{counter}'
        counter += 1


# Module-level generator instance that IDs are drawn from via `next(idGen)`.
idGen = _id_gen()
def indentMEI(elem, indentation='\t', _level=0):
    """
    Indent an MEI (Music Encoding Initiative) XML element and its children.

    Whitespace-only (or missing) `text` and `tail` values are rewritten so
    that each element starts on its own line, indented by one `indentation`
    unit per level of depth. Text and tails with real content are left alone.

    Parameters:
        elem (xml.etree.ElementTree.Element): The XML element to indent.
        indentation (str, optional): The indentation string to use. Default is
            a tab character. Use a ' ' (space) for maximally compact output.
        _level (int, optional): The current depth. Used internally by the
            recursive calls and should not be set by the user.

    Returns:
        None. The function modifies the XML element in place.
    """
    line_break = f'\n{_level*indentation}'

    if not len(elem):
        # Leaf element: only its tail needs attention, and never at the root.
        if _level and not (elem.tail and elem.tail.strip()):
            elem.tail = line_break
        return

    if not (elem.text and elem.text.strip()):
        elem.text = f'{line_break}{indentation}'
    if not (elem.tail and elem.tail.strip()):
        elem.tail = line_break

    last_child = None
    for child in elem:
        indentMEI(child, indentation, _level + 1)
        last_child = child
    # The closing tag of `elem` follows the last child, so that child's tail
    # is pulled back to this element's own indentation level.
    if not (last_child.tail and last_child.tail.strip()):
        last_child.tail = line_break
def _kernChordHelper(_chord):
    """
    Build the kern token for a music21 chord object.

    Every note of the chord is converted with `_kernNoteHelper`, and the
    resulting note tokens are joined with single spaces.

    :param _chord: A music21 chord object to be converted into a kern chord
        token.
    :return: A string representing the kern chord token.
    """
    tokens = []
    for member in _chord.notes:
        tokens.append(_kernNoteHelper(member))
    return ' '.join(tokens)
def kernFooter(fileExtension):
    """
    Return the global-comment footer for a kern file.

    The footer holds three RDF records, an ONB record stating the source file
    type and today's date (YYYY-MM-DD), and a title record. Records are
    separated by newlines, with no trailing newline.

    :param fileExtension: The type of the file the kern output was translated
        from; it is inserted verbatim into the ONB record.
    :return: A string representing the kern format footer.
    """
    from datetime import datetime

    translated_on = datetime.today().strftime("%Y-%m-%d")
    records = (
        '!!!RDF**kern: %=rational rhythm',
        '!!!RDF**kern: l=long note in original notation',
        '!!!RDF**kern: i=editorial accidental',
        f'!!!ONB: Translated from a {fileExtension} file on {translated_on} via pyAMPACT',
        '!!!title: @{OTL}',
    )
    return '\n'.join(records)
def kernHeader(metadata):
    """
    Return the global-comment header for a kern file.

    The header consists of a COM (composer) record and an OTL (title) record,
    separated by a newline.

    :param metadata: A mapping with the keys "composer" and "title".
    :return: A string representing the kern format header.
    """
    composer_record = f'!!!COM: {metadata["composer"]}'
    title_record = f'!!!OTL: {metadata["title"]}'
    return '\n'.join((composer_record, title_record))
def _kernNoteHelper(_note):
    """
    Parse a music21 note object into a kern note token.

    The token is assembled from, in order: opening slur/tie marks, duration,
    pitch letters, accidental, longa mark, grace-note mark, fermata, beaming
    and closing tie/slur marks.

    :param _note: A music21 note object to be converted into a kern note
        token.
    :return: A string representing the kern note token.
    :raises KeyError: If a non-grace note has a duration that is not present
        in `_duration2Kern`.
    """
    # TODO: this doesn't seem to be detecting longas in scores. Does m21 just
    # not detect longas in kern files? Test with mei, midi, and xml
    startBracket, endBracket, beaming = '', '', ''

    # Ties: '[' opens, '_' continues, ']' closes.
    if hasattr(_note, 'tie') and _note.tie is not None:
        if _note.tie.type == 'start':
            startBracket += '['
        elif _note.tie.type == 'continue':
            endBracket += '_'
        elif _note.tie.type == 'stop':
            endBracket += ']'

    # Slurs: '(' goes in front of any tie mark, ')' after it.
    for spanner in _note.getSpannerSites():
        if 'Slur' in spanner.classes:
            if spanner.isFirst(_note):
                startBracket = '(' + startBracket
            elif spanner.isLast(_note):
                endBracket += ')'

    # One 'L' per beam that starts on this note, one 'J' per beam that stops.
    for beam in _note.beams.beamsList:
        if beam.type == 'start':
            beaming += 'L'
        elif beam.type == 'stop':
            beaming += 'J'

    # Octave 4 and above use repeated lowercase letters, octave 3 and below
    # repeated uppercase letters.
    _oct = _note.octave
    if _oct > 3:
        letter = _note.step.lower() * (_oct - 3)
    else:
        letter = _note.step * (4 - _oct)

    acc = _note.pitch.accidental
    acc = acc.modifier if acc is not None else ''
    longa = 'l' if _note.duration.type == 'longa' else ''

    if _note.duration.isGrace:
        # Grace notes are looked up by duration type name. The slash decides
        # between 'q' and 'qq'; this choice is no longer overwritten by a
        # later unconditional assignment.
        dur = _duration2Kern.get(_note.duration.type, '')
        grace = 'q' if _note.duration.slash else 'qq'
    else:
        grace = ''
        # TODO: make this sensitive to notehead and practical duration
        dur = _duration2Kern[round(float(_note.quarterLength), 5)]

    fermata = ''
    for exp in _note.expressions:
        if exp.name == 'fermata':
            fermata = ';'

    return f'{startBracket}{dur}{letter}{acc}{longa}{grace}{fermata}{beaming}{endBracket}'
def kernNRCHelper(nrc):
    """
    Convert a music21 note, rest, or chord object to its corresponding kern
    token.

    Notes are converted by `_kernNoteHelper` and chords by `_kernChordHelper`.
    Rests are converted here: the kern duration followed by 'r'. A rest whose
    duration is not present in `_duration2Kern` is written as a bare 'r'.

    :param nrc: A music21 note, rest, or chord object to be converted into a
        kern token.
    :return: A string representing the kern token.
    """
    if nrc.isNote:
        return _kernNoteHelper(nrc)
    if nrc.isRest:
        # Default to an empty duration so that an unmapped length cannot leak
        # the literal text 'None' into the token.
        dur = _duration2Kern.get(round(float(nrc.quarterLength), 5), '')
        return f'{dur}r'
    return _kernChordHelper(nrc)
def noteRestHelper(nr):
    """
    Helper function for the `notes` method. Turn a note/rest object into its
    string label.

    :param nr: A note/rest object.
    :return: 'r' if `nr` is a rest, otherwise the note's name with octave.
    """
    return 'r' if nr.isRest else nr.nameWithOctave
def remove_namespaces(doc):
    """
    Strip namespace prefixes from every element tag of an XML document.

    ElementTree stores a namespaced tag as '{uri}name'. This function rewrites
    each such tag to plain 'name'. If the root element was namespaced, its
    namespace URI is written back as an 'xmlns' attribute on the root.
    Attribute names are not modified. The document is changed in place.

    :param doc: An xml.etree.ElementTree.ElementTree to modify.
    :return: None
    """
    root = doc.getroot()
    namespace = ''
    if isinstance(root.tag, str) and '}' in root.tag:
        namespace = root.tag[1:root.tag.index('}')]
    for elem in doc.iter():
        # Comments and processing instructions carry a callable as `tag`
        # rather than a string, so they are left untouched.
        if isinstance(elem.tag, str) and '}' in elem.tag:
            elem.tag = elem.tag[elem.tag.index('}') + 1:]
    if namespace:
        root.set('xmlns', namespace)
def removeTied(noteOrRest):
    """
    Helper function for the `_m21ObjectsNoTies` method. Replace tied-to notes
    with NaN so that only the first note of a tied group remains.

    Objects without a `tie` attribute, with `tie` set to None, or whose tie
    type is 'start' are returned unchanged.

    :param noteOrRest: A music21 note or rest object.
    :return: np.nan if the note is tied and not the first in the group,
        otherwise the original note or rest.
    """
    tie = getattr(noteOrRest, 'tie', None)
    if tie is None or tie.type == 'start':
        return noteOrRest
    return np.nan
def snapTo(data, snap_to=None, filler='forward', output='array'):
    """
    Takes a `harm`, `keys`, `functions`, `chords`, or `cdata` as `data` and
    the `snap_to` and `filler` parameters as described in the former three's
    doc strings.

    The passed data is returned in the shape of the snap_to dataframe's
    columns, and any filling operations are applied. The output will be in the
    form of a 1D numpy array unless `output` is changed, in which case a
    series will be returned for harm, keys, functions, and chords data, and a
    dataframe for cdata data.

    :param data: Can be `harm`, `keys`, `functions`, `chords`, or `cdata`. A
        list of such objects is also accepted; a list with more than one item
        is concatenated column-wise.
    :param snap_to: Described in the docstrings of `harm`, `keys`, and
        `functions`.
    :param filler: Described in the docstrings of `harm`, `keys`, and
        `functions`.
    :param output: If changed, a series will be returned for `harm`, `keys`,
        `functions`, and `chords` data, and a dataframe for `cdata` data.
        Default is a 1D numpy array.
    :return: The passed data in the shape of the `snap_to` dataframe's columns
        with any filling operations applied.
    """
    # Work on a copy (or on a newly concatenated object) so the in-place
    # operations below do not touch the caller's data.
    if isinstance(data, list):
        if len(data) == 1:
            _data = data[0].copy()
        else:
            _data = pd.concat(data, axis=1)
    else:
        _data = data.copy()
    if snap_to is not None:
        # reindex needs unique labels; of duplicated labels the last one wins.
        if not _data.index.is_unique:
            _data = _data[~_data.index.duplicated(keep='last')]
        _data = _data.reindex(snap_to.columns)
    # Unless '.' is itself the requested filler, treat '.' as missing data.
    if filler != '.':
        _data.replace('.', np.nan, inplace=True)
    # Only string fillers trigger a fill; any other filler type skips this
    # whole branch.
    if isinstance(filler, str):
        filler = filler.lower()
        if filler == 'forward':
            _data = _data.infer_objects(copy=False).ffill()
        else:
            if filler in ('nan', 'drop'):
                # Filling NaN with NaN leaves the missing values in place.
                _data.fillna(np.nan, inplace=True)
            else:
                _data.fillna(filler, inplace=True)
    if filler == 'drop':
        _data.dropna(inplace=True)
    if output == 'array':
        return _data.values
    else:
        return _data
def truncate_and_scale_onsOffsList(onsOffsList, target_length):
    """
    Scale and truncate onsOffsList to match the target_length, adjusting
    values proportionally.

    - Equal lengths: the input list itself is returned.
    - Shortening: the input is divided into `target_length` consecutive
      ranges; each output pair is the mean of the first and last pair of its
      range.
    - Lengthening: each output pair is linearly interpolated between the two
      neighbouring input pairs. Positions past the last input pair reuse the
      last pair.

    Parameters:
        onsOffsList (list of lists): List containing [ONSET_SEC, OFFSET_SEC]
            pairs.
        target_length (int): Desired length of the output list.

    Returns:
        list of lists: Scaled and truncated onsOffsList.

    Raises:
        ValueError: If onsOffsList is empty and target_length is positive.
    """
    current_length = len(onsOffsList)
    if current_length == target_length:
        return onsOffsList
    if target_length <= 0:
        # Nothing to produce; also avoids dividing by a zero target length.
        return []
    if current_length == 0:
        raise ValueError(
            'cannot scale an empty onsOffsList to a non-zero target_length')

    if current_length > target_length:
        # Scale down
        scale_factor = current_length / target_length
        truncated_list = []
        for i in range(target_length):
            start_index = int(i * scale_factor)
            end_index = int((i + 1) * scale_factor)
            first, last = onsOffsList[start_index], onsOffsList[end_index - 1]
            # Mean of the first and last pair of the range
            start_time = np.mean([first[0], last[0]])
            end_time = np.mean([first[1], last[1]])
            truncated_list.append([start_time, end_time])
        return truncated_list

    # Scale up by interpolation
    scaling_factor = target_length / current_length
    last_index = current_length - 1
    new_onsOffsList = []
    for i in range(target_length):
        original_index = i / scaling_factor
        lower_index = int(np.floor(original_index))
        # Clamp rather than decrement: decrementing made upper equal lower
        # for every fractional position, which disabled the interpolation.
        upper_index = min(int(np.ceil(original_index)), last_index)
        if lower_index == upper_index:
            new_onsOffsList.append(onsOffsList[lower_index])
        else:
            lower_value = onsOffsList[lower_index]
            upper_value = onsOffsList[upper_index]
            fraction = original_index - lower_index
            new_onsOffsList.append([
                lower_value[0] + fraction * (upper_value[0] - lower_value[0]),
                lower_value[1] + fraction * (upper_value[1] - lower_value[1])
            ])
    return new_onsOffsList
def explode_kern_chords(krn_text):
    """
    Split chord tokens in Humdrum **kern text into monophonic spines.

    Given Humdrum **kern file text, return (new_text, spine_groups) where:

    - new_text: every kern spine with chord tokens split into monophonic
      spines. A kern spine whose largest chord has n notes becomes n spines;
      where a token has fewer notes, the remaining spines get a rest with the
      duration of the token's first note.
    - spine_groups: list of lists mapping each original spine index to its
      output column indices, e.g. [[0],[1],[2,3,4],[5]] for a file with
      **function **harm **kern(3-voice) **kern(1-voice).

    Non-kern spines pass through unchanged. Global comments (lines starting
    with '!!') and blank lines are copied verbatim.

    Returns (original_text, 1-to-1 groups) if all kern spines are monophonic.
    Files with spine-split (*^) or spine-merge (*v) operators are returned
    unchanged — music21 handles those natively and exploding would corrupt
    them.

    :param krn_text: The full text of a Humdrum file.
    :return: A tuple (text, spine_groups) as described above.
    """
    import re as _re

    lines = krn_text.splitlines()

    # If file uses spine splits/merges, return as-is with 1:1 groups
    for line in lines:
        # Only tandem interpretation lines ('*...' but not '**...') can hold
        # the split/merge operators.
        if not line.startswith('*') or line.startswith('**'):
            continue
        if any(cell in ('*^', '*v') for cell in line.split('\t')):
            spine_types = []
            for ln in lines:
                if ln.startswith('**'):
                    spine_types = ln.split('\t')
                    break
            return krn_text, [[i] for i in range(len(spine_types))]

    # First pass: find the kern spines and the largest chord in each.
    spine_types = []
    kern_indices = []
    max_voices = {}
    for line in lines:
        if line.startswith('!!!'):
            continue
        if line.startswith('**'):
            spine_types = line.split('\t')
            for i, t in enumerate(spine_types):
                if t == '**kern':
                    kern_indices.append(i)
                    max_voices[i] = 1
            continue
        if not spine_types or line.startswith(('*', '=', '!')):
            continue
        cols = line.split('\t')
        for ki in kern_indices:
            if ki < len(cols):
                tok = cols[ki].strip()
                if tok and tok != '.':
                    max_voices[ki] = max(max_voices[ki], len(tok.split(' ')))

    # Build output column map (needed even for no-op to return spine_groups)
    out_col_map = {}
    out_types = []
    out_idx = 0
    for i, st in enumerate(spine_types):
        width = max_voices.get(i, 1) if st == '**kern' else 1
        out_col_map[i] = list(range(out_idx, out_idx + width))
        out_types.extend([st] * width)
        out_idx += width
    spine_groups = [out_col_map[i] for i in range(len(spine_types))]

    # Fast exit if nothing to explode
    if not max_voices or all(v == 1 for v in max_voices.values()):
        return krn_text, spine_groups

    total = out_idx
    kern_columns = set(kern_indices)
    # Duration part of a kern note: digits, an optional '%digits' rational
    # part, then any number of augmentation dots.
    duration_re = _re.compile(r'\d+(?:%\d+)?\.*')

    # Second pass: rewrite every line for the widened column layout.
    out_lines = []
    for line in lines:
        if not line or line.startswith('!!'):
            # Blank lines, global comments and reference records are not
            # divided into spines, so they are copied verbatim.
            out_lines.append(line)
            continue
        if line.startswith('**'):
            out_lines.append('\t'.join(out_types))
            continue
        if line.startswith('*-'):
            out_lines.append('\t'.join(['*-'] * total))
            continue
        if line.startswith(('*', '=', '!')):
            # Interpretations, barlines and local comments are repeated once
            # per output column of the spine they belong to.
            row = []
            for i, tok in enumerate(line.split('\t')):
                row.extend([tok] * len(out_col_map.get(i, [i])))
            out_lines.append('\t'.join(row))
            continue

        # Data line: start from null tokens and fill in what is present.
        row = ['.'] * total
        for i, tok in enumerate(line.split('\t')):
            if i not in out_col_map:
                continue
            positions = out_col_map[i]
            tok = tok.strip()
            if i not in kern_columns:
                row[positions[0]] = tok
                continue
            if not tok or tok == '.':
                continue
            notes = tok.split(' ')
            for j, pos in enumerate(positions):
                if j < len(notes):
                    row[pos] = notes[j]
                else:
                    # Pad the missing voice with a rest as long as the first
                    # note of the chord; fall back to a quarter rest.
                    dur = duration_re.search(notes[0])
                    row[pos] = (dur.group(0) if dur else '4') + 'r'
        out_lines.append('\t'.join(row))

    return '\n'.join(out_lines) + '\n', spine_groups