Source code for pyampact.dataCompilation

"""
dataCompilation
===============




.. autosummary::
    :toctree: generated/
    
    data_compilation
"""

import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd

from matplotlib.patches import Patch
from pyampact.performance import estimate_perceptual_parameters
from pyampact.alignmentUtils import f0_est_weighted_sum_spec
from pyampact.symbolic import Score

__all__ = [
    "data_compilation",
    "export_selected_columns",
    "visualise_alignment_from_nmat"                                                 
]


def data_compilation(y, original_sr, hop_length, winms, tsr, spec, nmat, piece, audio_file_path, output_path="output"):
    """
    Takes the results of the alignment and the note matrix and compiles the
    data so that the audio analysis can be inserted into the score.

    Parameters
    ----------
    y : np.ndarray
        Audio time series of the performance.
    original_sr : int
        Original sample rate of the audio file.
    hop_length : int
        Hop length used for the spectral analysis.
    winms : int
        Analysis window length in milliseconds.
    tsr : int
        Target sample rate used to size the FFT window.
    spec : np.ndarray
        Spectrogram of the audio, passed through to the perceptual-parameter
        estimation.
    nmat : dict of pd.DataFrame
        The note matrix containing information about notes, including their
        timing and duration, keyed by part.
    piece : Score
        An instantiation of the original Score object containing the data
        input for the musical piece.
    audio_file_path : str
        The path to the audio file associated with the performance data.
    output_path : str, optional
        The base file path for the output MEI or CSV file (the extension is
        appended). Defaults to 'output'.

    Returns
    -------
    nmat : dict of pd.DataFrame
        The note matrix with performance data appended.
    fileOutput :
        The result of the MEI or CSV export.
    """
    all_note_vals = []
    all_note_ids = []

    # FFT length: the window length (winms ms at the target sample rate)
    # rounded to the nearest power of two.
    fft_len = int(2**np.round(np.log(winms/1000*tsr)/np.log(2)))

    for key, df in nmat.items():
        df = df.drop(columns=['MEASURE', 'ONSET', 'DURATION', 'PART', 'MIDI'])
        midiList = np.array(nmat[key]['MIDI'])
        note_vals = []
        note_ids = []

        is_monophonic = len(nmat) == 1  # Only one part = monophonic or speech

        for i in range(len(df)):
            onset = df['ONSET_SEC'].iloc[i]
            offset = df['OFFSET_SEC'].iloc[i]
            midi = midiList[i]

            if is_monophonic:
                try:
                    # Slice audio
                    y_seg = y[int(onset * original_sr):int(offset * original_sr)]

                    # Estimate f0 and RMS
                    f0, voiced_flag, voiced_probs = librosa.pyin(
                        y_seg,
                        fmin=librosa.note_to_hz('C2'),
                        fmax=librosa.note_to_hz('C7'),
                        sr=original_sr
                    )
                    pwr = librosa.feature.rms(y=y_seg).flatten()

                    # Time and frequency axes
                    t = librosa.times_like(pwr, sr=original_sr)
                    xf = librosa.fft_frequencies(sr=original_sr, n_fft=fft_len)

                    M = spec

                    # Estimate perceptual parameters for this note
                    note_vals.append(estimate_perceptual_parameters(
                        f0, pwr, M, original_sr, 256, 1
                    ))
                except Exception:
                    # Estimation failed for this note; record NaN placeholders
                    note_vals.append({
                        'f0_vals': np.nan,
                        'ppitch': (np.nan, np.nan),
                        'jitter': np.nan,
                        'vibrato_depth': np.nan,
                        'vibrato_rate': np.nan,
                        'pwr_vals': np.nan,
                        'shimmer': np.nan,
                        'spec_centroid': np.nan,
                        'spec_bandwidth': np.nan,
                        'spec_contrast': np.nan,
                        'spec_flatness': np.nan,
                        'spec_rolloff': np.nan
                    })
            else:
                freqs, times, D = librosa.reassigned_spectrogram(
                    y=y, sr=original_sr, hop_length=hop_length)
                f0, pwr, t, M_seg, xf = f0_est_weighted_sum_spec(
                    onset, offset, midi, freqs, D, original_sr
                )
                note_vals.append(estimate_perceptual_parameters(
                    f0, pwr, M_seg, original_sr, 256, 1
                ))

            note_ids.append(df.index[i])

        all_note_vals.append(note_vals)
        all_note_ids.append(note_ids)

    loc = 0  # This stays for post-processing
    for key, df in nmat.items():
        df['f0Vals'] = [all_note_vals[loc][i]['f0_vals'] for i in range(len(df))]
        df['meanf0'] = [np.mean(vals) for vals in df['f0Vals']]
        df['ppitch1'] = [all_note_vals[loc][i]['ppitch'][0] for i in range(len(df))]
        df['ppitch2'] = [all_note_vals[loc][i]['ppitch'][1] for i in range(len(df))]
        df['jitter'] = [all_note_vals[loc][i]['jitter'] for i in range(len(df))]
        df['vibratoDepth'] = [all_note_vals[loc][i]['vibrato_depth'] for i in range(len(df))]
        df['vibratoRate'] = [all_note_vals[loc][i]['vibrato_rate'] for i in range(len(df))]
        df['pwrVals'] = [all_note_vals[loc][i]['pwr_vals'] for i in range(len(df))]
        df['meanPwr'] = [np.mean(vals) for vals in df['pwrVals']]
        df['shimmer'] = [all_note_vals[loc][i]['shimmer'] for i in range(len(df))]
        df['specCentVals'] = [all_note_vals[loc][i]['spec_centroid'] for i in range(len(df))]
        df['meanSpecCent'] = [np.mean(vals) for vals in df['specCentVals']]
        df['specBandwidthVals'] = [all_note_vals[loc][i]['spec_bandwidth'] for i in range(len(df))]
        df['meanSpecBandwidth'] = [np.mean(vals) for vals in df['specBandwidthVals']]
        df['specContrastVals'] = [all_note_vals[loc][i]['spec_contrast'] for i in range(len(df))]
        df['meanSpecContrast'] = [np.mean(vals) for vals in df['specContrastVals']]
        df['specFlatnessVals'] = [all_note_vals[loc][i]['spec_flatness'] for i in range(len(df))]
        df['meanSpecFlatness'] = [np.mean(vals) for vals in df['specFlatnessVals']]
        df['specRolloffVals'] = [all_note_vals[loc][i]['spec_rolloff'] for i in range(len(df))]
        df['meanSpecRolloff'] = [np.mean(vals) for vals in df['specRolloffVals']]
        loc += 1

    def convert_nmat_for_export(nmat):
        """
        Converts columns with list values into stringified versions for
        export (e.g., MEI, CSV, JSON).

        Parameters
        ----------
        nmat : dict of DataFrames
            The processed note matrix with internal Python lists.

        Returns
        -------
        export_nmat : dict of DataFrames
            A new dictionary where list-valued columns are stringified.
        """
        list_columns = [
            'f0Vals', 'pwrVals', 'specCentVals', 'specBandwidthVals',
            'specContrastVals', 'specFlatnessVals', 'specRolloffVals'
        ]
        export_nmat = {}
        for part, df in nmat.items():
            df_copy = df.copy()
            for col in list_columns:
                if col in df_copy.columns:
                    df_copy[col] = df_copy[col].apply(str)
            export_nmat[part] = df_copy
        return export_nmat

    nmat_export = convert_nmat_for_export(nmat)

    if getattr(piece, 'fileExtension') != 'csv':
        fileOutput = piece.insertAudioAnalysis(
            output_path=f"{output_path}.mei",
            data=nmat_export,
            mimetype='audio/aiff',
            target=audio_file_path)
        print(f"Saved: {output_path}.mei")
    else:
        # Save each part of nmat to a CSV file
        for part, df in nmat_export.items():
            # Construct the filename from the output path
            filename = f"{output_path}.csv"
            # Save the DataFrame to CSV
            fileOutput = df.to_csv(filename)
            print(f"Saved {filename}")

    return nmat, fileOutput
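# Example usage (a minimal sketch, not part of the library API): assumes
# `nmat` and `spec` were produced by a prior pyAMPACT alignment step, and
# that "performance.wav" and "score.krn" are hypothetical file names. The
# hop_length/winms/tsr values are illustrative, not package defaults.
#
#     import librosa
#     from pyampact.symbolic import Score
#
#     y, sr = librosa.load("performance.wav", sr=None)
#     piece = Score("score.krn")
#     nmat, file_output = data_compilation(
#         y, sr, hop_length=256, winms=100, tsr=4000,
#         spec=spec, nmat=nmat, piece=piece,
#         audio_file_path="performance.wav", output_path="output")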
def export_selected_columns(nmat, columns, output_path="./output_selected_data.csv"):
    """
    Export specified columns from a dictionary of DataFrames, a single
    DataFrame, or a NumPy array of DataFrames to a CSV. Missing columns are
    added as NaN.

    Parameters
    ----------
    nmat : dict of DataFrames
        The dictionary of nmat with all relevant onset, offset, duration,
        and pitch information.
    columns : array of strings
        List of column names, in the order in which they are exported.
    output_path : str, optional
        The file path for the output CSV file. Defaults to
        './output_selected_data.csv'.

    Returns
    -------
    None
        Writes the selected columns to a CSV file at ``output_path``.
    """
    # Ensure the output directory exists
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    selected = []

    if isinstance(nmat, dict):
        dfs = nmat.values()
    elif isinstance(nmat, pd.DataFrame):
        dfs = [nmat]
    elif isinstance(nmat, np.ndarray) and all(isinstance(x, pd.DataFrame) for x in nmat):
        dfs = list(nmat)
    else:
        raise TypeError(f"Unsupported type for nmat: {type(nmat)}")

    for df in dfs:
        df = df.copy()
        for col in columns:
            if col not in df.columns:
                df[col] = pd.NA
        selected.append(df[columns])

    if not selected:
        print("No data to export.")
        return

    combined = pd.concat(selected, ignore_index=True)
    combined.to_csv(output_path, index=False)
    print(f"Exported selected columns to: {output_path}")
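# Example usage (a sketch): export note timing plus a few of the perceptual
# fields that data_compilation appends. The column names assume the
# compilation step has already run.
#
#     export_selected_columns(
#         nmat,
#         columns=["ONSET_SEC", "OFFSET_SEC", "meanf0", "jitter", "shimmer"],
#         output_path="./selected.csv")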
# Internal utility function, no documentation
# Visualization component
def midi_to_freq(midi):
    return 440.0 * (2 ** ((midi - 69) / 12))


# Enharmonic flat conversion
enharmonic_map = {
    "A#": "Bb",
    "C#": "Db",
    "D#": "Eb",
    "F#": "Gb",
    "G#": "Ab"
}


# Internal utility function, no documentation
def midi_to_flat_note(midi):
    note = librosa.midi_to_note(midi, unicode=False)
    for sharp, flat in enharmonic_map.items():
        if sharp in note:
            return note.replace(sharp, flat)
    return note
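# For example, midi_to_freq(69) evaluates to 440.0 (A4), and
# midi_to_flat_note(70) returns "Bb4" where librosa.midi_to_note(70,
# unicode=False) would return "A#4".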
def visualise_alignment_from_nmat(nmat_dict, y, sr, output_path="output_visual"):
    """
    Creates a spectrogram with pitch, voice, and duration information
    overlaid.

    Parameters
    ----------
    nmat_dict : dict of DataFrames
        The dictionary of nmat with all relevant onset, offset, duration,
        and pitch information.
    y : ndarray
        Audio time series of the file.
    sr : int
        Original sample rate of the audio file.
    output_path : str, optional
        The file path (without extension) for the output PNG file. Defaults
        to 'output_visual'.

    Returns
    -------
    None
        Saves a PNG image of the spectrogram with the note overlay to
        ``output_path``.png.
    """
    S = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    plt.figure(figsize=(12, 6))
    librosa.display.specshow(S, sr=sr, x_axis='time', y_axis='log', cmap='gray_r')
    plt.title(f"Spectrogram + Notes: {output_path}")

    # Generate consistent color palette, cycling if there are more parts
    # than base colors
    base_colors = ['red', 'blue', 'green', 'orange', 'purple', 'cyan', 'magenta', 'yellow']
    color_cycle = (base_colors * ((len(nmat_dict) // len(base_colors)) + 1))[:len(nmat_dict)]

    part_name_map = {}
    legend_elements = []

    for idx, (original_name, notes) in enumerate(nmat_dict.items()):
        if notes.empty:
            continue

        new_name = f"Part-{idx + 1}"
        part_name_map[original_name] = new_name
        current_color = color_cycle[idx]
        legend_elements.append(Patch(facecolor=current_color, label=new_name))

        for _, row in notes.iterrows():
            if not all(k in row for k in ("MIDI", "ONSET_SEC", "OFFSET_SEC")):
                continue

            midi = row["MIDI"]
            start = row["ONSET_SEC"]
            end = row["OFFSET_SEC"]
            freq = midi_to_freq(midi)

            # Draw a band around the note frequency for the note's duration
            plt.fill_between([start, end], freq - 15, freq + 15,
                             color=current_color, alpha=0.4)

            note_name = midi_to_flat_note(midi)
            plt.text((start + end) / 2, freq + 18, note_name,
                     ha='center', va='bottom', fontsize=8, color=current_color)

    plt.ylim(20, 2000)
    plt.colorbar(format='%+2.0f dB')
    plt.legend(handles=legend_elements, loc='upper right')

    save_path = f"{output_path}.png"
    plt.savefig(save_path, dpi=300)
    plt.close()
    print(f"Saved: {save_path}")
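# Example usage (a sketch; "performance.wav" is a hypothetical file name and
# `nmat` is assumed to come from a prior alignment):
#
#     y, sr = librosa.load("performance.wav", sr=None)
#     visualise_alignment_from_nmat(nmat, y, sr, output_path="alignment_plot")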