#!/usr/bin/env python3
# analyze_audio.py
"""Analyze an audio file and print a JSON summary on the last line of stdout.

Stages, each of which records an ``*_error`` key in the result instead of
aborting the run when it fails:

1. source separation with the ``spleeter`` command-line tool,
2. tempo / beat tracking with librosa,
3. sound-class tagging with the YAMNet TF-Hub model, aggregated into
   coarse instrument scores and a voice-presence flag.
"""
import csv
import json
import os
import shutil
import subprocess
import sys
import tempfile

import numpy as np

# The audio back-ends are imported defensively so that a missing package is
# reported per stage in the JSON output (see main) rather than killing the
# script before any JSON has been printed.
try:
    import librosa
except ImportError:
    librosa = None
try:
    import soundfile as sf
except ImportError:
    sf = None

USAGE = "Usage: analyze_audio.py <input_file> [stems]"

# File extensions collected from the Spleeter output folder.
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.flac', '.ogg')

# Requested stem count -> Spleeter preset; any other count uses 4 stems.
SPLEETER_PRESETS = {2: "spleeter:2stems", 5: "spleeter:5stems"}
DEFAULT_SPLEETER_PRESET = "spleeter:4stems"

YAMNET_MODEL_URL = 'https://tfhub.dev/google/yamnet/1'
YAMNET_CLASS_MAP_URL = (
    'https://raw.githubusercontent.com/tensorflow/models/master/'
    'research/audioset/yamnet/yamnet_class_map.csv'
)

# Instrument category -> keywords searched (case-insensitively, as
# substrings) in the class display names.
INSTRUMENT_KEYWORDS = {
    'Piano': ['Piano', 'electric piano'],
    'Guitar': ['Acoustic guitar', 'Electric guitar', 'Guitar'],
    'Drums': ['Drum kit', 'Drums', 'Snare drum', 'Kick drum'],
    'Bass': ['Bass', 'Electric bass'],
    'Voice (singing)': ['Male singing', 'Female singing', 'Child singing', 'Singing'],
    'Brass': ['Trumpet', 'Trombone', 'Brass'],
    # add more mapping heuristics as needed
}

# Aggregate singing score above which voice is reported as present.
VOICE_THRESHOLD = 0.01


def _require_librosa():
    """Return the librosa module, or raise ImportError if it is not installed."""
    if librosa is None:
        raise ImportError("librosa is required for audio loading but is not installed")
    return librosa


def run_spleeter(infile, stems=2, out_dir=None):
    """Separate ``infile`` into stems by invoking the ``spleeter`` CLI.

    Args:
        infile: Path of the audio file to separate.
        stems: Requested number of stems. 2 and 5 select the matching
            Spleeter preset; any other value selects the 4-stem preset.
        out_dir: Output directory. When None, a fresh temporary directory
            is created; it is not removed here because the stems it holds
            are the result handed back to the caller.

    Returns:
        ``(folder, files)``: the folder named after the input's basename
        inside ``out_dir`` and the list of audio files found beneath it.

    Raises:
        subprocess.CalledProcessError: if spleeter exits with a non-zero status.
        OSError: if the ``spleeter`` executable cannot be started.
    """
    if out_dir is None:
        out_dir = tempfile.mkdtemp(prefix="spleeter_")
    preset = SPLEETER_PRESETS.get(stems, DEFAULT_SPLEETER_PRESET)
    # e.g. "spleeter separate -p spleeter:2stems -o out infile"; passed as an
    # argument list so the input path is never interpreted by a shell.
    cmd = ["spleeter", "separate", "-p", preset, "-o", out_dir, infile]
    subprocess.check_call(cmd)

    # Spleeter creates a folder named after the input file's basename.
    base = os.path.splitext(os.path.basename(infile))[0]
    folder = os.path.join(out_dir, base)
    files = [
        os.path.join(root, fname)
        for root, _, filenames in os.walk(folder)
        for fname in filenames
        if fname.lower().endswith(AUDIO_EXTENSIONS)
    ]
    return folder, files


def read_class_names(labels_path):
    """Read class display names from a YAMNet class-map CSV file.

    The file is parsed with the ``csv`` module so that quoted display names
    containing commas stay intact, and the ``index,mid,display_name`` header
    row is skipped so that list position ``i`` corresponds to score column
    ``i``.

    Args:
        labels_path: Path of the CSV file.

    Returns:
        List of display names in file order. Rows with fewer than three
        columns contribute their first column instead.
    """
    class_names = []
    with open(labels_path, 'r', encoding='utf-8', newline='') as fh:
        for row_number, row in enumerate(csv.reader(fh)):
            if not row:
                continue
            if row_number == 0 and row[0].strip().lower() == 'index':
                continue  # header row, not a class
            class_names.append(row[2] if len(row) >= 3 else row[0])
    return class_names


# YAMNet model helper
def load_yamnet():
    """Load the YAMNet model from TF-Hub together with its class names.

    TensorFlow is imported here rather than at module level so the other
    stages do not depend on it.

    Returns:
        ``(model, class_names)``.
    """
    import tensorflow as tf
    import tensorflow_hub as hub

    model = hub.load(YAMNET_MODEL_URL)
    labels_path = tf.keras.utils.get_file('yamnet_class_map.csv', YAMNET_CLASS_MAP_URL)
    return model, read_class_names(labels_path)


def run_yamnet_analysis(infile):
    """Tag ``infile`` with YAMNet and average the class scores over time.

    Args:
        infile: Path of the audio file to analyze.

    Returns:
        ``(top, mean_scores, class_names)`` where ``top`` is a list of up to
        ten ``(class_name, mean_score)`` pairs sorted by descending score,
        ``mean_scores`` is the per-class mean over frames and
        ``class_names`` is the label list used for the lookup.
    """
    audio = _require_librosa()
    model, class_names = load_yamnet()
    # The model is fed a 16 kHz mono waveform.
    wav, _ = audio.load(infile, sr=16000, mono=True)
    scores, _embeddings, _spectrogram = model(wav)
    # scores has shape (frames, classes); average across frames.
    mean_scores = np.mean(scores.numpy(), axis=0)
    top_idx = np.argsort(mean_scores)[::-1][:10]
    top = [(class_names[i], float(mean_scores[i])) for i in top_idx]
    return top, mean_scores, class_names


def aggregate_instruments(class_names, mean_scores):
    """Sum class scores into coarse instrument categories.

    A class belongs to a category when its name contains any of the
    category's keywords (case-insensitive substring match). Each class is
    counted at most once per category, even when several keywords match it
    (e.g. "Acoustic guitar" matches both "Acoustic guitar" and "Guitar").

    NOTE(review): because matching is by substring, a short keyword such as
    'Bass' also matches any class name that merely contains it — confirm
    this is the intended heuristic.

    Args:
        class_names: Class display names.
        mean_scores: Scores aligned with ``class_names``. If the two differ
            in length, the extra entries of the longer one are ignored.

    Returns:
        Dict mapping every category name to its summed score (0.0 when no
        class matched).
    """
    lowered_names = [name.lower() for name in class_names]
    out = {}
    for category, keywords in INSTRUMENT_KEYWORDS.items():
        lowered_keywords = [kw.lower() for kw in keywords]
        total = 0.0
        for lname, score in zip(lowered_names, mean_scores):
            if any(kw in lname for kw in lowered_keywords):
                total += float(score)
        out[category] = total
    return out


def detect_beats(infile):
    """Estimate the tempo and beat positions of ``infile``.

    Args:
        infile: Path of the audio file, loaded as mono at its native
            sampling rate.

    Returns:
        ``(tempo, beat_times)``: the tempo as a float and the beat
        positions in seconds as a list.
    """
    audio = _require_librosa()
    y, sr = audio.load(infile, sr=None, mono=True)
    tempo, beats = audio.beat.beat_track(y=y, sr=sr)
    beat_times = audio.frames_to_time(beats, sr=sr).tolist()
    # tempo may come back as a scalar or as a one-element array; normalise
    # explicitly instead of relying on implicit array-to-float conversion.
    return float(np.atleast_1d(tempo)[0]), beat_times


def main():
    """Command-line entry point.

    Usage: ``analyze_audio.py <input_file> [stems]``. Prints the usage line
    and exits with status 2 when the input file is missing or ``stems`` is
    not an integer; otherwise prints the result as a single JSON line (the
    last line written to stdout) and exits with status 0.
    """
    if len(sys.argv) < 2:
        print(USAGE)
        sys.exit(2)
    infile = sys.argv[1]
    try:
        stems = int(sys.argv[2]) if len(sys.argv) > 2 else 2
    except ValueError:
        print(USAGE)
        sys.exit(2)

    result = {'input_file': infile}

    # 1) run spleeter; if it is unavailable or fails, continue without stems
    try:
        stems_folder, stems_files = run_spleeter(infile, stems=stems)
    except Exception as e:
        stems_folder, stems_files = None, []
        result['spleeter_error'] = str(e)

    # 2) tempo/beats
    try:
        tempo, beats = detect_beats(infile)
    except Exception as e:
        tempo, beats = None, []
        result['beat_error'] = str(e)
    result['tempo'] = tempo
    result['beats'] = beats

    # 3) YAMNet analysis
    try:
        top, mean_scores, class_names = run_yamnet_analysis(infile)
    except Exception as e:
        top, mean_scores, class_names = [], None, []
        result['yamnet_error'] = str(e)
    result['top_classes'] = [name for name, _ in top]

    # aggregate likely instruments
    if class_names and mean_scores is not None:
        instruments = aggregate_instruments(class_names, mean_scores)
    else:
        instruments = {}

    # Voice presence heuristic: aggregate singing score above the threshold.
    voice_score = instruments.get('Voice (singing)', 0.0)
    result['voice_presence'] = bool(voice_score > VOICE_THRESHOLD)
    result['instruments'] = instruments
    result['stems_folder'] = stems_folder or ""
    result['stems_files'] = stems_files

    # print final JSON as last line
    print(json.dumps(result))
    sys.exit(0)


if __name__ == '__main__':
    main()