detection and binaural

2025-12-23 23:29:39 +01:00 · 2023-03-29 21:20:32 +08:00
parent da29839f31
commit fdada40afa
28 changed files with 8786 additions and 1 deletions
--- a/audio_detection/audio_infer/utils/create_indexes.py
+++ b/audio_detection/audio_infer/utils/create_indexes.py
@@ -0,0 +1,126 @@
+import numpy as np
+import argparse
+import csv
+import os
+import glob
+import datetime
+import time
+import logging
+import h5py
+import librosa
+
+from utilities import create_folder, get_sub_filepaths
+import config
+
+
+def create_indexes(args):
+    """Create indexes a for dataloader to read for training. When users have 
+    a new task and their own data, they need to create similar indexes. The 
+    indexes contain meta information of "where to find the data for training".
+    """
+
+    # Arguments & parameters
+    waveforms_hdf5_path = args.waveforms_hdf5_path
+    indexes_hdf5_path = args.indexes_hdf5_path
+
+    # Paths
+    create_folder(os.path.dirname(indexes_hdf5_path))
+
+    with h5py.File(waveforms_hdf5_path, 'r') as hr:
+        with h5py.File(indexes_hdf5_path, 'w') as hw:
+            audios_num = len(hr['audio_name'])
+            hw.create_dataset('audio_name', data=hr['audio_name'][:], dtype='S20')
+            hw.create_dataset('target', data=hr['target'][:], dtype=np.bool)
+            hw.create_dataset('hdf5_path', data=[waveforms_hdf5_path.encode()] * audios_num, dtype='S200')
+            hw.create_dataset('index_in_hdf5', data=np.arange(audios_num), dtype=np.int32)
+
+    print('Write to {}'.format(indexes_hdf5_path))
+          
+
+def combine_full_indexes(args):
+    """Combine all balanced and unbalanced indexes hdf5s to a single hdf5. This 
+    combined indexes hdf5 is used for training with full data (~20k balanced 
+    audio clips + ~1.9m unbalanced audio clips).
+    """
+
+    # Arguments & parameters
+    indexes_hdf5s_dir = args.indexes_hdf5s_dir
+    full_indexes_hdf5_path = args.full_indexes_hdf5_path
+
+    classes_num = config.classes_num
+
+    # Paths
+    paths = get_sub_filepaths(indexes_hdf5s_dir)
+    paths = [path for path in paths if (
+        'train' in path and 'full_train' not in path and 'mini' not in path)]
+
+    print('Total {} hdf5 to combine.'.format(len(paths)))
+
+    with h5py.File(full_indexes_hdf5_path, 'w') as full_hf:
+        full_hf.create_dataset(
+            name='audio_name', 
+            shape=(0,), 
+            maxshape=(None,), 
+            dtype='S20')
+        
+        full_hf.create_dataset(
+            name='target', 
+            shape=(0, classes_num), 
+            maxshape=(None, classes_num), 
+            dtype=np.bool)
+
+        full_hf.create_dataset(
+            name='hdf5_path', 
+            shape=(0,), 
+            maxshape=(None,), 
+            dtype='S200')
+
+        full_hf.create_dataset(
+            name='index_in_hdf5', 
+            shape=(0,), 
+            maxshape=(None,), 
+            dtype=np.int32)
+
+        for path in paths:
+            with h5py.File(path, 'r') as part_hf:
+                print(path)
+                n = len(full_hf['audio_name'][:])
+                new_n = n + len(part_hf['audio_name'][:])
+
+                full_hf['audio_name'].resize((new_n,))
+                full_hf['audio_name'][n : new_n] = part_hf['audio_name'][:]
+
+                full_hf['target'].resize((new_n, classes_num))
+                full_hf['target'][n : new_n] = part_hf['target'][:]
+
+                full_hf['hdf5_path'].resize((new_n,))
+                full_hf['hdf5_path'][n : new_n] = part_hf['hdf5_path'][:]
+
+                full_hf['index_in_hdf5'].resize((new_n,))
+                full_hf['index_in_hdf5'][n : new_n] = part_hf['index_in_hdf5'][:]
+                
+    print('Write combined full hdf5 to {}'.format(full_indexes_hdf5_path))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers(dest='mode')
+
+    parser_create_indexes = subparsers.add_parser('create_indexes')
+    parser_create_indexes.add_argument('--waveforms_hdf5_path', type=str, required=True, help='Path of packed waveforms hdf5.')
+    parser_create_indexes.add_argument('--indexes_hdf5_path', type=str, required=True, help='Path to write out indexes hdf5.')
+
+    parser_combine_full_indexes = subparsers.add_parser('combine_full_indexes')
+    parser_combine_full_indexes.add_argument('--indexes_hdf5s_dir', type=str, required=True, help='Directory containing indexes hdf5s to be combined.')
+    parser_combine_full_indexes.add_argument('--full_indexes_hdf5_path', type=str, required=True, help='Path to write out full indexes hdf5 file.')
+
+    args = parser.parse_args()
+    
+    if args.mode == 'create_indexes':
+        create_indexes(args)
+
+    elif args.mode == 'combine_full_indexes':
+        combine_full_indexes(args)
+
+    else:
+        raise Exception('Incorrect arguments!')