1
|
#!/usr/bin/python2
|
2
|
|
3
|
import argparse
|
4
|
import wave
|
5
|
import struct
|
6
|
import subprocess
|
7
|
import os
|
8
|
import os.path
|
9
|
import shutil
|
10
|
|
11
|
import numpy as np
|
12
|
import matplotlib.pyplot as plt
|
13
|
|
14
|
|
15
|
# ----------
|
16
|
|
17
|
|
18
|
# Framing parameters in seconds; main() multiplies them by the sampling
# rate of each file to obtain the frame shift/length in samples.
frame_shift_sec = 0.005
frame_len_sec = 0.025

# FFT length handed to the SPTK tools, and the number of values stored per
# spectrum.  Explicit floor division keeps sp_len an integer, which is
# required because it is used as a slice bound and as a divisor for counts.
fft_len = 512
sp_len = fft_len // 2 + 1

# Frequency bands [Hz] whose average amplitudes are compared to obtain the
# slope: slope = ( band_2_avg - band_1_avg ) / distance of the band centres.
band1_beg_hz = 350
band1_end_hz = 1100

band2_beg_hz = 2300
band2_end_hz = 5500
|
29
|
|
30
|
|
31
|
# ----------
|
32
|
|
33
|
|
34
|
def read_data( file_name ):
    """Return the whole content of the file *file_name* as a byte string.

    The file is opened in binary mode and is closed even when reading fails.
    """
    with open( file_name, "rb" ) as f:
        return f.read()
|
39
|
|
40
|
|
41
|
# ----------
|
42
|
|
43
|
|
44
|
def write_data( file_name, data ):
    """Write the byte string *data* to the file *file_name*.

    The file is opened in binary mode (an existing file is overwritten) and
    is closed even when writing fails.
    """
    with open( file_name, "wb" ) as f:
        f.write( data )
|
48
|
|
49
|
|
50
|
# ----------
|
51
|
|
52
|
|
53
|
def get_files( base_dir, file_ext ):
    """Return base names (without extension) of the files in *base_dir*.

    Only entries named ``<base>.<file_ext>`` are returned; entries whose name
    starts with a dot are skipped.  *file_ext* is given without the leading
    dot.  The order of the result is the order reported by os.listdir().
    """
    # Match on the dot as well: a name such as "xwav" must not be taken for
    # a "wav" file (and then lose one character too many when stripped).
    suffix = "." + file_ext

    return [ name[:-len( suffix )]
             for name in os.listdir( base_dir )
             if name.endswith( suffix ) and not name.startswith( "." ) ]
|
56
|
|
57
|
|
58
|
# ----------
|
59
|
|
60
|
|
61
|
def main():
    """Create a spectral slope file for every WAV file of a directory.

    Command line: ``DIR_SRC DIR_OUT``.  For each ``<name>.wav`` in DIR_SRC
    the file ``DIR_OUT/<name>.slope.asf`` is written, unless it already
    exists (such files are skipped, so an interrupted run can be resumed).
    Intermediate data (samples, windowed frames, spectra) are exchanged with
    the SPTK tools through scratch files in ``DIR_OUT/tmp``.

    Raises:
        ValueError: for a WAV file that is not mono 8-bit or 16-bit.
        RuntimeError: when an SPTK tool exits with a non-zero code.
    """

    sptk_dir = "/home/zhanzlic/tools/SPTK/bin"

    ext_audio = "wav"
    ext_slope = "slope.asf"

    parser = argparse.ArgumentParser( description="Compute spectral slope files (ASF) from audio files." )

    parser.add_argument( type=str, metavar="DIR_SRC", dest="dir_src", help="directory with source audio files" )
    parser.add_argument( type=str, metavar="DIR_OUT", dest="dir_out", help="output directory for slope files" )

    args = parser.parse_args()

    # makedirs() creates the output directory as well when it is missing
    temp_dir = os.path.join( args.dir_out, "tmp" )

    if not os.path.exists( temp_dir ):
        os.makedirs( temp_dir )

    # scratch files, reused (overwritten) for every processed audio file
    file_samples = os.path.join( temp_dir, "samples" )
    file_frames = os.path.join( temp_dir, "frames" )
    file_sp = os.path.join( temp_dir, "sp" )

    for fileName in sorted( get_files( args.dir_src, ext_audio ) ):

        file_wav = os.path.join( args.dir_src, "{}.{}".format( fileName, ext_audio ) )
        file_slope = os.path.join( args.dir_out, "{}.{}".format( fileName, ext_slope ) )

        if os.path.exists( file_slope ):
            continue

        print( "Processing {} ...".format( fileName ) )

        # ----- wave to samples

        freq_samp, samples = _read_wav_samples( file_wav )

        # the SPTK tools take lengths in whole samples
        frame_shift = int( round( frame_shift_sec * freq_samp ) )
        frame_len = int( round( frame_len_sec * freq_samp ) )

        write_data( file_samples, struct.pack( "{}f".format( len( samples ) ), *samples ) )

        # ----- samples to frames

        _run_pipeline(
            [ [ os.path.join( sptk_dir, "frame" ), "-l", str( frame_len ), "-p", str( frame_shift ), file_samples ],
              [ os.path.join( sptk_dir, "window" ), "-l", str( frame_len ), "-L", str( fft_len ), "-w", "2", "-n", "1" ] ],
            file_frames )

        # ----- frames to spectra

        _run_pipeline(
            [ [ os.path.join( sptk_dir, "fftr" ), "-l", str( fft_len ), "-A", "-H", file_frames ] ],
            file_sp )

        data = read_data( file_sp )
        sp = struct.unpack( "{}f".format( len( data ) // 4 ), data )

        # ----- spectra to slope

        _write_slope_file( file_slope, _compute_slopes( sp, freq_samp ) )

        #plt.plot( slopes )
        #plt.show()
        #break


def _read_wav_samples( file_wav ):
    """Return ( sampling rate, sequence of samples ) of a mono 8/16-bit WAV file."""

    w = wave.open( file_wav, "rb" )
    try:
        freq_samp = w.getframerate()
        samp_width = w.getsampwidth()
        channels = w.getnchannels()
        raw = w.readframes( w.getnframes() )
    finally:
        w.close()

    if channels != 1:
        raise ValueError( "{}: only mono files are supported, got {} channels".format( file_wav, channels ) )

    # Count the samples from the bytes actually read, so a header which
    # disagrees with the data does not break the unpacking.
    if samp_width == 1:
        # 8-bit WAV data is unsigned, with the zero level at 128
        samples = [ s - 128 for s in struct.unpack( "{}B".format( len( raw ) ), raw ) ]
    elif samp_width == 2:
        samples = struct.unpack( "{}h".format( len( raw ) // 2 ), raw )
    else:
        raise ValueError( "{}: unsupported sample width of {} bytes".format( file_wav, samp_width ) )

    return freq_samp, samples


def _run_pipeline( commands, out_path ):
    """Run *commands* chained like ``cmd1 | cmd2 > out_path``, without a shell.

    Each command is a list of program arguments, so paths with spaces or
    shell metacharacters are passed through unchanged.  RuntimeError is
    raised when any command of the pipeline exits with a non-zero code.
    """

    procs = []

    with open( out_path, "wb" ) as out:
        upstream = None

        for idx, cmd in enumerate( commands ):
            is_last = ( idx == len( commands ) - 1 )
            proc = subprocess.Popen( cmd, stdin=upstream, stdout=( out if is_last else subprocess.PIPE ) )

            # Drop our copy of the pipe end, so the producer is notified
            # when its consumer exits early.
            if upstream is not None:
                upstream.close()

            upstream = proc.stdout
            procs.append( proc )

        codes = [ proc.wait() for proc in procs ]

    for cmd, code in zip( commands, codes ):
        if code != 0:
            raise RuntimeError( "command failed with exit code {}: {}".format( code, " ".join( cmd ) ) )


def _compute_slopes( sp, freq_samp ):
    """Return a list of ( time, band_1_avg, band_2_avg, slope ), one item per spectrum.

    *sp* is the flat sequence of spectra, sp_len values per spectrum; an
    incomplete spectrum at the end is ignored.
    """

    n_bins = int( sp_len )
    half_rate = freq_samp // 2

    # band limits [Hz] converted to indices into one spectrum
    band1_beg = band1_beg_hz * n_bins // half_rate
    band1_end = band1_end_hz * n_bins // half_rate

    band2_beg = band2_beg_hz * n_bins // half_rate
    band2_end = band2_end_hz * n_bins // half_rate

    # distance between the centres of the bands [Hz]
    width = ( band2_end_hz + band2_beg_hz ) / 2.0 - ( band1_end_hz + band1_beg_hz ) / 2.0

    rows = []

    for idx in range( len( sp ) // n_bins ):
        spec = sp[ idx*n_bins : (idx+1)*n_bins ]

        band1 = np.mean( spec[ band1_beg:band1_end ] )
        band2 = np.mean( spec[ band2_beg:band2_end ] )

        rows.append( ( frame_shift_sec*idx, band1, band2, ( band2 - band1 ) / width ) )

    return rows


def _write_slope_file( file_slope, rows ):
    """Write the ASF header and one table line per item of *rows* to *file_slope*."""

    header = [
        "#!ASF!#\n",
        "\n",
        "# band_1_avg, band_2_avg ... average amplitudes inside frequency bands\n",
        "# width ... distance between centra of band 1 and band2 [Hz]\n",
        "# slope = ( band_2_avg - band_1_avg ) / width \n",
        "\n",
        "<band_1_begin = {}>\n".format( band1_beg_hz ),
        "<band_1_end = {}>\n".format( band1_end_hz ),
        "<band_2_begin = {}>\n".format( band2_beg_hz ),
        "<band_2_end = {}>\n".format( band2_end_hz ),
        "\n",
        "[ time | band_1_avg | band_2_avg | slope ]\n",
        "\n",
    ]

    with open( file_slope, "wt" ) as f:
        f.writelines( header )

        for row in rows:
            f.write( "| {:7.3f} | {:10.3f} | {:10.3f} | {:8.5f} |\n".format( *row ) )
|
187
|
|
188
|
|
189
|
# ----------
|
190
|
|
191
|
|
192
|
# Start the conversion only when the file is executed as a script.
if __name__ == "__main__":
    main()
|