829 lines
25 KiB
Python
829 lines
25 KiB
Python
"""
Module to read / write wav files using NumPy arrays

Functions
---------
`read`: Return the sample rate (in samples/sec) and data from a WAV file.

`write`: Write a NumPy array as a WAV file.

"""
|
|
import io
|
|
import sys
|
|
import numpy
|
|
import struct
|
|
import warnings
|
|
from enum import IntEnum
|
|
|
|
|
|
# Public API of this module: the warning category plus the two I/O functions.
__all__ = ['WavFileWarning', 'read', 'write']
|
|
|
|
|
|
class WavFileWarning(UserWarning):
    """Warning category used by this module for recoverable WAV problems."""
|
|
|
|
|
|
class WAVE_FORMAT(IntEnum):
    """
    Integer IDs of the ``wFormatTag`` field of a WAVE 'fmt ' chunk.

    The complete list is in mmreg.h in the Windows 10 SDK. ALAC and OPUS
    are the newest additions, in v10.0.14393 2016-07.

    Members that share a value are aliases of the first member declared
    with that value (``IMA_ADPCM`` is an alias of ``DVI_ADPCM``).
    """

    # 0x0000 - 0x00FF
    UNKNOWN = 0x0000
    PCM = 0x0001
    ADPCM = 0x0002
    IEEE_FLOAT = 0x0003
    VSELP = 0x0004
    IBM_CVSD = 0x0005
    ALAW = 0x0006
    MULAW = 0x0007
    DTS = 0x0008
    DRM = 0x0009
    WMAVOICE9 = 0x000A
    WMAVOICE10 = 0x000B
    OKI_ADPCM = 0x0010
    DVI_ADPCM = 0x0011
    IMA_ADPCM = 0x0011  # Duplicate
    MEDIASPACE_ADPCM = 0x0012
    SIERRA_ADPCM = 0x0013
    G723_ADPCM = 0x0014
    DIGISTD = 0x0015
    DIGIFIX = 0x0016
    DIALOGIC_OKI_ADPCM = 0x0017
    MEDIAVISION_ADPCM = 0x0018
    CU_CODEC = 0x0019
    HP_DYN_VOICE = 0x001A
    YAMAHA_ADPCM = 0x0020
    SONARC = 0x0021
    DSPGROUP_TRUESPEECH = 0x0022
    ECHOSC1 = 0x0023
    AUDIOFILE_AF36 = 0x0024
    APTX = 0x0025
    AUDIOFILE_AF10 = 0x0026
    PROSODY_1612 = 0x0027
    LRC = 0x0028
    DOLBY_AC2 = 0x0030
    GSM610 = 0x0031
    MSNAUDIO = 0x0032
    ANTEX_ADPCME = 0x0033
    CONTROL_RES_VQLPC = 0x0034
    DIGIREAL = 0x0035
    DIGIADPCM = 0x0036
    CONTROL_RES_CR10 = 0x0037
    NMS_VBXADPCM = 0x0038
    CS_IMAADPCM = 0x0039
    ECHOSC3 = 0x003A
    ROCKWELL_ADPCM = 0x003B
    ROCKWELL_DIGITALK = 0x003C
    XEBEC = 0x003D
    G721_ADPCM = 0x0040
    G728_CELP = 0x0041
    MSG723 = 0x0042
    INTEL_G723_1 = 0x0043
    INTEL_G729 = 0x0044
    SHARP_G726 = 0x0045
    MPEG = 0x0050
    RT24 = 0x0052
    PAC = 0x0053
    MPEGLAYER3 = 0x0055
    LUCENT_G723 = 0x0059
    CIRRUS = 0x0060
    ESPCM = 0x0061
    VOXWARE = 0x0062
    CANOPUS_ATRAC = 0x0063
    G726_ADPCM = 0x0064
    G722_ADPCM = 0x0065
    DSAT = 0x0066
    DSAT_DISPLAY = 0x0067
    VOXWARE_BYTE_ALIGNED = 0x0069
    VOXWARE_AC8 = 0x0070
    VOXWARE_AC10 = 0x0071
    VOXWARE_AC16 = 0x0072
    VOXWARE_AC20 = 0x0073
    VOXWARE_RT24 = 0x0074
    VOXWARE_RT29 = 0x0075
    VOXWARE_RT29HW = 0x0076
    VOXWARE_VR12 = 0x0077
    VOXWARE_VR18 = 0x0078
    VOXWARE_TQ40 = 0x0079
    VOXWARE_SC3 = 0x007A
    VOXWARE_SC3_1 = 0x007B
    SOFTSOUND = 0x0080
    VOXWARE_TQ60 = 0x0081
    MSRT24 = 0x0082
    G729A = 0x0083
    MVI_MVI2 = 0x0084
    DF_G726 = 0x0085
    DF_GSM610 = 0x0086
    ISIAUDIO = 0x0088
    ONLIVE = 0x0089
    MULTITUDE_FT_SX20 = 0x008A
    INFOCOM_ITS_G721_ADPCM = 0x008B
    CONVEDIA_G729 = 0x008C
    CONGRUENCY = 0x008D
    SBC24 = 0x0091
    DOLBY_AC3_SPDIF = 0x0092
    MEDIASONIC_G723 = 0x0093
    PROSODY_8KBPS = 0x0094
    ZYXEL_ADPCM = 0x0097
    PHILIPS_LPCBB = 0x0098
    PACKED = 0x0099
    MALDEN_PHONYTALK = 0x00A0
    RACAL_RECORDER_GSM = 0x00A1
    RACAL_RECORDER_G720_A = 0x00A2
    RACAL_RECORDER_G723_1 = 0x00A3
    RACAL_RECORDER_TETRA_ACELP = 0x00A4
    NEC_AAC = 0x00B0
    RAW_AAC1 = 0x00FF

    # 0x0100 - 0x0FFF
    RHETOREX_ADPCM = 0x0100
    IRAT = 0x0101
    VIVO_G723 = 0x0111
    VIVO_SIREN = 0x0112
    PHILIPS_CELP = 0x0120
    PHILIPS_GRUNDIG = 0x0121
    DIGITAL_G723 = 0x0123
    SANYO_LD_ADPCM = 0x0125
    SIPROLAB_ACEPLNET = 0x0130
    SIPROLAB_ACELP4800 = 0x0131
    SIPROLAB_ACELP8V3 = 0x0132
    SIPROLAB_G729 = 0x0133
    SIPROLAB_G729A = 0x0134
    SIPROLAB_KELVIN = 0x0135
    VOICEAGE_AMR = 0x0136
    G726ADPCM = 0x0140
    DICTAPHONE_CELP68 = 0x0141
    DICTAPHONE_CELP54 = 0x0142
    QUALCOMM_PUREVOICE = 0x0150
    QUALCOMM_HALFRATE = 0x0151
    TUBGSM = 0x0155
    MSAUDIO1 = 0x0160
    WMAUDIO2 = 0x0161
    WMAUDIO3 = 0x0162
    WMAUDIO_LOSSLESS = 0x0163
    WMASPDIF = 0x0164
    UNISYS_NAP_ADPCM = 0x0170
    UNISYS_NAP_ULAW = 0x0171
    UNISYS_NAP_ALAW = 0x0172
    UNISYS_NAP_16K = 0x0173
    SYCOM_ACM_SYC008 = 0x0174
    SYCOM_ACM_SYC701_G726L = 0x0175
    SYCOM_ACM_SYC701_CELP54 = 0x0176
    SYCOM_ACM_SYC701_CELP68 = 0x0177
    KNOWLEDGE_ADVENTURE_ADPCM = 0x0178
    FRAUNHOFER_IIS_MPEG2_AAC = 0x0180
    DTS_DS = 0x0190
    CREATIVE_ADPCM = 0x0200
    CREATIVE_FASTSPEECH8 = 0x0202
    CREATIVE_FASTSPEECH10 = 0x0203
    UHER_ADPCM = 0x0210
    ULEAD_DV_AUDIO = 0x0215
    ULEAD_DV_AUDIO_1 = 0x0216
    QUARTERDECK = 0x0220
    ILINK_VC = 0x0230
    RAW_SPORT = 0x0240
    ESST_AC3 = 0x0241
    GENERIC_PASSTHRU = 0x0249
    IPI_HSX = 0x0250
    IPI_RPELP = 0x0251
    CS2 = 0x0260
    SONY_SCX = 0x0270
    SONY_SCY = 0x0271
    SONY_ATRAC3 = 0x0272
    SONY_SPC = 0x0273
    TELUM_AUDIO = 0x0280
    TELUM_IA_AUDIO = 0x0281
    NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285
    FM_TOWNS_SND = 0x0300
    MICRONAS = 0x0350
    MICRONAS_CELP833 = 0x0351
    BTV_DIGITAL = 0x0400
    INTEL_MUSIC_CODER = 0x0401
    INDEO_AUDIO = 0x0402
    QDESIGN_MUSIC = 0x0450
    ON2_VP7_AUDIO = 0x0500
    ON2_VP6_AUDIO = 0x0501
    VME_VMPCM = 0x0680
    TPC = 0x0681
    LIGHTWAVE_LOSSLESS = 0x08AE

    # 0x1000 - 0x7FFF
    OLIGSM = 0x1000
    OLIADPCM = 0x1001
    OLICELP = 0x1002
    OLISBC = 0x1003
    OLIOPR = 0x1004
    LH_CODEC = 0x1100
    LH_CODEC_CELP = 0x1101
    LH_CODEC_SBC8 = 0x1102
    LH_CODEC_SBC12 = 0x1103
    LH_CODEC_SBC16 = 0x1104
    NORRIS = 0x1400
    ISIAUDIO_2 = 0x1401
    SOUNDSPACE_MUSICOMPRESS = 0x1500
    MPEG_ADTS_AAC = 0x1600
    MPEG_RAW_AAC = 0x1601
    MPEG_LOAS = 0x1602
    NOKIA_MPEG_ADTS_AAC = 0x1608
    NOKIA_MPEG_RAW_AAC = 0x1609
    VODAFONE_MPEG_ADTS_AAC = 0x160A
    VODAFONE_MPEG_RAW_AAC = 0x160B
    MPEG_HEAAC = 0x1610
    VOXWARE_RT24_SPEECH = 0x181C
    SONICFOUNDRY_LOSSLESS = 0x1971
    INNINGS_TELECOM_ADPCM = 0x1979
    LUCENT_SX8300P = 0x1C07
    LUCENT_SX5363S = 0x1C0C
    CUSEEME = 0x1F03
    NTCSOFT_ALF2CM_ACM = 0x1FC4
    DVM = 0x2000
    DTS2 = 0x2001
    MAKEAVIS = 0x3313
    DIVIO_MPEG4_AAC = 0x4143
    NOKIA_ADAPTIVE_MULTIRATE = 0x4201
    DIVIO_G726 = 0x4243
    LEAD_SPEECH = 0x434C
    LEAD_VORBIS = 0x564C
    WAVPACK_AUDIO = 0x5756
    OGG_VORBIS_MODE_1 = 0x674F
    OGG_VORBIS_MODE_2 = 0x6750
    OGG_VORBIS_MODE_3 = 0x6751
    OGG_VORBIS_MODE_1_PLUS = 0x676F
    OGG_VORBIS_MODE_2_PLUS = 0x6770
    OGG_VORBIS_MODE_3_PLUS = 0x6771
    ALAC = 0x6C61
    _3COM_NBX = 0x7000  # Can't have leading digit
    OPUS = 0x704F
    FAAD_AAC = 0x706D
    AMR_NB = 0x7361
    AMR_WB = 0x7362
    AMR_WP = 0x7363
    GSM_AMR_CBR = 0x7A21
    GSM_AMR_VBR_SID = 0x7A22

    # 0xA100 - 0xFFFF
    COMVERSE_INFOSYS_G723_1 = 0xA100
    COMVERSE_INFOSYS_AVQSBC = 0xA101
    COMVERSE_INFOSYS_SBC = 0xA102
    SYMBOL_G729_A = 0xA103
    VOICEAGE_AMR_WB = 0xA104
    INGENIENT_G726 = 0xA105
    MPEG4_AAC = 0xA106
    ENCORE_G726 = 0xA107
    ZOLL_ASAO = 0xA108
    SPEEX_VOICE = 0xA109
    VIANIX_MASC = 0xA10A
    WM9_SPECTRUM_ANALYZER = 0xA10B
    # NOTE(review): "ANAYZER" is kept as-is; presumably it mirrors the
    # spelling in the SDK header -- confirm before renaming.
    WMF_SPECTRUM_ANAYZER = 0xA10C
    GSM_610 = 0xA10D
    GSM_620 = 0xA10E
    GSM_660 = 0xA10F
    GSM_690 = 0xA110
    GSM_ADAPTIVE_MULTIRATE_WB = 0xA111
    POLYCOM_G722 = 0xA112
    POLYCOM_G728 = 0xA113
    POLYCOM_G729_A = 0xA114
    POLYCOM_SIREN = 0xA115
    GLOBAL_IP_ILBC = 0xA116
    RADIOTIME_TIME_SHIFT_RADIO = 0xA117
    NICE_ACA = 0xA118
    NICE_ADPCM = 0xA119
    VOCORD_G721 = 0xA11A
    VOCORD_G726 = 0xA11B
    VOCORD_G722_1 = 0xA11C
    VOCORD_G728 = 0xA11D
    VOCORD_G729 = 0xA11E
    VOCORD_G729_A = 0xA11F
    VOCORD_G723_1 = 0xA120
    VOCORD_LBC = 0xA121
    NICE_G728 = 0xA122
    FRACE_TELECOM_G729 = 0xA123
    CODIAN = 0xA124
    FLAC = 0xF1AC
    EXTENSIBLE = 0xFFFE
    DEVELOPMENT = 0xFFFF
|
|
|
|
|
|
# Format tags this module is able to decode; `_read_fmt_chunk` rejects every
# other tag via `_raise_bad_format`.
KNOWN_WAVE_FORMATS = {
    WAVE_FORMAT.PCM,
    WAVE_FORMAT.IEEE_FLOAT,
}
|
|
|
|
|
|
def _raise_bad_format(format_tag):
    """
    Raise a ValueError describing an unsupported ``wFormatTag``.

    Parameters
    ----------
    format_tag : int
        Format tag read from the file. Known tags are reported by their
        `WAVE_FORMAT` name, unknown ones as a zero-padded hex literal.

    Raises
    ------
    ValueError
        Always; the message also lists the names in `KNOWN_WAVE_FORMATS`.
    """
    try:
        label = WAVE_FORMAT(format_tag).name
    except ValueError:
        # Not a registered tag: fall back to e.g. '0x1234'
        label = f'{format_tag:#06x}'

    supported = ', '.join(known.name for known in KNOWN_WAVE_FORMATS)
    raise ValueError(f"Unknown wave file format: {label}. Supported "
                     "formats: " + supported)
|
|
|
|
|
|
def _read_fmt_chunk(fid, is_big_endian):
    """
    Parse the body of a 'fmt ' chunk.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned immediately after the 'fmt ' id.
    is_big_endian : bool
        True when the header fields are stored big-endian.

    Returns
    -------
    size : int
        size of format subchunk in bytes (minus 8 for "fmt " and itself)
    format_tag : int
        PCM, float, or compressed format
    channels : int
        number of channels
    fs : int
        sampling frequency in samples per second
    bytes_per_second : int
        overall byte rate for the file
    block_align : int
        bytes per sample, including all channels
    bit_depth : int
        bits per sample

    Raises
    ------
    ValueError
        If the chunk is shorter than 16 bytes, if an extensible header
        declares an extension shorter than 22 bytes, or if the resulting
        format tag is not in `KNOWN_WAVE_FORMATS`.

    Notes
    -----
    Assumes file pointer is immediately after the 'fmt ' id
    """
    order = '>' if is_big_endian else '<'

    (size,) = struct.unpack(order + 'I', fid.read(4))
    if size < 16:
        raise ValueError("Binary structure of wave file is not compliant")

    (format_tag, channels, fs, bytes_per_second, block_align,
     bit_depth) = struct.unpack(order + 'HHIIHH', fid.read(16))
    consumed = 16

    if format_tag == WAVE_FORMAT.EXTENSIBLE and size >= (16 + 2):
        (ext_chunk_size,) = struct.unpack(order + 'H', fid.read(2))
        consumed += 2
        if ext_chunk_size < 22:
            raise ValueError("Binary structure of wave file is not compliant")

        extension = fid.read(22)
        consumed += 22
        # The 16-byte GUID sits after the first 2 + 4 bytes of the extension
        guid = extension[2 + 4:2 + 4 + 16]
        # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
        # MS GUID byte order: first three groups are native byte order,
        # rest is Big Endian
        if is_big_endian:
            guid_tail = b'\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71'
        else:
            guid_tail = b'\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71'
        if guid.endswith(guid_tail):
            # The leading four bytes of a template GUID hold the real tag
            (format_tag,) = struct.unpack(order + 'I', guid[:4])

    if format_tag not in KNOWN_WAVE_FORMATS:
        _raise_bad_format(format_tag)

    # move file pointer to next chunk
    leftover = size - consumed
    if leftover > 0:
        fid.read(leftover)

    # fmt should always be 16, 18 or 40, but handle it just in case
    _handle_pad_byte(fid, size)

    return (size, format_tag, channels, fs, bytes_per_second, block_align,
            bit_depth)
|
|
|
|
|
|
def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian,
                     block_align, mmap=False):
    """
    Read the samples of a 'data' chunk into a NumPy array.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned immediately after the 'data' id.
    format_tag : int
        ``WAVE_FORMAT.PCM`` or ``WAVE_FORMAT.IEEE_FLOAT``.
    channels : int
        Number of channels, from the fmt chunk.
    bit_depth : int
        Bits per sample, from the fmt chunk.
    is_big_endian : bool
        True when the file stores multi-byte values big-endian.
    block_align : int
        Bytes per sample frame (all channels), from the fmt chunk.
    mmap : bool, optional
        If True, return a copy-on-write ``numpy.memmap`` instead of reading
        the samples. Only possible for 1, 2, 4 or 8 byte containers.

    Returns
    -------
    data : numpy.ndarray
        1-D for a single channel, otherwise reshaped to ``(-1, channels)``.
        Samples stored in 3, 5, 6 or 7 byte containers are widened to the
        smallest 4- or 8-byte signed integer, left-justified, in the byte
        order of the file.

    Raises
    ------
    ValueError
        If ``channels``/``block_align`` describe a container smaller than
        one byte, if the bit depth is unsupported, if the format tag is not
        supported, or if ``mmap=True`` is requested for an incompatible
        container size.

    Notes
    -----
    Assumes file pointer is immediately after the 'data' id

    It's possible to not use all available bits in a container, or to store
    samples in a container bigger than necessary, so bytes_per_sample uses
    the actual reported container size (nBlockAlign / nChannels).  Real-world
    examples:

    Adobe Audition's "24-bit packed int (type 1, 20-bit)"

        nChannels = 2, nBlockAlign = 6, wBitsPerSample = 20

    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/AFsp/M1F1-int12-AFsp.wav
    is:

        nChannels = 2, nBlockAlign = 4, wBitsPerSample = 12

    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/multichaudP.pdf
    gives an example of:

        nChannels = 2, nBlockAlign = 8, wBitsPerSample = 20
    """
    fmt = '>' if is_big_endian else '<'

    # Size of the data subchunk in bytes
    size = struct.unpack(fmt+'I', fid.read(4))[0]

    # A header with no channels, or with less than one byte per channel,
    # would otherwise surface below as an opaque ZeroDivisionError.
    if channels < 1 or block_align < channels:
        raise ValueError("Binary structure of wave file is not compliant")

    # Number of bytes per sample (sample container size)
    bytes_per_sample = block_align // channels
    n_samples = size // bytes_per_sample

    if format_tag == WAVE_FORMAT.PCM:
        if 1 <= bit_depth <= 8:
            dtype = 'u1'  # WAV of 8-bit integer or less are unsigned
        elif bytes_per_sample in {3, 5, 6, 7}:
            # No compatible dtype. Load as raw bytes for reshaping later.
            dtype = 'V1'
        elif bit_depth <= 64:
            # Remaining bit depths can map directly to signed numpy dtypes
            dtype = f'{fmt}i{bytes_per_sample}'
        else:
            raise ValueError("Unsupported bit depth: the WAV file "
                             f"has {bit_depth}-bit integer data.")
    elif format_tag == WAVE_FORMAT.IEEE_FLOAT:
        if bit_depth in {32, 64}:
            dtype = f'{fmt}f{bytes_per_sample}'
        else:
            raise ValueError("Unsupported bit depth: the WAV file "
                             f"has {bit_depth}-bit floating-point data.")
    else:
        _raise_bad_format(format_tag)

    start = fid.tell()
    if not mmap:
        try:
            count = size if dtype == 'V1' else n_samples
            data = numpy.fromfile(fid, dtype=dtype, count=count)
        except io.UnsupportedOperation:  # not a C-like file
            fid.seek(start, 0)  # just in case it seeked, though it shouldn't
            data = numpy.frombuffer(fid.read(size), dtype=dtype)

        if dtype == 'V1':
            # Rearrange raw bytes into smallest compatible numpy dtype.
            # The wide integer carries the byte order of the file, and the
            # sample bytes go at its most-significant end: the trailing
            # bytes for little-endian, the leading bytes for big-endian.
            # This keeps the result left-justified on any host.
            wide = numpy.dtype(f'{fmt}i4' if bytes_per_sample == 3
                               else f'{fmt}i8')
            a = numpy.zeros((len(data) // bytes_per_sample, wide.itemsize),
                            dtype='V1')
            packed = data.reshape((-1, bytes_per_sample))
            if is_big_endian:
                a[:, :bytes_per_sample] = packed
            else:
                a[:, -bytes_per_sample:] = packed
            data = a.view(wide).reshape(a.shape[:-1])
    else:
        if bytes_per_sample in {1, 2, 4, 8}:
            start = fid.tell()
            data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
                                shape=(n_samples,))
            # memmap does not advance the file position; do it by hand
            fid.seek(start + size)
        else:
            raise ValueError("mmap=True not compatible with "
                             f"{bytes_per_sample}-byte container size.")

    _handle_pad_byte(fid, size)

    if channels > 1:
        data = data.reshape(-1, channels)
    return data
|
|
|
|
|
|
def _skip_unknown_chunk(fid, is_big_endian):
    """
    Skip over a chunk whose contents are not interpreted.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned immediately after the chunk id.
    is_big_endian : bool
        True when the chunk size field is stored big-endian.

    Notes
    -----
    Reads the 4-byte chunk size, seeks past that many bytes and then past
    the pad byte that follows an odd-sized chunk.
    """
    size_format = '>I' if is_big_endian else '<I'

    raw_size = fid.read(4)
    # An empty read means we are at the end of the file: calling unpack()
    # would raise needlessly, and there is nothing left to seek over.
    if not raw_size:
        return

    (size,) = struct.unpack(size_format, raw_size)
    fid.seek(size, 1)
    _handle_pad_byte(fid, size)
|
|
|
|
|
|
def _read_riff_chunk(fid):
    """
    Read the 12-byte RIFF/RIFX header at the current file position.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned at the file signature.

    Returns
    -------
    file_size : int
        Size stored in the header plus 8 (the signature and size fields).
    is_big_endian : bool
        False for a 'RIFF' signature, True for 'RIFX'.

    Raises
    ------
    ValueError
        If the signature is neither 'RIFF' nor 'RIFX', or the form type is
        not 'WAVE'.
    """
    # signature -> (is_big_endian, struct format of the size field)
    layouts = {b'RIFF': (False, '<I'), b'RIFX': (True, '>I')}

    signature = fid.read(4)  # File signature
    if signature not in layouts:
        # There are also .wav files with "FFIR" or "XFIR" signatures?
        raise ValueError(f"File format {repr(signature)} not understood. Only "
                         "'RIFF' and 'RIFX' supported.")
    is_big_endian, size_format = layouts[signature]

    # Size of entire file
    (riff_size,) = struct.unpack(size_format, fid.read(4))
    file_size = riff_size + 8

    form_type = fid.read(4)
    if form_type != b'WAVE':
        raise ValueError(f"Not a WAV file. RIFF form type is {repr(form_type)}.")

    return file_size, is_big_endian
|
|
|
|
|
|
def _handle_pad_byte(fid, size):
    """
    Step over the pad byte that follows an odd-sized chunk.

    Parameters
    ----------
    fid : file-like
        Open file positioned at the end of the chunk data.
    size : int
        Size of the chunk data in bytes.

    Notes
    -----
    "If the chunk size is an odd number of bytes, a pad byte with value zero
    is written after ckData."  So we need to seek past this after each chunk.
    """
    if size % 2 != 0:
        fid.seek(1, io.SEEK_CUR)
|
|
|
|
|
|
def read(filename, mmap=False):
    """
    Open a WAV file.

    Return the sample rate (in samples/sec) and data from an LPCM WAV file.

    Parameters
    ----------
    filename : string or open file handle
        Input WAV file.
    mmap : bool, optional
        Whether to read data as memory-mapped (default: False).  Not compatible
        with some bit depths; see Notes.  Only to be used on real files.

        .. versionadded:: 0.12.0

    Returns
    -------
    rate : int
        Sample rate of WAV file.
    data : numpy array
        Data read from WAV file. Data-type is determined from the file;
        see Notes.  Data is 1-D for 1-channel WAV, or 2-D of shape
        (Nsamples, Nchannels) otherwise. If a file-like input without a
        C-like file descriptor (e.g., :class:`python:io.BytesIO`) is
        passed, this will not be writeable.

    Raises
    ------
    ValueError
        If the file is not a RIFF/RIFX WAVE file, ends before any data was
        read, has a data chunk that is not preceded by a fmt chunk, has no
        data chunk at all, or uses an unsupported format.

    Notes
    -----
    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit integer PCM     -2147483648  +2147483647  int32
    24-bit integer PCM     -2147483648  +2147483392  int32
    16-bit integer PCM     -32768       +32767       int16
    8-bit integer PCM      0            255          uint8
    =====================  ===========  ===========  =============

    WAV files can specify arbitrary bit depth, and this function supports
    reading any integer PCM depth from 1 to 64 bits.  Data is returned in the
    smallest compatible numpy int type, in left-justified format.  8-bit and
    lower is unsigned, while 9-bit and higher is signed.

    For example, 24-bit data will be stored as int32, with the MSB of the
    24-bit data stored at the MSB of the int32, and typically the least
    significant byte is 0x00.  (However, if a file actually contains data past
    its specified bit depth, those bits will be read and output, too. [2]_)

    This bit justification and sign matches WAV's native internal format, which
    allows memory mapping of WAV files that use 1, 2, 4, or 8 bytes per sample
    (so 24-bit files cannot be memory-mapped, but 32-bit can).

    IEEE float PCM in 32- or 64-bit format is supported, with or without mmap.
    Values exceeding [-1, +1] are not clipped.

    Non-linear PCM (mu-law, A-law) is not supported.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html
    .. [2] Adobe Systems Incorporated, "Adobe Audition 3 User Guide", section
       "Audio file formats: 24-bit Packed Int (type 1, 20-bit)", 2007

    Examples
    --------
    >>> from os.path import dirname, join as pjoin
    >>> from scipy.io import wavfile
    >>> import scipy.io

    Get the filename for an example .wav file from the tests/data directory.

    >>> data_dir = pjoin(dirname(scipy.io.__file__), 'tests', 'data')
    >>> wav_fname = pjoin(data_dir, 'test-44100Hz-2ch-32bit-float-be.wav')

    Load the .wav file contents.

    >>> samplerate, data = wavfile.read(wav_fname)
    >>> print(f"number of channels = {data.shape[1]}")
    number of channels = 2
    >>> length = data.shape[0] / samplerate
    >>> print(f"length = {length}s")
    length = 0.01s

    Plot the waveform.

    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> time = np.linspace(0., length, data.shape[0])
    >>> plt.plot(time, data[:, 0], label="Left channel")
    >>> plt.plot(time, data[:, 1], label="Right channel")
    >>> plt.legend()
    >>> plt.xlabel("Time [s]")
    >>> plt.ylabel("Amplitude")
    >>> plt.show()

    """
    if hasattr(filename, 'read'):
        fid = filename
        # Caller-supplied handles are never memory-mapped
        mmap = False
    else:
        fid = open(filename, 'rb')

    try:
        file_size, is_big_endian = _read_riff_chunk(fid)
        fmt_chunk_received = False
        data_chunk_received = False
        while fid.tell() < file_size:
            # read the next chunk
            chunk_id = fid.read(4)

            if not chunk_id:
                if data_chunk_received:
                    # End of file but data successfully read
                    warnings.warn(
                        "Reached EOF prematurely; finished at {:d} bytes, "
                        "expected {:d} bytes from header."
                        .format(fid.tell(), file_size),
                        WavFileWarning, stacklevel=2)
                    break
                else:
                    raise ValueError("Unexpected end of file.")
            elif len(chunk_id) < 4:
                msg = f"Incomplete chunk ID: {repr(chunk_id)}"
                # If we have the data, ignore the broken chunk
                if fmt_chunk_received and data_chunk_received:
                    warnings.warn(msg + ", ignoring it.", WavFileWarning,
                                  stacklevel=2)
                else:
                    raise ValueError(msg)

            if chunk_id == b'fmt ':
                fmt_chunk_received = True
                fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
                format_tag, channels, fs = fmt_chunk[1:4]
                bit_depth = fmt_chunk[6]
                block_align = fmt_chunk[5]
            elif chunk_id == b'fact':
                _skip_unknown_chunk(fid, is_big_endian)
            elif chunk_id == b'data':
                data_chunk_received = True
                if not fmt_chunk_received:
                    raise ValueError("No fmt chunk before data")
                data = _read_data_chunk(fid, format_tag, channels, bit_depth,
                                        is_big_endian, block_align, mmap)
            elif chunk_id == b'LIST':
                # Someday this could be handled properly but for now skip it
                _skip_unknown_chunk(fid, is_big_endian)
            elif chunk_id in {b'JUNK', b'Fake'}:
                # Skip alignment chunks without warning
                _skip_unknown_chunk(fid, is_big_endian)
            else:
                warnings.warn("Chunk (non-data) not understood, skipping it.",
                              WavFileWarning, stacklevel=2)
                _skip_unknown_chunk(fid, is_big_endian)

        # The loop can finish without ever meeting a data chunk (e.g. a
        # header-only file). Report that explicitly rather than failing on
        # the unbound `fs`/`data` names at the return statement.
        if not data_chunk_received:
            raise ValueError("No data chunk in WAV file.")
    finally:
        if not hasattr(filename, 'read'):
            fid.close()
        else:
            # Leave a caller-supplied handle rewound to the start
            fid.seek(0)

    return fs, data
|
|
|
|
|
|
def write(filename, rate, data):
    """
    Write a NumPy array as a WAV file.

    Parameters
    ----------
    filename : string or open file handle
        Output wav file.
    rate : int
        The sample rate (in samples/sec).
    data : ndarray
        A 1-D or 2-D NumPy array of either integer or float data-type.

    Raises
    ------
    ValueError
        If the dtype of `data` is not signed integer, float or uint8, or if
        the resulting file would exceed the 32-bit RIFF size field.

    Notes
    -----
    * Writes a simple uncompressed WAV file.
    * To write multiple-channels, use a 2-D array of shape
      (Nsamples, Nchannels).
    * The bits-per-sample and PCM/float will be determined by the data-type.

    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    Note that 8-bit PCM is unsigned.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    Examples
    --------
    Create a 100Hz sine wave, sampled at 44100Hz.
    Write to 16-bit PCM, Mono.

    >>> from scipy.io.wavfile import write
    >>> samplerate = 44100; fs = 100
    >>> t = np.linspace(0., 1., samplerate)
    >>> amplitude = np.iinfo(np.int16).max
    >>> data = amplitude * np.sin(2. * np.pi * fs * t)
    >>> write("example.wav", samplerate, data.astype(np.int16))

    """
    # Only close the handle when this function opened it
    owns_handle = not hasattr(filename, 'write')
    fid = open(filename, 'wb') if owns_handle else filename

    fs = rate

    try:
        kind = data.dtype.kind
        sample_bytes = data.dtype.itemsize
        supported = (kind == 'i' or kind == 'f'
                     or (kind == 'u' and sample_bytes == 1))
        if not supported:
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        # Anything that is not integer PCM needs a cbSize field and a
        # 'fact' chunk
        non_pcm = kind not in ('i', 'u')

        # fmt chunk
        format_tag = WAVE_FORMAT.IEEE_FLOAT if kind == 'f' else WAVE_FORMAT.PCM
        channels = 1 if data.ndim == 1 else data.shape[1]
        bit_depth = sample_bytes * 8
        bytes_per_second = fs * sample_bytes * channels
        block_align = channels * sample_bytes

        fmt_payload = struct.pack('<HHIIHH', format_tag, channels, fs,
                                  bytes_per_second, block_align, bit_depth)
        if non_pcm:
            # add cbSize field for non-PCM files
            fmt_payload += b'\x00\x00'

        pieces = [
            b'RIFF',
            b'\x00\x00\x00\x00',  # RIFF size placeholder, patched below
            b'WAVE',
            b'fmt ',
            struct.pack('<I', len(fmt_payload)),
            fmt_payload,
        ]

        # fact chunk (non-PCM files)
        if non_pcm:
            pieces.append(b'fact')
            pieces.append(struct.pack('<II', 4, data.shape[0]))

        header = b''.join(pieces)

        # check data size (needs to be immediately before the data chunk)
        riff_payload = (len(header) - 4 - 4) + (4 + 4 + data.nbytes)
        if riff_payload > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")

        fid.write(header)

        # data chunk
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        byteorder = data.dtype.byteorder
        stored_big_endian = (byteorder == '>'
                             or (byteorder == '=' and sys.byteorder == 'big'))
        if stored_big_endian:
            # The file is always written little-endian
            data = data.byteswap()
        _array_tofile(fid, data)

        # Determine file size and place it in correct
        # position at start of the file.
        total = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<I', total - 8))

    finally:
        if owns_handle:
            fid.close()
        else:
            # Leave a caller-supplied handle rewound to the start
            fid.seek(0)
|
|
|
|
|
|
def _array_tofile(fid, data):
    """
    Write the raw bytes of `data` to `fid` in C (row-major) order.

    Parameters
    ----------
    fid : file-like
        Object with a ``write`` method that accepts a buffer.
    data : ndarray
        Array whose bytes are written as stored, with no byte swapping.
    """
    # ravel gives a c-contiguous buffer
    flat = data.ravel()
    as_bytes = flat.view('b')
    fid.write(as_bytes.data)
|