Commit 2ce58855 authored by Guillaume Allegre

Update Conversion IRMAS/model_cnn_Irmas.ipynb, Conversion IRMAS/conversion_mel_IRMAS files

{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"conversion_mel_IRMAS","provenance":[],"collapsed_sections":[],"toc_visible":true,"mount_file_id":"1obbS6-vX1z--9i1mMt84W34a1OqQO8Dw","authorship_tag":"ABX9TyPpHeWmgS16jAN/ZFzmeKZ4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"id":"zQQC4ApGdNxA","executionInfo":{"status":"ok","timestamp":1618244287875,"user_tz":-120,"elapsed":828,"user":{"displayName":"guillaume ALLEGRE","photoUrl":"","userId":"15788823947103069938"}}},"source":["import scipy\n","import os\n","from scipy.io import wavfile\n","from scipy import signal\n","import matplotlib.pyplot as plt\n","import numpy as np\n","import os\n","import librosa\n","import librosa.display\n","\n","\n","import librosa\n","import numpy\n","\n","\n","\n","# settings\n","hop_length = 512 # number of samples per time-step in spectrogram\n","n_mels = 256 # number of bins in spectrogram. Height of image\n","time_steps = 256 # number of time-steps. Width of image\n"],"execution_count":31,"outputs":[]},{"cell_type":"code","metadata":{"id":"2Y62suo5gFCc","executionInfo":{"status":"ok","timestamp":1618244288081,"user_tz":-120,"elapsed":431,"user":{"displayName":"guillaume ALLEGRE","photoUrl":"","userId":"15788823947103069938"}}},"source":["data_path = './drive/MyDrive/Automatant-Musique/Data_IRMAS/IRMAS-TrainingData'\n","data_converted_path = './drive/MyDrive/Automatant-Musique/Data_converted_train_IRMAS'\n","lst_instru = ['org','cel','cla','sax','flu','gac','gel','pia','tru','vio','voi']\n","\n","train_img = []\n","train_label = []\n","\n","\n","LIMIT = False\n","num_limit = 10 # max is 100"],"execution_count":32,"outputs":[]},{"cell_type":"code","metadata":{"id":"jz5Z5PUCvhdA","executionInfo":{"status":"ok","timestamp":1618244288903,"user_tz":-120,"elapsed":541,"user":{"displayName":"guillaume ALLEGRE","photoUrl":"","userId":"15788823947103069938"}}},"source":["def index(intru):\n","\n"," for i in range(len(lst_instru)):\n"," if instru == lst_instru[i]:\n"," return i\n","\n","def scale_minmax(X, min=0.0, max=1.0):\n"," X_std = (X - X.min()) / (X.max() - X.min())\n"," X_scaled = X_std * (max - min) + min\n"," return X_scaled\n","\n","def spectrogram_image(y, sr, out, hop_length, n_mels):\n"," # use log-melspectrogram\n"," mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels,\n"," n_fft=hop_length*2, hop_length=hop_length)\n"," mels = numpy.log(mels + 1e-9) # add small number to avoid log(0)\n","\n"," # min-max scale to fit inside 8-bit range\n"," img = scale_minmax(mels, 0, 255).astype(numpy.uint8)\n"," img =np.expand_dims(img, axis=2)\n","\n"," train_img.append(img)\n"," train_label.append(index(instru))\n"," \n","\n","def save_img(data_path, instru, wavfile):\n","\n"," x_path = './drive/MyDrive/Automatant-Musique/Data_IRMAS/IRMAS-TrainingData' + '/' + instru + '/' + wavfile\n"," x_save_path = './drive/MyDrive/Automatant-Musique/Data_converted_train_IRMAS' + '/' + instru + '/' + wavfile[:-3] + \"png\"\n","\n"," y, sr = librosa.load(x_path, offset=1.0, duration=2.0)\n","\n"," # extract a fixed length window\n"," start_sample = 0 # starting at beginning\n"," length_samples = time_steps*hop_length\n"," window = y\n","\n"," \n"," # convert to PNG\n"," spectrogram_image(window, sr=sr, out=x_save_path, hop_length=hop_length, 
n_mels=n_mels)\n"],"execution_count":33,"outputs":[]},{"cell_type":"code","metadata":{"id":"soaiQoJbtbSQ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1618245214678,"user_tz":-120,"elapsed":924867,"user":{"displayName":"guillaume ALLEGRE","photoUrl":"","userId":"15788823947103069938"}},"outputId":"5ca52630-1cc3-41fa-b70d-c654737612d5"},"source":["\n","for instru in os.listdir(data_path):\n"," print(instru)\n"," for wavfile in os.listdir(data_path + '/' + instru):\n"," if wavfile[-3:] != 'wav':\n"," continue\n"," \n"," save_img(data_converted_path, instru, wavfile)\n","\n"],"execution_count":34,"outputs":[{"output_type":"stream","text":["cla\n","org\n","vio\n","pia\n","sax\n","cel\n","gac\n","flu\n","gel\n","tru\n","voi\n",".ipynb_checkpoints\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"VAIlPGE9ZbDZ","executionInfo":{"status":"ok","timestamp":1618245726173,"user_tz":-120,"elapsed":2121,"user":{"displayName":"guillaume ALLEGRE","photoUrl":"","userId":"15788823947103069938"}},"outputId":"86517ad6-d98b-4402-8402-469e064a95ea"},"source":["train_img = np.array(train_img)\n","print(np.shape(train_img))\n","np.save('./drive/MyDrive/Automatant-Musique/Data_converted_train_IRMAS/train_images.npy', train_img)\n","np.save('./drive/MyDrive/Automatant-Musique/Data_converted_train_IRMAS/train_labels.npy', train_label)"],"execution_count":35,"outputs":[{"output_type":"stream","text":["(6705, 256, 87, 1)\n"],"name":"stdout"}]}]}
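As a quick sanity check on the conversion above (an editor's sketch, not part of the committed notebooks; it only uses the paths and shapes saved by the last cell), the arrays can be reloaded and one spectrogram displayed:

```
import numpy as np
import matplotlib.pyplot as plt

# reload the arrays written by the conversion notebook
imgs = np.load('./drive/MyDrive/Automatant-Musique/Data_converted_train_IRMAS/train_images.npy')
labels = np.load('./drive/MyDrive/Automatant-Musique/Data_converted_train_IRMAS/train_labels.npy')
print(imgs.shape, labels.shape)  # expected: (6705, 256, 87, 1) and (6705,)

# display the first log-mel spectrogram (drop the trailing channel axis)
plt.imshow(imgs[0, :, :, 0], origin='lower', aspect='auto', cmap='magma')
plt.title('log-mel spectrogram, label index %d' % labels[0])
plt.show()
```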
Conversion IRMAS/model_cnn_Irmas.ipynb

%% Cell type:code id: tags:
```
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers, models, losses
from tensorflow.keras.models import Model
import keras_preprocessing
import matplotlib.pyplot as plt
import matplotlib
from PIL import Image
import os
from sklearn.utils import shuffle
```
%% Cell type:code id: tags:
```
shape_image = (256,87,1)
data_converted_path = './drive/MyDrive/Automatant-Musique/Data_converted_train_IRMAS'
lst_instru = ['org','cel','cla','sax','flu','gac','gel','pia','tru','vio','voi']
nb_instru = len(lst_instru)
```
%% Cell type:code id: tags:
```
def index(num):
    # one-hot encode a class index over the nb_instru classes
    if num < 0 or num >= nb_instru:
        return None
    one_hot = [0] * nb_instru
    one_hot[num] = 1
    return one_hot

def vect(label):
    vect_label = []
    for num in label:
        vect_label.append(index(num))
    return np.array(vect_label)
```
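The hand-written `index`/`vect` pair is equivalent to Keras's built-in one-hot helper; a minimal sketch using the same `nb_instru` defined earlier:

```
import tensorflow as tf

# one-hot encode integer labels 0..nb_instru-1, same result as vect()
one_hot = tf.keras.utils.to_categorical([0, 3, 10], num_classes=nb_instru)
print(one_hot.shape)  # (3, 11)
```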
%% Cell type:code id: tags:
```
train_label, train_images = shuffle(
    np.load(data_converted_path + '/train_labels.npy'),
    np.load(data_converted_path + '/train_images.npy'))
print(np.shape(train_images))
train_label = vect(train_label)  # integer labels -> one-hot vectors
# hold out the last 705 of the 6705 examples for validation
test_labels, test_images = train_label[6000:6705], train_images[6000:6705]
train_labels, train_images = train_label[:6000], train_images[:6000]
print(np.shape(train_images))
```
%% Output
(6705, 256, 87, 1)
(6000, 256, 87, 1)
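The `shuffle` plus fixed-slice split above works; for reference, scikit-learn can shuffle and split in one call. A sketch assuming the same pre-split arrays as in the cell above (the `random_state` value is illustrative, chosen only for reproducibility):

```
from sklearn.model_selection import train_test_split

# shuffle and split in one step, with a 705-example hold-out as above
train_images, test_images, train_labels, test_labels = train_test_split(
    train_images, train_label, test_size=705, random_state=0)
```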
%% Cell type:code id: tags:
```
model_cnn = keras.models.Sequential()
model_cnn.add(keras.layers.Conv2D(32, 3, activation = 'relu', padding = 'same', input_shape = shape_image))
model_cnn.add(keras.layers.BatchNormalization())
model_cnn.add(keras.layers.MaxPooling2D())
model_cnn.add(keras.layers.Conv2D(64, 3, activation = 'relu', padding = 'same'))
model_cnn.add(keras.layers.BatchNormalization())
model_cnn.add(keras.layers.MaxPooling2D())
model_cnn.add(keras.layers.Conv2D(128, 3, activation = 'relu', padding = 'same'))
model_cnn.add(keras.layers.BatchNormalization())
model_cnn.add(keras.layers.MaxPooling2D())
model_cnn.add(keras.layers.Conv2D(256, 3, activation = 'relu', padding = 'same'))
model_cnn.add(keras.layers.BatchNormalization())
model_cnn.add(keras.layers.MaxPooling2D())
model_cnn.add(keras.layers.Flatten())
model_cnn.add(keras.layers.Dense(64, activation='relu'))
model_cnn.add(keras.layers.Dense(nb_instru, activation ='softmax'))
model_cnn.summary()
```
%% Output
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 256, 87, 32) 320
_________________________________________________________________
batch_normalization_3 (Batch (None, 256, 87, 32) 128
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 128, 43, 32) 0
_________________________________________________________________
conv2d_4 (Conv2D) (None, 128, 43, 64) 18496
_________________________________________________________________
batch_normalization_4 (Batch (None, 128, 43, 64) 256
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 64, 21, 64) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 64, 21, 128) 73856
_________________________________________________________________
batch_normalization_5 (Batch (None, 64, 21, 128) 512
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 32, 10, 128) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 40960) 0
_________________________________________________________________
dense_2 (Dense) (None, 64) 2621504
_________________________________________________________________
dense_3 (Dense) (None, 11) 715
=================================================================
Total params: 2,715,787
Trainable params: 2,715,339
Non-trainable params: 448
_________________________________________________________________
%% Cell type:code id: tags:
```
model_cnn.compile(optimizer = 'Adam', loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ['accuracy'])
# SparseCategoricalCrossentropy does not work here: the labels are already one-hot, so logits and label dimensions mismatch
train_images = np.array(train_images)
test_images = np.array(test_images)
test_labels = np.array(test_labels)
train_labels = np.array(train_labels)
history = model_cnn.fit(train_images, train_labels, epochs = 20, validation_data = (test_images, test_labels))
```
%% Output
Epoch 1/20
188/188 [==============================] - 7s 32ms/step - loss: 0.8608 - accuracy: 0.6921 - val_loss: 4.1454 - val_accuracy: 0.2809
Epoch 2/20
188/188 [==============================] - 6s 31ms/step - loss: 0.7915 - accuracy: 0.7282 - val_loss: 2.5450 - val_accuracy: 0.4411
Epoch 3/20
188/188 [==============================] - 6s 31ms/step - loss: 0.7402 - accuracy: 0.7463 - val_loss: 3.7460 - val_accuracy: 0.3660
Epoch 4/20
188/188 [==============================] - 6s 31ms/step - loss: 0.7273 - accuracy: 0.7601 - val_loss: 2.7690 - val_accuracy: 0.4525
Epoch 5/20
188/188 [==============================] - 6s 31ms/step - loss: 0.7068 - accuracy: 0.7618 - val_loss: 3.5838 - val_accuracy: 0.3773
Epoch 6/20
188/188 [==============================] - 6s 32ms/step - loss: 0.6409 - accuracy: 0.7822 - val_loss: 3.8081 - val_accuracy: 0.3872
Epoch 7/20
188/188 [==============================] - 6s 31ms/step - loss: 0.6467 - accuracy: 0.7876 - val_loss: 4.2032 - val_accuracy: 0.3872
Epoch 8/20
188/188 [==============================] - 6s 31ms/step - loss: 0.5913 - accuracy: 0.8029 - val_loss: 4.0078 - val_accuracy: 0.3702
Epoch 9/20
188/188 [==============================] - 6s 31ms/step - loss: 0.5861 - accuracy: 0.8070 - val_loss: 3.0717 - val_accuracy: 0.4270
Epoch 10/20
188/188 [==============================] - 6s 31ms/step - loss: 0.6195 - accuracy: 0.7882 - val_loss: 3.1771 - val_accuracy: 0.4184
Epoch 11/20
188/188 [==============================] - 6s 31ms/step - loss: 0.5372 - accuracy: 0.8250 - val_loss: 3.4834 - val_accuracy: 0.4099
Epoch 12/20
188/188 [==============================] - 6s 31ms/step - loss: 0.4904 - accuracy: 0.8423 - val_loss: 2.9262 - val_accuracy: 0.4184
Epoch 13/20
188/188 [==============================] - 6s 31ms/step - loss: 0.4642 - accuracy: 0.8550 - val_loss: 17.2489 - val_accuracy: 0.3319
Epoch 14/20
188/188 [==============================] - 6s 31ms/step - loss: 0.4383 - accuracy: 0.8613 - val_loss: 3.7113 - val_accuracy: 0.4142
Epoch 15/20
188/188 [==============================] - 6s 31ms/step - loss: 0.4332 - accuracy: 0.8632 - val_loss: 4.9130 - val_accuracy: 0.4057
Epoch 16/20
188/188 [==============================] - 6s 31ms/step - loss: 0.3346 - accuracy: 0.8886 - val_loss: 4.0616 - val_accuracy: 0.4525
Epoch 17/20
188/188 [==============================] - 6s 31ms/step - loss: 0.3199 - accuracy: 0.9003 - val_loss: 3.9002 - val_accuracy: 0.4482
Epoch 18/20
188/188 [==============================] - 6s 31ms/step - loss: 0.3085 - accuracy: 0.9057 - val_loss: 3.8370 - val_accuracy: 0.4468
Epoch 19/20
188/188 [==============================] - 6s 30ms/step - loss: 0.3479 - accuracy: 0.8922 - val_loss: 6.5073 - val_accuracy: 0.2865
Epoch 20/20
188/188 [==============================] - 6s 31ms/step - loss: 0.4420 - accuracy: 0.8569 - val_loss: 4.2696 - val_accuracy: 0.4525
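The log above shows training accuracy climbing toward 0.90 while validation accuracy stays near 0.45, a clear overfitting gap. One common remedy, not tried in this commit, is dropout before the dense head; an editor's sketch mirroring the architecture above (`model_reg` is an illustrative name):

```
# same stack as model_cnn, with dropout added before the classifier head
model_reg = keras.models.Sequential([
    keras.layers.Conv2D(32, 3, activation='relu', padding='same', input_shape=shape_image),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, 3, activation='relu', padding='same'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(128, 3, activation='relu', padding='same'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(256, 3, activation='relu', padding='same'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(),
    keras.layers.Flatten(),
    keras.layers.Dropout(0.5),  # randomly drop half the activations at train time
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(nb_instru, activation='softmax'),
])
```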
%% Cell type:code id: tags:
```
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')
plt.show()
```
%% Output

[figure: training vs. validation accuracy per epoch]
%% Cell type:code id: tags:
```
from sklearn.metrics import confusion_matrix
y_pred_vector = model_cnn(test_images)
y_pred = np.argmax(y_pred_vector, axis=1)  # predicted class = highest softmax score
y_test = np.argmax(test_labels, axis=1)
cm = confusion_matrix(y_test, y_pred, normalize='true')

def show_confusion_matrix(matrix, labels):
    fig, ax = plt.subplots(figsize=(nb_instru, nb_instru))
    im = ax.imshow(matrix)
    N = len(labels)
    # Show all ticks...
    ax.set_xticks(np.arange(N))
    ax.set_yticks(np.arange(N))
    # ...and label them with the respective list entries
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")
    # Loop over data dimensions and create text annotations.
    for i in range(N):
        for j in range(N):
            text = ax.text(j, i, round(matrix[i, j], 3),
                           ha="center", va="center", color="w")
    ax.set_title("Confusion matrix")
    fig.tight_layout()
    plt.show()

show_confusion_matrix(cm, lst_instru)
```
%% Output

[figure: normalized confusion matrix over the 11 instrument classes]
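scikit-learn ships a ready-made renderer that covers most of the hand-rolled plotting above; a minimal sketch using the same `cm` and `lst_instru`:

```
from sklearn.metrics import ConfusionMatrixDisplay

# plot the normalized confusion matrix with instrument codes on the axes
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=lst_instru)
disp.plot(xticks_rotation=45, values_format='.2f')
plt.show()
```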