-1
Good morning,
I’m using the model available in this Git Googlenet in Keras for transfer Learning and I’m trying to adapt the fit_generator of Keras to use in the model. As can be seen in the link, there are 3 model outputs. These exits are individually identical to each other, but I need to go through all three to make it work. Example: [y,y,y] for an input of x.
The entry is right, there is no problem. The problem is that at the exit Keras is trying to use a batch whole for each input. With a batch equal to 32, the following error message appeared:
ValueError: Error when checking target: expected new_loss1_classifier_act to have shape (1681,) but got array with shape (32,)
where 1681 is the number of coded cells that each output receives.
First, I modified the Batchfromfilesmixin class, to deliver a list of 3 outputs instead of a:
import os
import threading
import numpy as np
from keras_preprocessing import get_keras_submodule
try:
IteratorType = get_keras_submodule('utils').Sequence
except ImportError:
IteratorType = object
from keras_preprocessing.image.utils import (array_to_img,
img_to_array,
load_img)
class GoogleNet_BatchFromFilesMixin():
"""Adds methods related to getting batches from filenames
It includes the logic to transform image files to batches.
"""
def set_processing_attrs(self,
image_data_generator,
target_size,
color_mode,
data_format,
save_to_dir,
save_prefix,
save_format,
subset,
interpolation):
self.image_data_generator = image_data_generator
self.target_size = tuple(target_size)
if color_mode not in {'rgb', 'rgba', 'grayscale'}:
raise ValueError('Invalid color mode:', color_mode,
'; expected "rgb", "rgba", or "grayscale".')
self.color_mode = color_mode
self.data_format = data_format
if self.color_mode == 'rgba':
if self.data_format == 'channels_last':
self.image_shape = self.target_size + (4,)
else:
self.image_shape = (4,) + self.target_size
elif self.color_mode == 'rgb':
if self.data_format == 'channels_last':
self.image_shape = self.target_size + (3,)
else:
self.image_shape = (3,) + self.target_size
else:
if self.data_format == 'channels_last':
self.image_shape = self.target_size + (1,)
else:
self.image_shape = (1,) + self.target_size
self.save_to_dir = save_to_dir
self.save_prefix = save_prefix
self.save_format = save_format
self.interpolation = interpolation
if subset is not None:
validation_split = self.image_data_generator._validation_split
if subset == 'validation':
split = (0, validation_split)
elif subset == 'training':
split = (validation_split, 1)
else:
raise ValueError(
'Invalid subset name: %s;'
'expected "training" or "validation"' % (subset,))
else:
split = None
self.split = split
self.subset = subset
def _get_batches_of_transformed_samples(self, index_array):
"""Gets a batch of transformed samples.
# Arguments
index_array: Array of sample indices to include in batch.
# Returns
A batch of transformed samples.
"""
batch_x = np.zeros((len(index_array),) + self.image_shape, dtype=self.dtype)
# build batch of image data
# self.filepaths is dynamic, is better to call it once outside the loop
filepaths = self.filepaths
for i, j in enumerate(index_array):
img = load_img(filepaths[j],
color_mode=self.color_mode,
target_size=self.target_size,
interpolation=self.interpolation)
x = img_to_array(img, data_format=self.data_format)
# Pillow images should be closed after `load_img`,
# but not PIL images.
if hasattr(img, 'close'):
img.close()
if self.image_data_generator:
params = self.image_data_generator.get_random_transform(x.shape)
x = self.image_data_generator.apply_transform(x, params)
x = self.image_data_generator.standardize(x)
batch_x[i] = x
# optionally save augmented images to disk for debugging purposes
if self.save_to_dir:
for i, j in enumerate(index_array):
img = array_to_img(batch_x[i], self.data_format, scale=True)
fname = '{prefix}_{index}_{hash}.{format}'.format(
prefix=self.save_prefix,
index=j,
hash=np.random.randint(1e7),
format=self.save_format)
img.save(os.path.join(self.save_to_dir, fname))
# build batch of labels
if self.class_mode == 'input':
batch_y = batch_x.copy()
elif self.class_mode in {'binary', 'sparse'}:
batch_y = np.empty(len(batch_x), dtype=self.dtype)
for i, n_observation in enumerate(index_array):
batch_y[i] = self.classes[n_observation]
elif self.class_mode == 'categorical':
batch_y = np.zeros((len(batch_x), len(self.class_indices)),
dtype=self.dtype)
for i, n_observation in enumerate(index_array):
batch_y[i, self.classes[n_observation]] = 1.
elif self.class_mode == 'multi_output':
batch_y = [output[index_array] for output in self.labels]
elif self.class_mode == 'raw':
batch_y = self.labels[index_array]
else:
return batch_x
if self.sample_weight is None:
#MODIFICADO
return batch_x, [batch_y, batch_y, batch_y]
else:
#MODIFICADO
return batch_x, [batch_y, batch_y, batch_y], self.sample_weight[index_array]
@property
def filepaths(self):
"""List of absolute paths to image files"""
raise NotImplementedError(
'`filepaths` property method has not been implemented in {}.'
.format(type(self).__name__)
)
@property
def labels(self):
"""Class labels of every observation"""
raise NotImplementedError(
'`labels` property method has not been implemented in {}.'
.format(type(self).__name__)
)
@property
def sample_weight(self):
raise NotImplementedError(
'`sample_weight` property method has not been implemented in {}.'
.format(type(self).__name__)
)
Where I modified, is with the hashtag 'MODIFIED'.
Then I modified the Dataframeiterator class. I didn’t change anything in the body of the code, I just rewrote it to inherit the new Batchfromfilesmixin, which is now called Googlenet_batchfromfilesmixin:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import warnings
import numpy as np
from keras_preprocessing.image.iterator import Iterator
from keras_preprocessing.image.utils import validate_filename
class GoogleNet_DataFrameIterator(GoogleNet_BatchFromFilesMixin, Iterator):
...
super(GoogleNet_DataFrameIterator, self).set_processing_attrs(image_data_generator,
target_size,
color_mode,
data_format,
save_to_dir,
save_prefix,
save_format,
subset,
interpolation)
...
super(GoogleNet_DataFrameIterator, self).__init__(self.samples,
batch_size,
shuffle,
seed)
And finally created a class inherited from Imagedatagenerator, modifying only the flow_from_dataframe method, to use the newly created class
GoogleNet_DataFrameIterator.
class GoogleNet_ImageDataGenerator(ImageDataGenerator):
def flow_from_dataframe(self,
dataframe,
directory=None,
x_col="filename",
y_col="class",
weight_col=None,
target_size=(256, 256),
color_mode='rgb',
classes=None,
class_mode='categorical',
batch_size=32,
shuffle=True,
seed=None,
save_to_dir=None,
save_prefix='',
save_format='png',
subset=None,
interpolation='nearest',
validate_filenames=True,
**kwargs):
...
return GoogleNet_DataFrameIterator(
dataframe,
directory,
self,
x_col=x_col,
y_col=y_col,
weight_col=weight_col,
target_size=target_size,
color_mode=color_mode,
classes=classes,
class_mode=class_mode,
data_format=self.data_format,
batch_size=batch_size,
shuffle=shuffle,
seed=seed,
save_to_dir=save_to_dir,
save_prefix=save_prefix,
save_format=save_format,
subset=subset,
interpolation=interpolation,
validate_filenames=validate_filenames
)
So I created the instances and so on... The dataframe is already in the right format and etc, the problem is in Keras passing an entire batch at once, as I said before.
train_datagen = GoogleNet_ImageDataGenerator(data_format = 'channels_first')
test_datagen = GoogleNet_ImageDataGenerator(data_format = 'channels_first')
train_generator = train_datagen.flow_from_dataframe(
dataframe=train,
directory='img',
x_col="filename",
y_col=categories,
target_size=(224, 224),
batch_size=32,
class_mode='multi_output')
validation_generator = test_datagen.flow_from_dataframe(
dataframe=validate,
directory='img',
x_col="filename",
y_col=categories,
target_size=(224, 224),
batch_size=1,
class_mode='multi_output')
from keras.optimizers import SGD
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='binary_crossentropy')
model.fit_generator(
generator = train_generator,
epochs=2,
validation_data=validation_generator)
If I change the training batch to 64, it appears:
ValueError: Error when checking target: expected new_loss1_classifier_act to have shape (1681,) but got array with shape (64,)
Could someone help me find the mistake, or is a bug of Keras itself?
Versions used: Python 3.7 Keras 2.3.1
If the problem has been solved you could [Dit] your question to remove the solution, create your answer with your solution and then mark it as accepted. Behold: I can answer my own question?
– fernandosavio
Thanks, Fernando! I’m new around here, I didn’t know that.
– caiokameda