Aller au contenu

Créer son propre générateur de données


L'API ImageDataGenerator de Keras peut être limitée dans certaines situations, qui nécessitent alors la création d'un générateur de données personnalisé : c'est l'objet de ce nouveau tutoriel.

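Nous avons d'abord besoin d'une fonction qui permet de lire les images (les exemples ci-dessous supposent les imports `import math`, `import numpy as np`, `import tensorflow as tf` et `from PIL import Image`) :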

def load_image(path, size):
    """Load an image file as a resized, rescaled RGB array.

    Args:
        path: Location of the image, passed as-is to ``Image.open``.
        size: Target size passed as-is to ``Image.resize``; Pillow expects
            a ``(width, height)`` pair.

    Returns:
        A NumPy array built from the resized RGB image, with every value
        multiplied by ``1/255``.
    """
    # Open inside a context manager so the file handle is released even if
    # decoding or resizing raises; ``convert`` returns an independent image
    # that stays valid after the source file is closed.
    with Image.open(path) as source:
        rgb = source.convert('RGB').resize(size)

    return np.array(rgb) * (1. / 255)

class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads image batches from file paths on demand.

    Each batch is built by slicing the stored paths and labels and loading
    the corresponding images with ``load_image``.

    Args:
        x_set: Sliceable, sized collection of image file paths.
        y_set: Sliceable collection of labels, aligned with ``x_set``.
        batch_size: Maximum number of samples per batch.
        target_size: Size forwarded to ``load_image`` for every image.
    """

    def __init__(self, x_set, y_set, batch_size=32, target_size=(150, 150)):
        # Let the Keras base class set up its own state (recent Keras
        # versions warn when a subclass skips this call).
        super().__init__()
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.target_size = target_size

    def __len__(self):
        """Return the number of batches; the last one may be partial."""
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Return the ``(images, labels)`` pair of arrays for batch ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]

        # The size must be read from the instance: a bare ``target_size``
        # is not defined in this scope and raises NameError.
        images = np.array([
            load_image(file_name, self.target_size)
            for file_name in batch_x
        ])
        return images, np.array(batch_y)

Appeler la classe

# Build one generator per split; both use the same batch size and image size.
train_ds = DataGenerator(
    train_images,
    train_labels,
    batch_size=BATCH_SIZE,
    target_size=(WIDTH, HEIGHT),
)
val_ds = DataGenerator(
    val_images,
    val_labels,
    batch_size=BATCH_SIZE,
    target_size=(WIDTH, HEIGHT),
)

Code complet

def load_image(path, size):
    """Load an image file as a resized, rescaled RGB array.

    Args:
        path: Location of the image, passed as-is to ``Image.open``.
        size: Target size passed as-is to ``Image.resize``; Pillow expects
            a ``(width, height)`` pair.

    Returns:
        A NumPy array built from the resized RGB image, with every value
        multiplied by ``1/255``.
    """
    # Open inside a context manager so the file handle is released even if
    # decoding or resizing raises; ``convert`` returns an independent image
    # that stays valid after the source file is closed.
    with Image.open(path) as source:
        rgb = source.convert('RGB').resize(size)

    return np.array(rgb) * (1. / 255)

class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads image batches from file paths on demand.

    Each batch is built by slicing the stored paths and labels and loading
    the corresponding images with ``load_image``.

    Args:
        x_set: Sliceable, sized collection of image file paths.
        y_set: Sliceable collection of labels, aligned with ``x_set``.
        batch_size: Maximum number of samples per batch.
        target_size: Size forwarded to ``load_image`` for every image.
    """

    def __init__(self, x_set, y_set, batch_size=32, target_size=(150, 150)):
        # Let the Keras base class set up its own state (recent Keras
        # versions warn when a subclass skips this call).
        super().__init__()
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.target_size = target_size

    def __len__(self):
        """Return the number of batches; the last one may be partial."""
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Return the ``(images, labels)`` pair of arrays for batch ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]

        # The size must be read from the instance: a bare ``target_size``
        # is not defined in this scope and raises NameError.
        images = np.array([
            load_image(file_name, self.target_size)
            for file_name in batch_x
        ])
        return images, np.array(batch_y)

Partagez sur les réseaux sociaux

Commentaires