Neural Networks in Colab - Runtimeerror: CUDA error

Question

Neural Networks in Colab - Runtimeerror: CUDA error

Asked 4 years, 1 month ago

Viewed 10 times

0

I’m training a simple neural network for database classification Internet Firewall Data Data Set from the UCI Machine Learning site, but in the execution of the training occurs the following error in Colab:

What would that mistake be? And how can I get around this problem?

Libraries

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

import torch
import torch.nn.functional as F
import torch.nn as nn
import torchvision

from torchvision import datasets, transforms
from torch import optim, nn
from torch.utils import data
from torch.utils.data import DataLoader
from torch.autograd import Variable

import os
import sys
import time
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
%matplotlib inline

plt.ion()

Dataset and Neural Network Model Classes

class CustomDataset(torch.utils.data.Dataset):
    # definição do nosso dataset
    def __init__(self, features, labels, n_classes):
        self.features = features
        self.labels = labels.astype(np.long)
        self.n_classes = n_classes

    def __len__(self):
        return len(self.features)  # retorna total de features a serem processadas

    def __getitem__(self, idx):  # idx representa o indice da features a ser carregada
        feature = self.features[idx]
        label = self.labels[idx]

        # transformando o dado de numpy para torch
        feature = torch.from_numpy(feature)
        # as labels devem ir de 0 a n_classes -1
        label = torch.tensor(label-1) 
        
        return feature, label


class CustomNetwork(nn.Module):
    
    def __init__(self, input_size, n_classes):

        super(CustomNetwork, self).__init__()

        self.n_outputs_1 = 44
        self.n_outputs_2 = 34
        self.n_outputs_3 = 24
        self.n_outputs_4 = 14

        self.fc = nn.Sequential(nn.Linear(input_size, self.n_outputs_1),       # Fully Connected Layer: 54 -> 44.
                   nn.ReLU(inplace=True),                         # ReLU Activation Layer.
                   #nn.Dropout(),                                  # Dropout

                   nn.Linear(self.n_outputs_1, self.n_outputs_2), # Fully Connected Layer: 44 -> 34.
                   nn.ReLU(inplace=True),                         # ReLU Activation Layer.
                   #nn.Dropout(),                                  # Dropout

                   nn.Linear(self.n_outputs_2, self.n_outputs_3), # Fully Connected Layer: 34 -> 24.
                   nn.ReLU(inplace=True),                         # ReLU Activation Layer.
                   #nn.Dropout(),                                  # Dropout

                   nn.Linear(self.n_outputs_3, self.n_outputs_4), # Fully Connected Layer: 24 -> 14.
                   nn.ReLU(inplace=True),                         # ReLU Activation Layer.
                   #nn.Dropout(),                                  # Dropout

                   nn.Linear(self.n_outputs_4, n_classes))        # Fully Connected Layer: 14 -> 7.

    # Forward function.
    def forward(self, x):
        # print('input', x.size()) # Comment this line for less text during execution.
        out = self.fc(x)
        # print('out', out.size()) # Comment this line for less text during execution.
        
        # Returning output.
        return out

Data Loading and Pre-processing

# Test if GPU is avaliable, if not, use cpu instead
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n = torch.cuda.device_count()
devices_ids = list(range(n))
device

# baixando o dataset
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00542/log2.csv

# Leitura dos dados
df = pd.read_csv('log2.csv')
df.head(10)

# Transformando a classe (Action) para valores
df['Action'] = df['Action'].astype('category')
df['Action'] = df['Action'].cat.codes
df.head(10)

# Transformando em um numpy array
full_data = df.to_numpy(dtype=np.float32)
print(full_data.shape)

# separando o label da base de dados
X, y = np.delete(full_data, 4, 1), full_data[:, 4]  
print(X.shape)
print(y.shape)

# dividindo em treino e teste
train_features, test_features, train_labels, test_labels = train_test_split(X, y, test_size=0.33, random_state=42)

# número de classes do problema
n_classes = len(np.unique(y))

Training and Validation

train_set = CustomDataset(train_features, train_labels, n_classes)
test_set = CustomDataset(test_features, test_labels, n_classes)

print('Amostras de treino: ' + str(len(train_set)) + '\nAmostras de Teste: ' + str(len(test_set)))

train_loader = DataLoader(train_set,  batch_size=100, shuffle=True, num_workers=2)
test_loader = DataLoader(test_set, batch_size=100,  shuffle=False, num_workers=2)

# número de features de entrada
input_size = 54

# Instanciando a rede MLP
model = CustomNetwork(input_size, n_classes)
model.to(device)

# parâmetros: número de epochs e learning rate (ou taxa de aprendizado)
num_epochs = 5
lr = 0.001

# função de custo (ou loss)
criterion =  nn.CrossEntropyLoss().cuda()

# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)


training_metrics = list() # List for accuracies in training procedure.
test_metrics = list() # List for accuracies in test procedure.

# Iterating over epochs.
for ep in range(num_epochs):
    
    print('##############################################')
    print('Starting epoch ' + str(ep + 1) + '/' + str(num_epochs) + '...')
    
    #####################################################################
    # Training Procedure. ###############################################
    #####################################################################
    
    print('    Training...')
    
    # Setting model to training mode.
    model.train()
    
    # Iterating over training batches.
    for it, data in enumerate(train_loader):

        # Obtaining data and labels for batch.
        inps, labs = data
        
        # GPU casting. In CPU version comment the following two lines.
        inps = inps.cuda()
        labs = labs.cuda()
        
        # Zeroing optimizer.
        optimizer.zero_grad()
        
        # Forwarding inps through NN.
        output = model(inps)
        
        # Computing loss according to network prediction for batch and targets.
        loss = criterion(output, labs)
        
        # Backpropagating loss.
        loss.backward() # All backward pass is computed from this line automatically by package torch.autograd.
        
        # Taking optimization step (updating NN weights).
        optimizer.step()
        
        # Appending metric for batch.
        training_metrics.append(loss.mean())

    #####################################################################
    # Testing Procedure.  ###############################################
    #####################################################################
    
    print('    Testing...')
    
    # Setting model to evaluation mode.
    model.eval()

    with torch.no_grad():

        label_list = list()
        output_list = list()

        # Iterating over test batches.
        for it, data in enumerate(test_loader):
            
            # Obtaining images and labels for batch.
            inps, labs = data
            
            # GPU casting. In CPU version comment the following line.
            inps = inps.cuda()
            labs = labs.cuda()
            
            # Forwarding inps through NN.
            output = model(inps)
            
            # Computing loss according to network prediction for batch and targets.
            loss = criterion(output, labs)

            # Appending metric for batch.
            test_metrics.append(loss.mean())

            # Getting labels and predictions from last epoch.
            label_list += labs.cpu().numpy().tolist()
            output_list += output.max(1)[1].cpu().numpy().tolist()

        label_array = np.asarray(label_list, dtype=np.int).ravel()
        output_array = np.asarray(output_list, dtype=np.int).ravel()

        print('Epoch: %d, Accuracy: %.2f%%' % (ep + 1, 100.0 * np.sum(label_array == output_array) / float(label_array.shape[0])))

No answers

Browser other questions tagged python machine-learning deep-learning

You are not signed in. Login or sign up in order to post.