Optimization of Cache Memory usage

Asked

Viewed 118 times

1

I received a code implementing Dijkstra in C, and my mission is to optimize this code. The code is this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NUM_NODES                          1000
#define NONE                               9999

struct _NODE
{
  int iDist;
  int iPrev;
};
typedef struct _NODE NODE;

struct _QITEM
{
  int iNode;
  int iDist;
  int iPrev;
  struct _QITEM *qNext;
};
typedef struct _QITEM QITEM;

QITEM *qHead = NULL;




short int AdjMatrix[NUM_NODES][NUM_NODES];

int g_qCount = 0;
NODE rgnNodes[NUM_NODES];
int ch;
int iPrev, iNode;
int i, iCost, iDist;


void print_path (NODE *rgnNodes, int chNode)
{
  if (rgnNodes[chNode].iPrev != NONE)
    {
      print_path(rgnNodes, rgnNodes[chNode].iPrev);
    }
  printf (" %d", chNode);
  fflush(stdout);
}


void enqueue (int iNode, int iDist, int iPrev)
{
  QITEM *qNew = (QITEM *) malloc(sizeof(QITEM));
  QITEM *qLast = qHead;

  if (!qNew) 
    {
      fprintf(stderr, "Out of memory.\n");
      exit(1);
    }
  qNew->iNode = iNode;
  qNew->iDist = iDist;
  qNew->iPrev = iPrev;
  qNew->qNext = NULL;

  if (!qLast) 
    {
      qHead = qNew;
    }
  else
    {
      while (qLast->qNext) qLast = qLast->qNext;
      qLast->qNext = qNew;
    }
  g_qCount++;
  //               ASSERT(g_qCount);
}


void dequeue (int *piNode, int *piDist, int *piPrev)
{
  QITEM *qKill = qHead;

  if (qHead)
    {
      //                 ASSERT(g_qCount);
      *piNode = qHead->iNode;
      *piDist = qHead->iDist;
      *piPrev = qHead->iPrev;
      qHead = qHead->qNext;
      free(qKill);
      g_qCount--;
    }
}


int qcount (void)
{
  return(g_qCount);
}

int dijkstra(int chStart, int chEnd) 
{



  for (ch = 0; ch < NUM_NODES; ch++)
    {
      rgnNodes[ch].iDist = NONE;
      rgnNodes[ch].iPrev = NONE;
    }

  if (chStart == chEnd) 
    {
      printf("Shortest path is 0 in cost. Just stay where you are.\n");
    }
  else
    {
      rgnNodes[chStart].iDist = 0;
      rgnNodes[chStart].iPrev = NONE;

      enqueue (chStart, 0, NONE);

     while (qcount() > 0)
 {
      dequeue (&iNode, &iDist, &iPrev);
      for (i = 0; i < NUM_NODES; i++)
        {
          if ((iCost = AdjMatrix[iNode][i]) != NONE)
        {
          if ((NONE == rgnNodes[i].iDist) || 
              (rgnNodes[i].iDist > (iCost + iDist)))
            {
              rgnNodes[i].iDist = iDist + iCost;
              rgnNodes[i].iPrev = iNode;
              enqueue (i, iDist + iCost, iNode);
            }
        }
        }
    }

      printf("Shortest path is %d in cost. ", rgnNodes[chEnd].iDist);
      printf("Path is: ");
      print_path(rgnNodes, chEnd);
      printf("\n");

    }
}

  int main(int argc, char *argv[]) {
  int i,j,k;
  FILE *fp;
  char line[2048];
  char* token;

  if (argc<2) {
    fprintf(stderr, "Usage: dijkstra <filename>\n");
    fprintf(stderr, "Only supports matrix size is #define'd.\n");
  }

  /* open the adjacency matrix file */
  fp = fopen (argv[1],"r");

   /*
  // make a fully connected matrix 
  for (i=0;i<NUM_NODES;i++) {
    for (j=0;j<NUM_NODES;j++) {
      // make it more sparce 
       fscanf(fp,"%d",&k);
            AdjMatrix[i][j]= k;
    }
 }
  */

  for (i=0;i<1000;i++) {
      for (j=0;j<1000;j++) {
        AdjMatrix[i][j] = NONE;
      }
  }  

   while (fgets(line, sizeof(line), fp)) {
       token = strtok(line," ");
       i = atoi(token);
       token = strtok(NULL, " \n");
        while (token) {
            j = atoi(token);
            AdjMatrix[i][j]= 1;
            token = strtok(NULL, " \n");
        }
    }  


  // nao mudar esta lista de caminhos a serem pesquisados
  for (i=0,j=50;i<50;i++,j--) {
      printf("Path %d => %d\n",i,j);
     dijkstra(i,j);

  }

  for (i=50,j=100;i<100;i++,j--) {
      printf("Path %d => %d\n",i,j);
      dijkstra(i,j);

  }  

/*
  for (i=0;i<300;i++) {
      for (j=0;j<300;j++) {
        printf("%d ",AdjMatrix[i][j]);
      }
      printf("\n");
  }  
*/

/*
  printf("Path %d => %d\n",0,13);
  dijkstra(0,13);
  printf("Path %d => %d\n",1,77);
  dijkstra(1,77);
*/
  printf("Path %d => %d\n",51,99);
  dijkstra(51,99);
  exit(0);


}

Well, I did some optimizations, which are not a big deal, swap int for short int, change NUM_NODES to 300 because I know that the test files have 300 nodes, decrease the size of the char array that reads the lines because I know that a line does not have 2048 characters, add a pointer to the last element of the queue and change the order of the struct attributes, but the main optimization, and that I thought would drastically change the number of cache Isses, only got worse. In the given matrix code is declared as: int Adjmatrix[NUM_NODES][NUM_NODES]; I have been researching, and allocating in memory this matrix would be much more cache-friendly due to the contiguity of the addresses, and the same goes for the Array rgnNodes, my code was like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NUM_NODES                          300
#define NONE                               9999

struct _NODE
{
  short int iDist;
  short int iPrev;
};
typedef struct _NODE NODE;

struct _QITEM
{
  struct _QITEM *qNext;
  short int iNode;
  short int iDist;
  short int iPrev;
};
typedef struct _QITEM QITEM;

short int *mat;
QITEM *qHead = NULL;
QITEM *qTail = NULL;


short int g_qCount = 0;
NODE *rgnNodes;
short int ch;
short int iPrev, iNode;
short int i, iDist;
int offset;

void print_path (NODE *rgnNodes, short int chNode)
{
  if (rgnNodes[chNode].iPrev != NONE)
    {
      print_path(rgnNodes, rgnNodes[chNode].iPrev);
    }
  printf (" %d", chNode);
  fflush(stdout);
}


void enqueue (short int iNode, short int iDist, short int iPrev)
{
  QITEM *qNew = (QITEM *) malloc(sizeof(QITEM));

  if (!qNew) 
    {
      fprintf(stderr, "Out of memory.\n");
      exit(1);
    }
  qNew->iNode = iNode;
  qNew->iDist = iDist;
  qNew->iPrev = iPrev;
  qNew->qNext = NULL; //qNew recebe os parametros

  if (!qHead)
    {
      qHead = qTail = qNew;

    }
  else
    {
      qTail->qNext = qNew;
      qTail=qNew;
    }
  g_qCount++;
  //               ASSERT(g_qCount);
}


void dequeue (short int *piNode, short int *piDist, short int *piPrev)
{
  QITEM *qKill = qHead;

  if (qHead)
    {
      //                 ASSERT(g_qCount);
      *piNode = qHead->iNode;
      *piDist = qHead->iDist;
      *piPrev = qHead->iPrev;
      qHead = qHead->qNext;
      free(qKill);
      g_qCount--;
    }
}


short int qcount (void)
{
  return(g_qCount);
}

int dijkstra(short int chStart, short int chEnd) 
{



  for (ch = 0; ch < NUM_NODES; ch++)
    {
      rgnNodes[ch].iDist = NONE;
      rgnNodes[ch].iPrev = NONE;
    }

  if (chStart == chEnd) 
    {
      printf("Shortest path is 0 in cost. Just stay where you are.\n");
    }
  else
    {
      rgnNodes[chStart].iDist = 0;
      rgnNodes[chStart].iPrev = NONE;

      enqueue (chStart, 0, NONE);

     while (qcount() > 0)
    {
      dequeue (&iNode, &iDist, &iPrev);
      for (i = 0; i < NUM_NODES; i++)
        {
          offset= iNode * NUM_NODES + i;
          if (mat[offset] != NONE && ((NONE == rgnNodes[i].iDist) || (rgnNodes[i].iDist > (1 + iDist))))  
        {
              rgnNodes[i].iDist = iDist + 1;
              rgnNodes[i].iPrev = iNode; 
              enqueue (i, iDist + 1, iNode);
            }

        }
    }

      printf("Shortest path is %d in cost. ", rgnNodes[chEnd].iDist);
      printf("Path is: ");
      print_path(rgnNodes, chEnd);
      printf("\n");

    }
}

int main(short int argc, char *argv[]) {
  short int i,j,k;
  FILE *fp;
  char line[512];
  char* token;
  mat=(short int *)malloc(NUM_NODES * NUM_NODES * sizeof(short int));
  rgnNodes = (NODE *)malloc(NUM_NODES * sizeof(NODE));
  if (argc<2) {
    fprintf(stderr, "Usage: dijkstra <filename>\n");
    fprintf(stderr, "Only supports matrix size is #define'd.\n");
  }

  /* open the adjacency matrix file */
  fp = fopen (argv[1],"r");

/*
  // make a fully connected matrix 
  for (i=0;i<NUM_NODES;i++) {
    for (j=0;j<NUM_NODES;j++) {
      // make it more sparce 
      fscanf(fp,"%d",&k);
            mat[i][j]= k;
    }
  }
  */

  for (i=0;i<300;i++) {
      for (j=0;j<300;j++) {
        offset= i * NUM_NODES + j;
        mat[offset]=NONE;
      }
  }  

   while (fgets(line, sizeof(line), fp)) { //completa a matriz com as devidas adjacencias
       token = strtok(line," ");
       i = atoi(token);
       token = strtok(NULL, " \n");
        while (token) {
            j = atoi(token);
            offset=i*NUM_NODES+j;
            mat[offset]= 1;
            token = strtok(NULL, " \n");
        }
    }  


  // nao mudar esta lista de caminhos a serem pesquisados
  for (i=0,j=50;i<50;i++,j--) {
      printf("Path %d => %d\n",i,j);
      dijkstra(i,j);

  }

  for (i=50,j=100;i<100;i++,j--) {
      printf("Path %d => %d\n",i,j);
      dijkstra(i,j);

  }  

/*
  for (i=0;i<300;i++) {
      for (j=0;j<300;j++) {
        printf("%d ",mat[i][j]);
      }
      printf("\n");
  }  
*/

/*
  printf("Path %d => %d\n",0,13);
  dijkstra(0,13);
  printf("Path %d => %d\n",1,77);
  dijkstra(1,77);
*/
  printf("Path %d => %d\n",51,99);
  dijkstra(51,99);


  exit(0);


}

It is important to mention that tests are being performed on Valgrind with the following configuration:

Valgrind --tool=cachegrind --I1=16384,1,32 --D1=16384,1,32 --LL=16384,1,32 . /dijkstra_large test300.adjlist

i.e., L1 data and instruction = 16KB, associativity 1, 32byte line size L2 = 16KB, associativity 1, line size 32bytes

And of course, the code that I optimized is better than the original, but matrix optimization and the array made the code worse, and I’d like to know why. Also, if anyone has any optimization tips I would be very grateful.

I can’t change cache settings

1 answer

1

TLDR the code ... int is faster than short int; the Elements of an array are contiguous (you do not need to malloc()/free()).

Browser other questions tagged

You are not signed in. Login or sign up in order to post.