1
I received a code implementing Dijkstra in C, and my mission is to optimize this code. The code is this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NUM_NODES 1000
#define NONE 9999
struct _NODE
{
int iDist;
int iPrev;
};
typedef struct _NODE NODE;
struct _QITEM
{
int iNode;
int iDist;
int iPrev;
struct _QITEM *qNext;
};
typedef struct _QITEM QITEM;
QITEM *qHead = NULL;
short int AdjMatrix[NUM_NODES][NUM_NODES];
int g_qCount = 0;
NODE rgnNodes[NUM_NODES];
int ch;
int iPrev, iNode;
int i, iCost, iDist;
void print_path (NODE *rgnNodes, int chNode)
{
if (rgnNodes[chNode].iPrev != NONE)
{
print_path(rgnNodes, rgnNodes[chNode].iPrev);
}
printf (" %d", chNode);
fflush(stdout);
}
void enqueue (int iNode, int iDist, int iPrev)
{
QITEM *qNew = (QITEM *) malloc(sizeof(QITEM));
QITEM *qLast = qHead;
if (!qNew)
{
fprintf(stderr, "Out of memory.\n");
exit(1);
}
qNew->iNode = iNode;
qNew->iDist = iDist;
qNew->iPrev = iPrev;
qNew->qNext = NULL;
if (!qLast)
{
qHead = qNew;
}
else
{
while (qLast->qNext) qLast = qLast->qNext;
qLast->qNext = qNew;
}
g_qCount++;
// ASSERT(g_qCount);
}
void dequeue (int *piNode, int *piDist, int *piPrev)
{
QITEM *qKill = qHead;
if (qHead)
{
// ASSERT(g_qCount);
*piNode = qHead->iNode;
*piDist = qHead->iDist;
*piPrev = qHead->iPrev;
qHead = qHead->qNext;
free(qKill);
g_qCount--;
}
}
int qcount (void)
{
return(g_qCount);
}
int dijkstra(int chStart, int chEnd)
{
for (ch = 0; ch < NUM_NODES; ch++)
{
rgnNodes[ch].iDist = NONE;
rgnNodes[ch].iPrev = NONE;
}
if (chStart == chEnd)
{
printf("Shortest path is 0 in cost. Just stay where you are.\n");
}
else
{
rgnNodes[chStart].iDist = 0;
rgnNodes[chStart].iPrev = NONE;
enqueue (chStart, 0, NONE);
while (qcount() > 0)
{
dequeue (&iNode, &iDist, &iPrev);
for (i = 0; i < NUM_NODES; i++)
{
if ((iCost = AdjMatrix[iNode][i]) != NONE)
{
if ((NONE == rgnNodes[i].iDist) ||
(rgnNodes[i].iDist > (iCost + iDist)))
{
rgnNodes[i].iDist = iDist + iCost;
rgnNodes[i].iPrev = iNode;
enqueue (i, iDist + iCost, iNode);
}
}
}
}
printf("Shortest path is %d in cost. ", rgnNodes[chEnd].iDist);
printf("Path is: ");
print_path(rgnNodes, chEnd);
printf("\n");
}
}
int main(int argc, char *argv[]) {
int i,j,k;
FILE *fp;
char line[2048];
char* token;
if (argc<2) {
fprintf(stderr, "Usage: dijkstra <filename>\n");
fprintf(stderr, "Only supports matrix size is #define'd.\n");
}
/* open the adjacency matrix file */
fp = fopen (argv[1],"r");
/*
// make a fully connected matrix
for (i=0;i<NUM_NODES;i++) {
for (j=0;j<NUM_NODES;j++) {
// make it more sparce
fscanf(fp,"%d",&k);
AdjMatrix[i][j]= k;
}
}
*/
for (i=0;i<1000;i++) {
for (j=0;j<1000;j++) {
AdjMatrix[i][j] = NONE;
}
}
while (fgets(line, sizeof(line), fp)) {
token = strtok(line," ");
i = atoi(token);
token = strtok(NULL, " \n");
while (token) {
j = atoi(token);
AdjMatrix[i][j]= 1;
token = strtok(NULL, " \n");
}
}
// nao mudar esta lista de caminhos a serem pesquisados
for (i=0,j=50;i<50;i++,j--) {
printf("Path %d => %d\n",i,j);
dijkstra(i,j);
}
for (i=50,j=100;i<100;i++,j--) {
printf("Path %d => %d\n",i,j);
dijkstra(i,j);
}
/*
for (i=0;i<300;i++) {
for (j=0;j<300;j++) {
printf("%d ",AdjMatrix[i][j]);
}
printf("\n");
}
*/
/*
printf("Path %d => %d\n",0,13);
dijkstra(0,13);
printf("Path %d => %d\n",1,77);
dijkstra(1,77);
*/
printf("Path %d => %d\n",51,99);
dijkstra(51,99);
exit(0);
}
Well, I did some optimizations, which are not a big deal, swap int for short int, change NUM_NODES to 300 because I know that the test files have 300 nodes, decrease the size of the char array that reads the lines because I know that a line does not have 2048 characters, add a pointer to the last element of the queue and change the order of the struct attributes, but the main optimization, and that I thought would drastically change the number of cache Isses, only got worse. In the given matrix code is declared as: int Adjmatrix[NUM_NODES][NUM_NODES]; I have been researching, and allocating in memory this matrix would be much more cache-friendly due to the contiguity of the addresses, and the same goes for the Array rgnNodes, my code was like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NUM_NODES 300
#define NONE 9999
struct _NODE
{
short int iDist;
short int iPrev;
};
typedef struct _NODE NODE;
struct _QITEM
{
struct _QITEM *qNext;
short int iNode;
short int iDist;
short int iPrev;
};
typedef struct _QITEM QITEM;
short int *mat;
QITEM *qHead = NULL;
QITEM *qTail = NULL;
short int g_qCount = 0;
NODE *rgnNodes;
short int ch;
short int iPrev, iNode;
short int i, iDist;
int offset;
void print_path (NODE *rgnNodes, short int chNode)
{
if (rgnNodes[chNode].iPrev != NONE)
{
print_path(rgnNodes, rgnNodes[chNode].iPrev);
}
printf (" %d", chNode);
fflush(stdout);
}
void enqueue (short int iNode, short int iDist, short int iPrev)
{
QITEM *qNew = (QITEM *) malloc(sizeof(QITEM));
if (!qNew)
{
fprintf(stderr, "Out of memory.\n");
exit(1);
}
qNew->iNode = iNode;
qNew->iDist = iDist;
qNew->iPrev = iPrev;
qNew->qNext = NULL; //qNew recebe os parametros
if (!qHead)
{
qHead = qTail = qNew;
}
else
{
qTail->qNext = qNew;
qTail=qNew;
}
g_qCount++;
// ASSERT(g_qCount);
}
void dequeue (short int *piNode, short int *piDist, short int *piPrev)
{
QITEM *qKill = qHead;
if (qHead)
{
// ASSERT(g_qCount);
*piNode = qHead->iNode;
*piDist = qHead->iDist;
*piPrev = qHead->iPrev;
qHead = qHead->qNext;
free(qKill);
g_qCount--;
}
}
short int qcount (void)
{
return(g_qCount);
}
int dijkstra(short int chStart, short int chEnd)
{
for (ch = 0; ch < NUM_NODES; ch++)
{
rgnNodes[ch].iDist = NONE;
rgnNodes[ch].iPrev = NONE;
}
if (chStart == chEnd)
{
printf("Shortest path is 0 in cost. Just stay where you are.\n");
}
else
{
rgnNodes[chStart].iDist = 0;
rgnNodes[chStart].iPrev = NONE;
enqueue (chStart, 0, NONE);
while (qcount() > 0)
{
dequeue (&iNode, &iDist, &iPrev);
for (i = 0; i < NUM_NODES; i++)
{
offset= iNode * NUM_NODES + i;
if (mat[offset] != NONE && ((NONE == rgnNodes[i].iDist) || (rgnNodes[i].iDist > (1 + iDist))))
{
rgnNodes[i].iDist = iDist + 1;
rgnNodes[i].iPrev = iNode;
enqueue (i, iDist + 1, iNode);
}
}
}
printf("Shortest path is %d in cost. ", rgnNodes[chEnd].iDist);
printf("Path is: ");
print_path(rgnNodes, chEnd);
printf("\n");
}
}
int main(short int argc, char *argv[]) {
short int i,j,k;
FILE *fp;
char line[512];
char* token;
mat=(short int *)malloc(NUM_NODES * NUM_NODES * sizeof(short int));
rgnNodes = (NODE *)malloc(NUM_NODES * sizeof(NODE));
if (argc<2) {
fprintf(stderr, "Usage: dijkstra <filename>\n");
fprintf(stderr, "Only supports matrix size is #define'd.\n");
}
/* open the adjacency matrix file */
fp = fopen (argv[1],"r");
/*
// make a fully connected matrix
for (i=0;i<NUM_NODES;i++) {
for (j=0;j<NUM_NODES;j++) {
// make it more sparce
fscanf(fp,"%d",&k);
mat[i][j]= k;
}
}
*/
for (i=0;i<300;i++) {
for (j=0;j<300;j++) {
offset= i * NUM_NODES + j;
mat[offset]=NONE;
}
}
while (fgets(line, sizeof(line), fp)) { //completa a matriz com as devidas adjacencias
token = strtok(line," ");
i = atoi(token);
token = strtok(NULL, " \n");
while (token) {
j = atoi(token);
offset=i*NUM_NODES+j;
mat[offset]= 1;
token = strtok(NULL, " \n");
}
}
// nao mudar esta lista de caminhos a serem pesquisados
for (i=0,j=50;i<50;i++,j--) {
printf("Path %d => %d\n",i,j);
dijkstra(i,j);
}
for (i=50,j=100;i<100;i++,j--) {
printf("Path %d => %d\n",i,j);
dijkstra(i,j);
}
/*
for (i=0;i<300;i++) {
for (j=0;j<300;j++) {
printf("%d ",mat[i][j]);
}
printf("\n");
}
*/
/*
printf("Path %d => %d\n",0,13);
dijkstra(0,13);
printf("Path %d => %d\n",1,77);
dijkstra(1,77);
*/
printf("Path %d => %d\n",51,99);
dijkstra(51,99);
exit(0);
}
It is important to mention that tests are being performed on Valgrind with the following configuration:
Valgrind --tool=cachegrind --I1=16384,1,32 --D1=16384,1,32 --LL=16384,1,32 . /dijkstra_large test300.adjlist
i.e., L1 data and instruction = 16KB, associativity 1, 32byte line size L2 = 16KB, associativity 1, line size 32bytes
And of course, the code that I optimized is better than the original, but matrix optimization and the array made the code worse, and I’d like to know why. Also, if anyone has any optimization tips I would be very grateful.
I can’t change cache settings