1
I have a file with a list of words, each in a line of the txt. I need to group those that are anagrams and return the size of the group, in descending order. Follow an example:
- cat, bed, aeds, drop, Saed, toga, stretcher
The grouping would be:
- (cat, drop, toga), (bed, litter), (aeds, Saed)
And your program should return the size of each group in descending order:
3, 2, 2, 2.
I need to encode in c. I read about and saw some problems of anagrams and solutions using hashMap.
My code is like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
char *pegaPalavra(const char *palavra, char *wbuf)
{
char *p1, *p2, *fimPalavra;
char t;
int trocas;
strcpy(wbuf, palavra);
fimPalavra = wbuf+strlen(wbuf);
do {
trocas = 0;
p1 = wbuf; p2 = fimPalavra-1;
while (p1<p2) {
if (*p2 > *p1) {
t = *p2; *p2 = *p1; *p1 = t;
trocas = 1;
}
p1++; p2--;
}
p1 = wbuf; p2 = p1+1;
while(p2 < fimPalavra) {
if (*p2 > *p1) {
t = *p2; *p2 = *p1; *p1 = t;
trocas = 1;
}
p1++; p2++;
}
} while (trocas);
return wbuf;
}
static
short cxmap[] = {
0x06, 0x1f, 0x4d, 0x0c, 0x5c, 0x28, 0x5d, 0x0e, 0x09, 0x33, 0x31, 0x56,
0x52, 0x19, 0x29, 0x53, 0x32, 0x48, 0x35, 0x55, 0x5e, 0x14, 0x27, 0x24,
0x02, 0x3e, 0x18, 0x4a, 0x3f, 0x4c, 0x45, 0x30, 0x08, 0x2c, 0x1a, 0x03,
0x0b, 0x0d, 0x4f, 0x07, 0x20, 0x1d, 0x51, 0x3b, 0x11, 0x58, 0x00, 0x49,
0x15, 0x2d, 0x41, 0x17, 0x5f, 0x39, 0x16, 0x42, 0x37, 0x22, 0x1c, 0x0f,
0x43, 0x5b, 0x46, 0x4b, 0x0a, 0x26, 0x2e, 0x40, 0x12, 0x21, 0x3c, 0x36,
0x38, 0x1e, 0x01, 0x1b, 0x05, 0x4e, 0x44, 0x3d, 0x04, 0x10, 0x5a, 0x2a,
0x23, 0x34, 0x25, 0x2f, 0x2b, 0x50, 0x3a, 0x54, 0x47, 0x59, 0x13, 0x57,
};
#define CXMAP_SIZE (sizeof(cxmap)/sizeof(short))
int Str_Hash( const char *key, int ix_max )
{
const char *cp;
short mash;
int hash = 33501551;
for (cp = key; *cp; cp++) {
mash = cxmap[*cp % CXMAP_SIZE];
hash = (hash >>4) ^ 0x5C5CF5C ^ ((hash<<1) + (mash<<5));
hash &= 0x3FFFFFFF;
}
return hash % ix_max;
}
typedef struct sDictpalavra *Dictpalavra;
struct sDictpalavra {
const char *palavra;
Dictpalavra next;
};
typedef struct sHashEntry *HashEntry;
struct sHashEntry {
const char *key;
HashEntry next;
Dictpalavra palavras;
HashEntry link;
short palavraCount;
};
#define HT_SIZE 8192
HashEntry hashTable[HT_SIZE];
HashEntry mostPerms = NULL;
int buildAnagrams( FILE *fin )
{
char buffer[40];
char bufr2[40];
char *hkey;
int hix;
HashEntry he, *hep;
Dictpalavra we;
int maxPC = 2;
int numpalavras = 0;
while ( fgets(buffer, 40, fin)) {
for(hkey = buffer; *hkey && (*hkey!='\n'); hkey++);
*hkey = 0;
hkey = pegaPalavra(buffer, bufr2);
hix = Str_Hash(hkey, HT_SIZE);
he = hashTable[hix]; hep = &hashTable[hix];
while( he && strcmp(he->key , hkey) ) {
hep = &he->next;
he = he->next;
}
if ( ! he ) {
he = malloc(sizeof(struct sHashEntry));
he->next = NULL;
he->key = strdup(hkey);
he->palavraCount = 0;
he->palavras = NULL;
he->link = NULL;
*hep = he;
}
we = malloc(sizeof(struct sDictpalavra));
we->palavra = strdup(buffer);
we->next = he->palavras;
he->palavras = we;
he->palavraCount++;
if ( maxPC < he->palavraCount) {
maxPC = he->palavraCount;
mostPerms = he;
he->link = NULL;
}
else if (maxPC == he->palavraCount) {
he->link = mostPerms;
mostPerms = he;
}
numpalavras++;
}
printf("%d palavras in dictionary max ana=%d\n", numpalavras, maxPC);
return maxPC;
}
int main( )
{
HashEntry he;
Dictpalavra we;
FILE *f1;
f1 = fopen("entrada.txt","r");
buildAnagrams(f1);
fclose(f1);
f1 = fopen("anaout.txt","w");
//f1 = stdout;
for (he = mostPerms; he; he = he->link) {
fprintf(f1,"%d:", he->palavraCount);
for(we = he->palavras; we; we = we->next) {
fprintf(f1,"%s, ", we->palavra);
}
fprintf(f1, "\n");
}
fclose(f1);
return 0;
}
It just prints the largest anagram. Where specifically would I have to switch to so that it prints all and in descending order?
I don’t understand much of this code (by the way, it helps to separate what code you wrote from what code someone else wrote), but I notice that in the end you’re just iterating on
mostPerms
, and not about the whole hashtable (or at least that’s what I understood). You need to iterate over the whole hashtable, perhaps by placing the values of the sizes in an array, for example (and then sorting that array).– mgibsonbr
Sorry, I pasted the code twice. It is in Portuguese and English because I had done it in English, but they asked me in Portuguese, so I posted it in the middle of the translation. I will change the iteration and I reply again. It was worth the mgibsonbr/
– Alan H.