Given a list of words, return the size of each descending anagram group

Asked

Viewed 542 times

1

I have a file with a list of words, each in a line of the txt. I need to group those that are anagrams and return the size of the group, in descending order. Follow an example:

  • cat, bed, aeds, drop, Saed, toga, stretcher

The grouping would be:

  • (cat, drop, toga), (bed, litter), (aeds, Saed)

And your program should return the size of each group in descending order:

3, 2, 2, 2.

I need to encode in c. I read about and saw some problems of anagrams and solutions using hashMap.

My code is like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

char *pegaPalavra(const char *palavra, char *wbuf)
{
    char *p1, *p2, *fimPalavra;
    char t;
    int trocas;

    strcpy(wbuf, palavra);
    fimPalavra = wbuf+strlen(wbuf);
    do {
       trocas = 0;
       p1 = wbuf; p2 = fimPalavra-1;
       while (p1<p2) {
          if (*p2 > *p1) {
             t = *p2; *p2 = *p1; *p1 = t;
             trocas = 1;
          }
          p1++; p2--;
       }
       p1 = wbuf; p2 = p1+1;
       while(p2 < fimPalavra) {
           if (*p2 > *p1) {
             t = *p2; *p2 = *p1; *p1 = t;
             trocas = 1;
           }
           p1++; p2++;
       }
    } while (trocas);
    return wbuf;
}

static
short cxmap[] = {
    0x06, 0x1f, 0x4d, 0x0c, 0x5c, 0x28, 0x5d, 0x0e, 0x09, 0x33, 0x31, 0x56,
    0x52, 0x19, 0x29, 0x53, 0x32, 0x48, 0x35, 0x55, 0x5e, 0x14, 0x27, 0x24,
    0x02, 0x3e, 0x18, 0x4a, 0x3f, 0x4c, 0x45, 0x30, 0x08, 0x2c, 0x1a, 0x03,
    0x0b, 0x0d, 0x4f, 0x07, 0x20, 0x1d, 0x51, 0x3b, 0x11, 0x58, 0x00, 0x49,
    0x15, 0x2d, 0x41, 0x17, 0x5f, 0x39, 0x16, 0x42, 0x37, 0x22, 0x1c, 0x0f,
    0x43, 0x5b, 0x46, 0x4b, 0x0a, 0x26, 0x2e, 0x40, 0x12, 0x21, 0x3c, 0x36,
    0x38, 0x1e, 0x01, 0x1b, 0x05, 0x4e, 0x44, 0x3d, 0x04, 0x10, 0x5a, 0x2a,
    0x23, 0x34, 0x25, 0x2f, 0x2b, 0x50, 0x3a, 0x54, 0x47, 0x59, 0x13, 0x57,
   };
#define CXMAP_SIZE (sizeof(cxmap)/sizeof(short))


int Str_Hash( const char *key, int ix_max )
{
   const char *cp;
   short mash;
   int  hash = 33501551;
   for (cp = key; *cp; cp++) {
      mash = cxmap[*cp % CXMAP_SIZE];
      hash = (hash >>4) ^ 0x5C5CF5C ^ ((hash<<1) + (mash<<5));
      hash &= 0x3FFFFFFF;
      }
   return  hash % ix_max;
}

typedef struct sDictpalavra  *Dictpalavra;
struct sDictpalavra {
    const char *palavra;
    Dictpalavra next;
};

typedef struct sHashEntry *HashEntry;
struct sHashEntry {
    const char *key;
    HashEntry next;
    Dictpalavra  palavras;
    HashEntry link;
    short palavraCount;
};

#define HT_SIZE 8192

HashEntry hashTable[HT_SIZE];

HashEntry mostPerms = NULL;

int buildAnagrams( FILE *fin )
{
    char buffer[40];
    char bufr2[40];
    char *hkey;
    int hix;
    HashEntry he, *hep;
    Dictpalavra  we;
    int  maxPC = 2;
    int numpalavras = 0;

    while ( fgets(buffer, 40, fin)) {
        for(hkey = buffer; *hkey && (*hkey!='\n'); hkey++);
        *hkey = 0;
        hkey = pegaPalavra(buffer, bufr2);
        hix = Str_Hash(hkey, HT_SIZE);
        he = hashTable[hix]; hep = &hashTable[hix];
        while( he && strcmp(he->key , hkey) ) {
            hep = &he->next;
            he = he->next;
        }
        if ( ! he ) {
            he = malloc(sizeof(struct sHashEntry));
            he->next = NULL;
            he->key = strdup(hkey);
            he->palavraCount = 0;
            he->palavras = NULL;
            he->link = NULL;
            *hep = he;
        }
        we = malloc(sizeof(struct sDictpalavra));
        we->palavra = strdup(buffer);
        we->next = he->palavras;
        he->palavras = we;
        he->palavraCount++;
        if ( maxPC < he->palavraCount) {
            maxPC = he->palavraCount;
            mostPerms = he;
            he->link = NULL;
        }
        else if (maxPC == he->palavraCount) {
            he->link = mostPerms;
            mostPerms = he;
        }

        numpalavras++;
    }
    printf("%d palavras in dictionary max ana=%d\n", numpalavras, maxPC);
    return maxPC;
}


int main( )
{
    HashEntry he;
    Dictpalavra  we;
    FILE *f1;

    f1 = fopen("entrada.txt","r");
    buildAnagrams(f1);
    fclose(f1);

    f1 = fopen("anaout.txt","w");
    //f1 = stdout;

    for (he = mostPerms; he; he = he->link) {
        fprintf(f1,"%d:", he->palavraCount);
        for(we = he->palavras; we; we = we->next) {
            fprintf(f1,"%s, ", we->palavra);
        }
        fprintf(f1, "\n");
    }

    fclose(f1);
    return 0;
}

It just prints the largest anagram. Where specifically would I have to switch to so that it prints all and in descending order?

  • 1

    I don’t understand much of this code (by the way, it helps to separate what code you wrote from what code someone else wrote), but I notice that in the end you’re just iterating on mostPerms, and not about the whole hashtable (or at least that’s what I understood). You need to iterate over the whole hashtable, perhaps by placing the values of the sizes in an array, for example (and then sorting that array).

  • Sorry, I pasted the code twice. It is in Portuguese and English because I had done it in English, but they asked me in Portuguese, so I posted it in the middle of the translation. I will change the iteration and I reply again. It was worth the mgibsonbr/

1 answer

1

As I remember very little of C, I will leave the code in C++ for now and I update :P


Wow, that’s a lot! You know what anagrams have in common? They have the exact same letters. So how can we get there if two words are anagrams?

std::sort(palavra) == std::sort(possivel_anagrama)

We check the ordered strings. If they hit, they are anagrams.

So we ordered each of them:

for(std::string& palavra : lista_de_palavras)
{
     std::sort(palavra)
}

Soon we will have several repeated words. Now just see how many repetitions of each different word occur.

Browser other questions tagged

You are not signed in. Login or sign up in order to post.