UTHASH

2022.10.30

发布于未知归属地

C 学习分享

简介

C语言中没有hash table，UTHASH提供了一个用宏编写的开源hash table。

支持语法

add/replace

find

delete

count

iterate

sort

定义hash structure

在uthash中，hash table由结构体组成。

#include "uthash.h"

struct my_struct {
    int id;                    /* key */
    char name[10];
    UT_hash_handle hh;         /* makes this structure hashable */
};

hash操作

定义hash table

定义hash table时必须将其初始化为NULL指针。

struct my_struct *users = NULL;    /* important! initialize to NULL */

Add item

向hash table中添加新元素时，要保证键值唯一，因此需要先检查hash table中键值是否已经存在。

void add_user(int user_id, char *name) {
    struct my_struct *s;
    HASH_FIND_INT(users, &user_id, s);  /* id already in the hash? */
    if (s == NULL) {
      s = (struct my_struct *)malloc(sizeof *s);
      s->id = user_id;
      HASH_ADD_INT(users, id, s);  /* id: name of key field */
    }
    strcpy(s->name, name);
}

Find item

hash table中不存在该键值时返回NULL。

struct my_struct *find_user(int user_id) {
    struct my_struct *s;

    HASH_FIND_INT(users, &user_id, s);  /* s: output pointer */
    return s;
}

Delete item

注意HASH_DEL只是将该键值从hash table中移除，并没有释放内存。

void delete_user(struct my_struct *user) {
    HASH_DEL(users, user);  /* user: pointer to deletee */
    free(user);             /* optional; it's up to you! */
}

Delete all items from a hash -- Iterate

void delete_all() {
  struct my_struct *current_user, *tmp;

  HASH_ITER(hh, users, current_user, tmp) {
    HASH_DEL(users, current_user);  /* delete; users advances to next */
    free(current_user);             /* optional- if you want to free  */
  }
}

All-at-once deletion

HASH_CLEAR(hh, users);

Count items

unsigned int num_users;
num_users = HASH_COUNT(users);
printf("there are %u users\n", num_users);

Iterating and sorting

Iterating over all the items in a hash

void print_users() {
    struct my_struct *s;

    for (s = users; s != NULL; s = s->hh.next) {
        printf("user id %d: name %s\n", s->id, s->name);
    }
}

还可使用hh.prev向后遍历hash table，因此uthash也是一个双向链表。

Deletion-safe iteration

struct my_struct *s, *tmp;
HASH_ITER(hh, users, s, tmp) {
    printf("user id %d: name %s\n", s->id, s->name);
    /* ... it is safe to delete and free s here */
}

Sorting

HASH_SORT(users, name_sort);

int sort_function(void *a, void *b) {
  /* compare a to b (cast a and b appropriately)
   * return (int) -1 if (a < b)
   * return (int)  0 if (a == b)
   * return (int)  1 if (a > b)
   */
}

int by_name(const struct my_struct *a, const struct my_struct *b) {
    return strcmp(a->name, b->name);
}

int by_id(const struct my_struct *a, const struct my_struct *b) {
    return (a->id - b->id);
}

void sort_by_name() {
    HASH_SORT(users, by_name);
}

void sort_by_id() {
    HASH_SORT(users, by_id);
}

Standard key types

几乎可以使用所有类型，整数、字符串、指针和结构体等。
HASH_DELETE、HASH_SORT不区分类型。

Integer keys

HASH_ADD_INT
HASH_FIND_INT

String keys

char[] 和 char* 是不同的。char[]意味着键值存放在结构体内部，char*意味着键值存放在结构体外部。
char[] 使用HASH_ADD_STR
char * 使用HASH_ADD_KEYPTR

Pointer keys

指针本身是一个键值，区别于指针指向的内容是一个键值（如上文的char*）
HASH_FIND_PTR
HASH_ADD_PTR

Structure keys

HASH_ADD
HASH_FIND

A key which is a structure

#include <stdlib.h>
#include <stdio.h>
#include "uthash.h"

typedef struct {
  char a;
  int b;
} record_key_t;

typedef struct {
    record_key_t key;
    /* ... other data ... */
    UT_hash_handle hh;
} record_t;

int main(int argc, char *argv[]) {
    record_t l, *p, *r, *tmp, *records = NULL;

    r = (record_t *)malloc(sizeof *r);
    memset(r, 0, sizeof *r);
    r->key.a = 'a';
    r->key.b = 1;
    HASH_ADD(hh, records, key, sizeof(record_key_t), r);

    memset(&l, 0, sizeof(record_t));
    l.key.a = 'a';
    l.key.b = 1;
    HASH_FIND(hh, records, &l.key, sizeof(record_key_t), p);

    if (p) printf("found %c %d\n", p->key.a, p->key.b);

    HASH_ITER(hh, records, p, tmp) {
      HASH_DEL(records, p);
      free(p);
    }
    return 0;
}

链接

官方文档