分享|apply KMP
63
2025.12.11
2025.12.11
发布于 广东

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <ctype.h>

#define MAX_LINE_LEN 1024

#define MAX_PATTERNS 100

#define MAX_WORD_LEN 100

// 链表节点结构体

typedef struct LineNode {

int line_number;

struct LineNode *next;

} LineNode;

// 模式串信息结构体

typedef struct {

char pattern[MAX_WORD_LEN];

int count;

int *next; // KMP的next数组

LineNode *lines_head;

LineNode *lines_tail;

} PatternInfo;

// 函数声明

void add_line_number(PatternInfo *pattern_info, int line_num);

void free_patterns(PatternInfo patterns[], int pattern_count);

void print_results(PatternInfo patterns[], int pattern_count);

int is_word_char(char c);

void compute_next_array(char *pattern, int pattern_len, int next[]);

int kmp_search(char *line, char *pattern, int line_len, int pattern_len, int next[], int line_num, PatternInfo *pattern_info);

int main() {

char filename[256];

char line[MAX_LINE_LEN];

PatternInfo patterns[MAX_PATTERNS];

int pattern_count = 0;

int line_num = 1;

FILE *file;

printf("=== 文学研究助手(KMP算法版) ===\n\n");

// 1. 输入要搜索的模式串

printf("请输入要搜索的单词(输入空行结束):\n");

while (pattern_count < MAX_PATTERNS) {

printf("模式 %d: ", pattern_count + 1);

fgets(patterns[pattern_count].pattern, MAX_WORD_LEN, stdin);

patterns[pattern_count].pattern[strcspn(patterns[pattern_count].pattern, "\n")] = '\0';

if (patterns[pattern_count].pattern[0] == '\0') {

break;

}

patterns[pattern_count].count = 0;

patterns[pattern_count].lines_head = NULL;

patterns[pattern_count].lines_tail = NULL;

// 为next数组分配内存

int pattern_len = strlen(patterns[pattern_count].pattern);

patterns[pattern_count].next = (int*)malloc((pattern_len + 1) * sizeof(int));

compute_next_array(patterns[pattern_count].pattern, pattern_len,

patterns[pattern_count].next);

pattern_count++;

}

if (pattern_count == 0) {

printf("错误:没有输入任何模式串\n");

return 1;

}

// 2. 输入文件名

printf("\n请输入要分析的文件名: ");

fgets(filename, sizeof(filename), stdin);

filename[strcspn(filename, "\n")] = '\0';

// 3. 打开文件

file = fopen(filename, "r");

if (file == NULL) {

printf("错误:无法打开文件 %s\n", filename);

// 释放next数组内存

for (int i = 0; i < pattern_count; i++) {

free(patterns[i].next);

}

return 1;

}

printf("\n开始分析文件...\n");

// 4. 逐行读取文件并分析

while (fgets(line, sizeof(line), file) != NULL) {

int line_len = strlen(line);

// 对于每一行,使用KMP算法搜索所有模式串

for (int p = 0; p < pattern_count; p++) {

char *pattern = patterns[p].pattern;

int pattern_len = strlen(pattern);

// 使用KMP算法搜索

int matches = kmp_search(line, pattern, line_len, pattern_len,

patterns[p].next, line_num, &patterns[p]);

patterns[p].count += matches;

}

line_num++;

}

// 5. 关闭文件

fclose(file);

// 6. 输出结果

printf("\n===== 统计结果 =====\n");

print_results(patterns, pattern_count);

// 7. 释放内存

free_patterns(patterns, pattern_count);

return 0;

}

// 计算KMP算法的next数组

void compute_next_array(char *pattern, int pattern_len, int next[]) {

int i = 0;

int j = -1;

next[0] = -1;

while (i < pattern_len) {

if (j == -1 || pattern[i] == pattern[j]) {

i++;

j++;

next[i] = j;

} else {

j = next[j];

}

}

}

// 使用KMP算法搜索

int kmp_search(char *line, char *pattern, int line_len, int pattern_len,

int next[], int line_num, PatternInfo *pattern_info) {

int i = 0; // 行索引

int j = 0; // 模式串索引

int found = 0;

int line_added = 0; // 标记是否已添加行号

while (i < line_len) {

if (j == -1 || line[i] == pattern[j]) {

i++;

j++;

} else {

j = next[j];

}

// 如果找到匹配

if (j == pattern_len) {

// 检查是否是完整单词

int start_pos = i - pattern_len;

int is_word_start = (start_pos == 0 || !is_word_char(line[start_pos - 1]));

int is_word_end = (i >= line_len || !is_word_char(line[i]));

if (is_word_start && is_word_end) {

found++;

// 如果是第一次在这一行找到,记录行号

if (!line_added) {

add_line_number(pattern_info, line_num);

line_added = 1;

}

}

// 继续搜索下一个匹配

j = next[j];

}

}

return found;

}

// 判断字符是否是单词字符

int is_word_char(char c) {

return isalpha(c) || isdigit(c) || c == '_';

}

// 添加行号到链表

void add_line_number(PatternInfo *pattern_info, int line_num) {

LineNode *new_node = (LineNode *)malloc(sizeof(LineNode));

new_node->line_number = line_num;

new_node->next = NULL;

if (pattern_info->lines_head == NULL) {

pattern_info->lines_head = new_node;

pattern_info->lines_tail = new_node;

} else {

pattern_info->lines_tail->next = new_node;

pattern_info->lines_tail = new_node;

}

}

// 打印结果

void print_results(PatternInfo patterns[], int pattern_count) {

for (int i = 0; i < pattern_count; i++) {

printf("\n单词: \"%s\"\n", patterns[i].pattern);

printf("出现次数: %d\n", patterns[i].count);

if (patterns[i].count > 0) {

printf("出现在行号: ");

LineNode *current = patterns[i].lines_head;

while (current != NULL) {

printf("%d", current->line_number);

if (current->next != NULL) {

printf(", ");

}

current = current->next;

}

printf("\n");

} else {

printf("未找到\n");

}

}

}

// 释放内存

void free_patterns(PatternInfo patterns[], int pattern_count) {

for (int i = 0; i < pattern_count; i++) {

// 释放链表

LineNode *current = patterns[i].lines_head;

while (current != NULL) {

LineNode *temp = current;

current = current->next;

free(temp);

}

// 释放next数组

free(patterns[i].next);

}

}

评论 (0)