#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_LINE_LEN 1024
#define MAX_PATTERNS 100
#define MAX_WORD_LEN 100
// 链表节点结构体
typedef struct LineNode {
int line_number;
struct LineNode *next;
} LineNode;
// 模式串信息结构体
typedef struct {
char pattern[MAX_WORD_LEN];
int count;
int *next; // KMP的next数组
LineNode *lines_head;
LineNode *lines_tail;
} PatternInfo;
// 函数声明
void add_line_number(PatternInfo *pattern_info, int line_num);
void free_patterns(PatternInfo patterns[], int pattern_count);
void print_results(PatternInfo patterns[], int pattern_count);
int is_word_char(char c);
void compute_next_array(char *pattern, int pattern_len, int next[]);
int kmp_search(char *line, char *pattern, int line_len, int pattern_len, int next[], int line_num, PatternInfo *pattern_info);
int main() {
char filename[256];
char line[MAX_LINE_LEN];
PatternInfo patterns[MAX_PATTERNS];
int pattern_count = 0;
int line_num = 1;
FILE *file;
printf("=== 文学研究助手(KMP算法版) ===\n\n");
// 1. 输入要搜索的模式串
printf("请输入要搜索的单词(输入空行结束):\n");
while (pattern_count < MAX_PATTERNS) {
printf("模式 %d: ", pattern_count + 1);
fgets(patterns[pattern_count].pattern, MAX_WORD_LEN, stdin);
patterns[pattern_count].pattern[strcspn(patterns[pattern_count].pattern, "\n")] = '\0';
if (patterns[pattern_count].pattern[0] == '\0') {
break;
}
patterns[pattern_count].count = 0;
patterns[pattern_count].lines_head = NULL;
patterns[pattern_count].lines_tail = NULL;
// 为next数组分配内存
int pattern_len = strlen(patterns[pattern_count].pattern);
patterns[pattern_count].next = (int*)malloc((pattern_len + 1) * sizeof(int));
compute_next_array(patterns[pattern_count].pattern, pattern_len,
patterns[pattern_count].next);
pattern_count++;
}
if (pattern_count == 0) {
printf("错误:没有输入任何模式串\n");
return 1;
}
// 2. 输入文件名
printf("\n请输入要分析的文件名: ");
fgets(filename, sizeof(filename), stdin);
filename[strcspn(filename, "\n")] = '\0';
// 3. 打开文件
file = fopen(filename, "r");
if (file == NULL) {
printf("错误:无法打开文件 %s\n", filename);
// 释放next数组内存
for (int i = 0; i < pattern_count; i++) {
free(patterns[i].next);
}
return 1;
}
printf("\n开始分析文件...\n");
// 4. 逐行读取文件并分析
while (fgets(line, sizeof(line), file) != NULL) {
int line_len = strlen(line);
// 对于每一行,使用KMP算法搜索所有模式串
for (int p = 0; p < pattern_count; p++) {
char *pattern = patterns[p].pattern;
int pattern_len = strlen(pattern);
// 使用KMP算法搜索
int matches = kmp_search(line, pattern, line_len, pattern_len,
patterns[p].next, line_num, &patterns[p]);
patterns[p].count += matches;
}
line_num++;
}
// 5. 关闭文件
fclose(file);
// 6. 输出结果
printf("\n===== 统计结果 =====\n");
print_results(patterns, pattern_count);
// 7. 释放内存
free_patterns(patterns, pattern_count);
return 0;
}
// 计算KMP算法的next数组
void compute_next_array(char *pattern, int pattern_len, int next[]) {
int i = 0;
int j = -1;
next[0] = -1;
while (i < pattern_len) {
if (j == -1 || pattern[i] == pattern[j]) {
i++;
j++;
next[i] = j;
} else {
j = next[j];
}
}
}
// 使用KMP算法搜索
int kmp_search(char *line, char *pattern, int line_len, int pattern_len,
int next[], int line_num, PatternInfo *pattern_info) {
int i = 0; // 行索引
int j = 0; // 模式串索引
int found = 0;
int line_added = 0; // 标记是否已添加行号
while (i < line_len) {
if (j == -1 || line[i] == pattern[j]) {
i++;
j++;
} else {
j = next[j];
}
// 如果找到匹配
if (j == pattern_len) {
// 检查是否是完整单词
int start_pos = i - pattern_len;
int is_word_start = (start_pos == 0 || !is_word_char(line[start_pos - 1]));
int is_word_end = (i >= line_len || !is_word_char(line[i]));
if (is_word_start && is_word_end) {
found++;
// 如果是第一次在这一行找到,记录行号
if (!line_added) {
add_line_number(pattern_info, line_num);
line_added = 1;
}
}
// 继续搜索下一个匹配
j = next[j];
}
}
return found;
}
// 判断字符是否是单词字符
int is_word_char(char c) {
return isalpha(c) || isdigit(c) || c == '_';
}
// 添加行号到链表
void add_line_number(PatternInfo *pattern_info, int line_num) {
LineNode *new_node = (LineNode *)malloc(sizeof(LineNode));
new_node->line_number = line_num;
new_node->next = NULL;
if (pattern_info->lines_head == NULL) {
pattern_info->lines_head = new_node;
pattern_info->lines_tail = new_node;
} else {
pattern_info->lines_tail->next = new_node;
pattern_info->lines_tail = new_node;
}
}
// 打印结果
void print_results(PatternInfo patterns[], int pattern_count) {
for (int i = 0; i < pattern_count; i++) {
printf("\n单词: \"%s\"\n", patterns[i].pattern);
printf("出现次数: %d\n", patterns[i].count);
if (patterns[i].count > 0) {
printf("出现在行号: ");
LineNode *current = patterns[i].lines_head;
while (current != NULL) {
printf("%d", current->line_number);
if (current->next != NULL) {
printf(", ");
}
current = current->next;
}
printf("\n");
} else {
printf("未找到\n");
}
}
}
// 释放内存
void free_patterns(PatternInfo patterns[], int pattern_count) {
for (int i = 0; i < pattern_count; i++) {
// 释放链表
LineNode *current = patterns[i].lines_head;
while (current != NULL) {
LineNode *temp = current;
current = current->next;
free(temp);
}
// 释放next数组
free(patterns[i].next);
}
}