343 lines
10 KiB
C
343 lines
10 KiB
C
|
|
#include "slide_combine_c.h"
|
||
|
|
#include <io.h>
|
||
|
|
|
||
|
|
// 递归查找所有bkmk文件
|
||
|
|
ErrorCode find_bkmk_files(const char* root_path, char*** files, int* count) {
|
||
|
|
if (!root_path || !files || !count) {
|
||
|
|
return ERROR_INVALID_PATH;
|
||
|
|
}
|
||
|
|
|
||
|
|
char search_pattern[MAX_PATH_LENGTH];
|
||
|
|
sprintf_s(search_pattern, sizeof(search_pattern), "%s\\*.*", root_path);
|
||
|
|
|
||
|
|
WIN32_FIND_DATAA find_data;
|
||
|
|
HANDLE hFind = FindFirstFileA(search_pattern, &find_data);
|
||
|
|
|
||
|
|
if (hFind == INVALID_HANDLE_VALUE) {
|
||
|
|
return ERROR_FILE_NOT_FOUND;
|
||
|
|
}
|
||
|
|
|
||
|
|
*files = NULL;
|
||
|
|
*count = 0;
|
||
|
|
int capacity = 10;
|
||
|
|
|
||
|
|
do {
|
||
|
|
// 跳过 . 和 .. 目录
|
||
|
|
if (strcmp(find_data.cFileName, ".") == 0 || strcmp(find_data.cFileName, "..") == 0) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
char full_path[MAX_PATH_LENGTH];
|
||
|
|
sprintf_s(full_path, sizeof(full_path), "%s\\%s", root_path, find_data.cFileName);
|
||
|
|
|
||
|
|
if (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
|
||
|
|
// 递归搜索子目录
|
||
|
|
char** sub_files = NULL;
|
||
|
|
int sub_count = 0;
|
||
|
|
|
||
|
|
ErrorCode result = find_bkmk_files(full_path, &sub_files, &sub_count);
|
||
|
|
if (result == ERROR_NONE && sub_count > 0) {
|
||
|
|
// 扩展文件数组
|
||
|
|
if (*files == NULL) {
|
||
|
|
*files = (char**)malloc(capacity * sizeof(char*));
|
||
|
|
} else if (*count + sub_count >= capacity) {
|
||
|
|
capacity = *count + sub_count + 10;
|
||
|
|
*files = (char**)realloc(*files, capacity * sizeof(char*));
|
||
|
|
}
|
||
|
|
|
||
|
|
// 添加子目录的文件
|
||
|
|
for (int i = 0; i < sub_count; i++) {
|
||
|
|
(*files)[*count] = _strdup(sub_files[i]);
|
||
|
|
(*count)++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 释放子文件数组
|
||
|
|
for (int i = 0; i < sub_count; i++) {
|
||
|
|
free(sub_files[i]);
|
||
|
|
}
|
||
|
|
free(sub_files);
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
// 检查是否为bkmk文件
|
||
|
|
if (strstr(find_data.cFileName, "FreePic2Pdf_bkmk")) {
|
||
|
|
// 扩展文件数组
|
||
|
|
if (*files == NULL) {
|
||
|
|
*files = (char**)malloc(capacity * sizeof(char*));
|
||
|
|
} else if (*count >= capacity) {
|
||
|
|
capacity *= 2;
|
||
|
|
*files = (char**)realloc(*files, capacity * sizeof(char*));
|
||
|
|
}
|
||
|
|
|
||
|
|
(*files)[*count] = _strdup(full_path);
|
||
|
|
(*count)++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
} while (FindNextFileA(hFind, &find_data));
|
||
|
|
|
||
|
|
FindClose(hFind);
|
||
|
|
|
||
|
|
return ERROR_NONE;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Get the base file name: the part of folder_name before its first space
// (the whole name when it contains no space).
//
// Only the prefix is copied, bounded by the internal buffer, so a long name
// is truncated to 255 characters instead of overflowing or aborting.
//
// Returns NULL when folder_name is NULL. Otherwise returns a pointer to a
// function-local static buffer: the result is overwritten by the next call,
// so callers must copy it if they need to keep it.
char* get_base_filename(const char* folder_name) {
    if (!folder_name) return NULL;

    static char base_name[256];

    // Copy up to the first space, the terminator, or the buffer limit.
    size_t length = 0;
    while (length < sizeof(base_name) - 1 &&
           folder_name[length] != '\0' &&
           folder_name[length] != ' ') {
        base_name[length] = folder_name[length];
        length++;
    }
    base_name[length] = '\0';

    return base_name;
}
|
||
|
|
|
||
|
|
// 按基础文件名分组
|
||
|
|
ErrorCode group_files_by_base_name(char** files, int file_count, FileGroup** groups, int* group_count) {
|
||
|
|
if (!files || file_count <= 0 || !groups || !group_count) {
|
||
|
|
return ERROR_INVALID_PATH;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 临时分组结构
|
||
|
|
typedef struct {
|
||
|
|
char base_name[256];
|
||
|
|
char** file_list;
|
||
|
|
int file_count;
|
||
|
|
int capacity;
|
||
|
|
} TempGroup;
|
||
|
|
|
||
|
|
TempGroup* temp_groups = NULL;
|
||
|
|
int temp_count = 0;
|
||
|
|
int temp_capacity = 10;
|
||
|
|
|
||
|
|
temp_groups = (TempGroup*)malloc(temp_capacity * sizeof(TempGroup));
|
||
|
|
|
||
|
|
for (int i = 0; i < file_count; i++) {
|
||
|
|
char* file = files[i];
|
||
|
|
char* folder_name = strrchr(file, '\\');
|
||
|
|
if (!folder_name) folder_name = file;
|
||
|
|
else folder_name++;
|
||
|
|
|
||
|
|
char* base_name = get_base_filename(folder_name);
|
||
|
|
|
||
|
|
// 查找是否已存在该基础名的组
|
||
|
|
int group_index = -1;
|
||
|
|
for (int j = 0; j < temp_count; j++) {
|
||
|
|
if (strcmp(temp_groups[j].base_name, base_name) == 0) {
|
||
|
|
group_index = j;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// 如果不存在,创建新组
|
||
|
|
if (group_index == -1) {
|
||
|
|
if (temp_count >= temp_capacity) {
|
||
|
|
temp_capacity *= 2;
|
||
|
|
temp_groups = (TempGroup*)realloc(temp_groups, temp_capacity * sizeof(TempGroup));
|
||
|
|
}
|
||
|
|
|
||
|
|
group_index = temp_count++;
|
||
|
|
strcpy_s(temp_groups[group_index].base_name, sizeof(temp_groups[group_index].base_name), base_name);
|
||
|
|
temp_groups[group_index].file_list = (char**)malloc(10 * sizeof(char*));
|
||
|
|
temp_groups[group_index].file_count = 0;
|
||
|
|
temp_groups[group_index].capacity = 10;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 添加文件到组中
|
||
|
|
TempGroup* group = &temp_groups[group_index];
|
||
|
|
if (group->file_count >= group->capacity) {
|
||
|
|
group->capacity *= 2;
|
||
|
|
group->file_list = (char**)realloc(group->file_list, group->capacity * sizeof(char*));
|
||
|
|
}
|
||
|
|
|
||
|
|
group->file_list[group->file_count] = _strdup(file);
|
||
|
|
group->file_count++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 对每个组内的文件进行排序
|
||
|
|
for (int i = 0; i < temp_count; i++) {
|
||
|
|
qsort(temp_groups[i].file_list, temp_groups[i].file_count, sizeof(char*), compare_bkmk_files);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 转换为FileGroup结构
|
||
|
|
*groups = (FileGroup*)malloc(temp_count * sizeof(FileGroup));
|
||
|
|
*group_count = temp_count;
|
||
|
|
|
||
|
|
for (int i = 0; i < temp_count; i++) {
|
||
|
|
FileGroup* group = &(*groups)[i];
|
||
|
|
TempGroup* temp_group = &temp_groups[i];
|
||
|
|
|
||
|
|
strcpy_s(group->base_name, sizeof(group->base_name), temp_group->base_name);
|
||
|
|
group->files = temp_group->file_list;
|
||
|
|
group->file_count = temp_group->file_count;
|
||
|
|
group->metadata_docs = NULL;
|
||
|
|
group->metadata_count = 0;
|
||
|
|
group->output_content = NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 释放临时结构
|
||
|
|
free(temp_groups);
|
||
|
|
|
||
|
|
return ERROR_NONE;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 合并文件组
|
||
|
|
ErrorCode merge_file_group(FileGroup* group, const char* txt_source_path) {
|
||
|
|
if (!group || !txt_source_path) {
|
||
|
|
return ERROR_INVALID_PATH;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 分配内存给元数据文档
|
||
|
|
group->metadata_docs = (DocumentMetadata*)malloc(group->file_count * sizeof(DocumentMetadata));
|
||
|
|
if (!group->metadata_docs) {
|
||
|
|
return ERROR_MEMORY_ALLOCATION;
|
||
|
|
}
|
||
|
|
|
||
|
|
group->metadata_count = 0;
|
||
|
|
|
||
|
|
// 处理每个文件
|
||
|
|
for (int i = 0; i < group->file_count; i++) {
|
||
|
|
char* bkmk_file = group->files[i];
|
||
|
|
|
||
|
|
// 获取对应的TXT文件路径
|
||
|
|
char* folder_name = strrchr(bkmk_file, '\\');
|
||
|
|
if (!folder_name) folder_name = bkmk_file;
|
||
|
|
else folder_name++;
|
||
|
|
|
||
|
|
char txt_file[MAX_PATH_LENGTH];
|
||
|
|
sprintf_s(txt_file, sizeof(txt_file), "%s\\%s.txt", txt_source_path, folder_name);
|
||
|
|
|
||
|
|
// 创建元数据文档
|
||
|
|
DocumentMetadata* metadata = &group->metadata_docs[group->metadata_count];
|
||
|
|
memset(metadata, 0, sizeof(DocumentMetadata));
|
||
|
|
|
||
|
|
// 读取TXT元数据
|
||
|
|
if (PathFileExistsA(txt_file)) {
|
||
|
|
read_metadata_from_txt(txt_file, metadata);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 提取书签
|
||
|
|
if (PathFileExistsA(bkmk_file)) {
|
||
|
|
extract_bookmarks_from_bkmk(bmk_file, metadata->bookmarks, &metadata->bookmark_count);
|
||
|
|
}
|
||
|
|
|
||
|
|
group->metadata_count++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 创建合并后的输出内容
|
||
|
|
ErrorCode result = create_output_content(group->metadata_docs, group->metadata_count, &group->output_content);
|
||
|
|
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 处理所有文件
|
||
|
|
ErrorCode process_all_files(const char* pdf_path, const char* txt_path, FileGroup** groups, int* group_count) {
|
||
|
|
if (!pdf_path || !txt_path || !groups || !group_count) {
|
||
|
|
return ERROR_INVALID_PATH;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 检查路径是否存在
|
||
|
|
if (!PathFileExistsA(pdf_path)) {
|
||
|
|
return ERROR_FILE_NOT_FOUND;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (!PathFileExistsA(txt_path)) {
|
||
|
|
return ERROR_FILE_NOT_FOUND;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 查找所有bkmk文件
|
||
|
|
char** bkmk_files = NULL;
|
||
|
|
int bkmk_count = 0;
|
||
|
|
|
||
|
|
ErrorCode result = find_bkmk_files(pdf_path, &bkmk_files, &bkmk_count);
|
||
|
|
if (result != ERROR_NONE) {
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (bmk_count == 0) {
|
||
|
|
if (bmk_files) free(bmk_files);
|
||
|
|
return ERROR_FILE_NOT_FOUND;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 按基础文件名分组
|
||
|
|
result = group_files_by_base_name(bmk_files, bmk_count, groups, group_count);
|
||
|
|
|
||
|
|
// 合并每个文件组
|
||
|
|
if (result == ERROR_NONE) {
|
||
|
|
for (int i = 0; i < *group_count; i++) {
|
||
|
|
merge_file_group(&(*groups)[i], txt_path);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// 释放文件列表内存
|
||
|
|
for (int i = 0; i < bkmk_count; i++) {
|
||
|
|
free(bmk_files[i]);
|
||
|
|
}
|
||
|
|
free(bkm_files);
|
||
|
|
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 保存所有结果
|
||
|
|
ErrorCode save_all_results(FileGroup* groups, int group_count, const char* output_path) {
|
||
|
|
if (!groups || group_count <= 0 || !output_path) {
|
||
|
|
return ERROR_INVALID_PATH;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 确保输出目录存在
|
||
|
|
if (!PathFileExistsA(output_path)) {
|
||
|
|
if (!CreateDirectoryA(output_path, NULL)) {
|
||
|
|
return ERROR_FILE_WRITE;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
int success_count = 0;
|
||
|
|
|
||
|
|
for (int i = 0; i < group_count; i++) {
|
||
|
|
FileGroup* group = &groups[i];
|
||
|
|
|
||
|
|
if (group->output_content && strlen(group->output_content) > 0) {
|
||
|
|
char output_file[MAX_PATH_LENGTH];
|
||
|
|
sprintf_s(output_file, sizeof(output_file), "%s\\%s.txt", output_path, group->base_name);
|
||
|
|
|
||
|
|
ErrorCode result = save_content_to_file(output_file, group->output_content);
|
||
|
|
if (result == ERROR_NONE) {
|
||
|
|
success_count++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return success_count > 0 ? ERROR_NONE : ERROR_FILE_WRITE;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 释放内存
|
||
|
|
void free_memory(FileGroup* groups, int count) {
|
||
|
|
if (!groups) return;
|
||
|
|
|
||
|
|
for (int i = 0; i < count; i++) {
|
||
|
|
FileGroup* group = &groups[i];
|
||
|
|
|
||
|
|
// 释放文件列表
|
||
|
|
if (group->files) {
|
||
|
|
for (int j = 0; j < group->file_count; j++) {
|
||
|
|
free(group->files[j]);
|
||
|
|
}
|
||
|
|
free(group->files);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 释放元数据文档
|
||
|
|
if (group->metadata_docs) {
|
||
|
|
free(group->metadata_docs);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 释放输出内容
|
||
|
|
if (group->output_content) {
|
||
|
|
free(group->output_content);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
free(groups);
|
||
|
|
}
|