343 lines
9.6 KiB
C
343 lines
9.6 KiB
C
|
|
#include "slide_combine_c.h"

#include <limits.h>
#include <locale.h>
|
|||
|
|
|
|||
|
|
// 提取书签从bkmk文件
|
|||
|
|
ErrorCode extract_bookmarks_from_bkmk(const char* filename, BookmarkItem* bookmarks, int* count) {
|
|||
|
|
if (!filename || !bookmarks || !count) {
|
|||
|
|
return ERROR_INVALID_PATH;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
FILE* file = fopen(filename, "rb");
|
|||
|
|
if (!file) {
|
|||
|
|
return ERROR_FILE_NOT_FOUND;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 读取文件内容到缓冲区
|
|||
|
|
fseek(file, 0, SEEK_END);
|
|||
|
|
long file_size = ftell(file);
|
|||
|
|
fseek(file, 0, SEEK_SET);
|
|||
|
|
|
|||
|
|
char* buffer = (char*)malloc(file_size + 1);
|
|||
|
|
if (!buffer) {
|
|||
|
|
fclose(file);
|
|||
|
|
return ERROR_MEMORY_ALLOCATION;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
size_t bytes_read = fread(buffer, 1, file_size, file);
|
|||
|
|
buffer[bytes_read] = '\0';
|
|||
|
|
fclose(file);
|
|||
|
|
|
|||
|
|
// 检测并转换编码
|
|||
|
|
ErrorCode encoding_result = detect_file_encoding(filename, buffer, file_size + 1);
|
|||
|
|
if (encoding_result != ERROR_NONE) {
|
|||
|
|
free(buffer);
|
|||
|
|
return encoding_result;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 按行分割内容
|
|||
|
|
char* line = strtok(buffer, "\r\n");
|
|||
|
|
*count = 0;
|
|||
|
|
|
|||
|
|
while (line && *count < MAX_BOOKMARKS) {
|
|||
|
|
char* trimmed = trim_whitespace(line);
|
|||
|
|
if (strlen(trimmed) > 0) {
|
|||
|
|
// 解析书签行
|
|||
|
|
char* last_space = strrchr(trimmed, ' ');
|
|||
|
|
if (last_space) {
|
|||
|
|
*last_space = '\0';
|
|||
|
|
|
|||
|
|
char* title = trim_whitespace(trimmed);
|
|||
|
|
char* page = trim_whitespace(last_space + 1);
|
|||
|
|
|
|||
|
|
if (strlen(title) > 0 && strlen(page) > 0) {
|
|||
|
|
strcpy_s(bookmarks[*count].title, sizeof(bookmarks[*count].title), title);
|
|||
|
|
strcpy_s(bookmarks[*count].page, sizeof(bookmarks[*count].page), page);
|
|||
|
|
(*count)++;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
line = strtok(NULL, "\r\n");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
free(buffer);
|
|||
|
|
return ERROR_NONE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 从TXT文件读取元数据
|
|||
|
|
ErrorCode read_metadata_from_txt(const char* filename, DocumentMetadata* metadata) {
|
|||
|
|
if (!filename || !metadata) {
|
|||
|
|
return ERROR_INVALID_PATH;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
FILE* file = fopen(filename, "r");
|
|||
|
|
if (!file) {
|
|||
|
|
return ERROR_FILE_NOT_FOUND;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
char line[MAX_BUFFER_SIZE];
|
|||
|
|
|
|||
|
|
// 初始化所有字段为空
|
|||
|
|
for (int i = 0; i < FIELD_COUNT; i++) {
|
|||
|
|
metadata->fields[i][0] = '\0';
|
|||
|
|
}
|
|||
|
|
metadata->bookmark_count = 0;
|
|||
|
|
|
|||
|
|
while (fgets(line, sizeof(line), file)) {
|
|||
|
|
// 移除换行符
|
|||
|
|
line[strcspn(line, "\r\n")] = '\0';
|
|||
|
|
|
|||
|
|
char* trimmed = trim_whitespace(line);
|
|||
|
|
if (strlen(trimmed) == 0) continue;
|
|||
|
|
|
|||
|
|
// 分割键值对
|
|||
|
|
char* separator = strchr(trimmed, ':');
|
|||
|
|
if (!separator) continue;
|
|||
|
|
|
|||
|
|
*separator = '\0';
|
|||
|
|
char* key = trim_whitespace(trimmed);
|
|||
|
|
char* value = trim_whitespace(separator + 1);
|
|||
|
|
|
|||
|
|
// 查找对应的字段
|
|||
|
|
for (int i = 0; i < FIELD_COUNT; i++) {
|
|||
|
|
if (strcmp(key, FIELD_NAMES[i]) == 0) {
|
|||
|
|
strcpy_s(metadata->fields[i], sizeof(metadata->fields[i]), value);
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
fclose(file);
|
|||
|
|
return ERROR_NONE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 创建输出内容
|
|||
|
|
ErrorCode create_output_content(DocumentMetadata* docs, int count, char** output) {
|
|||
|
|
if (!docs || count <= 0 || !output) {
|
|||
|
|
return ERROR_INVALID_PATH;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 计算总长度
|
|||
|
|
int total_length = 0;
|
|||
|
|
for (int i = 0; i < count; i++) {
|
|||
|
|
for (int j = 0; j < FIELD_COUNT; j++) {
|
|||
|
|
total_length += strlen(FIELD_NAMES[j]) + strlen(docs[i].fields[j]) + 10;
|
|||
|
|
}
|
|||
|
|
total_length += strlen("tableOfContents:") + 10;
|
|||
|
|
for (int k = 0; k < docs[i].bookmark_count; k++) {
|
|||
|
|
total_length += strlen(docs[i].bookmarks[k].title) + strlen(docs[i].bookmarks[k].page) + 20;
|
|||
|
|
}
|
|||
|
|
total_length += 100; // 分隔符和缓冲
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 分配内存
|
|||
|
|
char* result = (char*)malloc(total_length + 1);
|
|||
|
|
if (!result) {
|
|||
|
|
return ERROR_MEMORY_ALLOCATION;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
result[0] = '\0';
|
|||
|
|
char* ptr = result;
|
|||
|
|
|
|||
|
|
// 生成内容
|
|||
|
|
for (int i = 0; i < count; i++) {
|
|||
|
|
if (i > 0) {
|
|||
|
|
strcat_s(ptr, total_length - strlen(result), " <>\n");
|
|||
|
|
ptr += strlen(ptr);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 添加元数据字段
|
|||
|
|
for (int j = 0; j < FIELD_COUNT; j++) {
|
|||
|
|
if (strlen(docs[i].fields[j]) > 0) {
|
|||
|
|
sprintf_s(ptr, total_length - strlen(result), "%s:%s\n", FIELD_NAMES[j], docs[i].fields[j]);
|
|||
|
|
ptr += strlen(ptr);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 添加书签目录
|
|||
|
|
if (docs[i].bookmark_count > 0) {
|
|||
|
|
strcat_s(ptr, total_length - strlen(result), "tableOfContents:\n");
|
|||
|
|
ptr += strlen(ptr);
|
|||
|
|
|
|||
|
|
for (int k = 0; k < docs[i].bookmark_count; k++) {
|
|||
|
|
sprintf_s(ptr, total_length - strlen(result), "%s---------------%s<br/>\n",
|
|||
|
|
docs[i].bookmarks[k].title, docs[i].bookmarks[k].page);
|
|||
|
|
ptr += strlen(ptr);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
*output = result;
|
|||
|
|
return ERROR_NONE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 保存内容到文件
|
|||
|
|
ErrorCode save_content_to_file(const char* filename, const char* content) {
|
|||
|
|
if (!filename || !content) {
|
|||
|
|
return ERROR_INVALID_PATH;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
FILE* file = fopen(filename, "wb"); // 二进制写入,确保UTF-8 BOM正确
|
|||
|
|
if (!file) {
|
|||
|
|
return ERROR_FILE_WRITE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 写入UTF-8 BOM
|
|||
|
|
const unsigned char bom[] = {0xEF, 0xBB, 0xBF};
|
|||
|
|
fwrite(bom, 1, 3, file);
|
|||
|
|
|
|||
|
|
// 写入内容
|
|||
|
|
size_t content_len = strlen(content);
|
|||
|
|
size_t written = fwrite(content, 1, content_len, file);
|
|||
|
|
|
|||
|
|
fclose(file);
|
|||
|
|
|
|||
|
|
if (written != content_len) {
|
|||
|
|
return ERROR_FILE_WRITE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return ERROR_NONE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 检测文件编码并转换
|
|||
|
|
ErrorCode detect_file_encoding(const char* filename, char* buffer, int buffer_size) {
|
|||
|
|
// 简单的编码检测和转换
|
|||
|
|
// 这里假设文件可能是UTF-8、GBK或GB2312
|
|||
|
|
// 对于C语言,我们使用Windows API进行转换
|
|||
|
|
|
|||
|
|
// 检查是否为UTF-8 BOM
|
|||
|
|
if (buffer_size >= 3 && (unsigned char)buffer[0] == 0xEF &&
|
|||
|
|
(unsigned char)buffer[1] == 0xBB && (unsigned char)buffer[2] == 0xBF) {
|
|||
|
|
// 是UTF-8 with BOM,跳过BOM
|
|||
|
|
memmove(buffer, buffer + 3, buffer_size - 3);
|
|||
|
|
buffer[buffer_size - 3] = '\0';
|
|||
|
|
return ERROR_NONE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 尝试用MultiByteToWideChar检测是否为有效UTF-8
|
|||
|
|
int wide_length = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buffer, -1, NULL, 0);
|
|||
|
|
if (wide_length > 0) {
|
|||
|
|
// 是有效的UTF-8
|
|||
|
|
return ERROR_NONE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 尝试用CP_ACP(系统默认编码)
|
|||
|
|
wide_length = MultiByteToWideChar(CP_ACP, 0, buffer, -1, NULL, 0);
|
|||
|
|
if (wide_length > 0) {
|
|||
|
|
// 转换为UTF-8
|
|||
|
|
wchar_t* wide_buffer = (wchar_t*)malloc(wide_length * sizeof(wchar_t));
|
|||
|
|
if (!wide_buffer) {
|
|||
|
|
return ERROR_MEMORY_ALLOCATION;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
MultiByteToWideChar(CP_ACP, 0, buffer, -1, wide_buffer, wide_length);
|
|||
|
|
|
|||
|
|
int utf8_length = WideCharToMultiByte(CP_UTF8, 0, wide_buffer, -1, NULL, 0, NULL, NULL);
|
|||
|
|
if (utf8_length > 0 && utf8_length < buffer_size) {
|
|||
|
|
WideCharToMultiByte(CP_UTF8, 0, wide_buffer, -1, buffer, utf8_length, NULL, NULL);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
free(wide_buffer);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return ERROR_NONE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/**
 * Returns the value of the first run of decimal digits in `str`.
 *
 * @param str NUL-terminated string; may be NULL.
 * @return The number formed by the first digit run, saturated at INT_MAX;
 *         0 if `str` is NULL or contains no digit.
 */
int extract_number_from_string(const char* str) {
    if (!str) return 0;

    /* Skip ahead to the first digit. Bytes are converted to unsigned char
     * because passing a negative char to isdigit() is undefined, and file
     * names with non-ASCII characters contain bytes >= 0x80. */
    const char* p = str;
    while (*p && !isdigit((unsigned char)*p)) {
        p++;
    }

    int number = 0;
    for (; isdigit((unsigned char)*p); p++) {
        int digit = *p - '0';
        if (number > (INT_MAX - digit) / 10) {
            number = INT_MAX; /* saturate instead of overflowing */
        } else {
            number = number * 10 + digit;
        }
    }
    return number;
}
|
|||
|
|
|
|||
|
|
/**
 * Returns the first run of decimal digits in `folder_name` whose value is
 * below 1000.
 *
 * Digit runs with a value of 1000 or more (for example years) are skipped
 * and the search continues after them.
 *
 * @param folder_name NUL-terminated name; may be NULL.
 * @return The first qualifying number, or 0 if the name is NULL or has no
 *         qualifying digit run.
 */
int extract_folder_number(const char* folder_name) {
    if (!folder_name) return 0;

    const char* p = folder_name;
    while (*p) {
        /* unsigned char: a negative char passed to isdigit() is undefined. */
        if (!isdigit((unsigned char)*p)) {
            p++;
            continue;
        }

        /* Consume the whole digit run. The value is capped at 1000, which
         * is enough to reject the run and cannot overflow. */
        int number = 0;
        while (isdigit((unsigned char)*p)) {
            if (number < 1000) {
                number = number * 10 + (*p - '0');
                if (number > 1000) {
                    number = 1000;
                }
            }
            p++;
        }

        if (number < 1000) {
            return number;
        }
        /* Rejected: p already sits on the first character after the run
         * (possibly the terminator), so do not advance it again. */
    }
    return 0;
}
|
|||
|
|
|
|||
|
|
/**
 * Comparison callback for sorting an array of path strings.
 *
 * Each element is a pointer to a NUL-terminated path. The part after the
 * last backslash (the whole string when there is none) is passed to
 * extract_folder_number(); paths are ordered by that number first and by
 * strcmp() of the full paths when the numbers are equal.
 *
 * NOTE(review): the component examined is the last one in the path, which
 * for a file path is the file name rather than its folder - confirm this
 * matches what callers pass in.
 *
 * @param a Pointer to the first `const char*` element.
 * @param b Pointer to the second `const char*` element.
 * @return Negative, zero or positive, as qsort() expects.
 */
int compare_bkmk_files(const void* a, const void* b) {
    const char* file_a = *(const char* const*)a;
    const char* file_b = *(const char* const*)b;

    /* Look at the last path component in place; copying it into a
     * fixed-size buffer is unnecessary and would fail for long names. */
    const char* slash_a = strrchr(file_a, '\\');
    const char* slash_b = strrchr(file_b, '\\');
    const char* name_a = slash_a ? slash_a + 1 : file_a;
    const char* name_b = slash_b ? slash_b + 1 : file_b;

    int num_a = extract_folder_number(name_a);
    int num_b = extract_folder_number(name_b);
    if (num_a != num_b) {
        return num_a - num_b;
    }

    /* Same number: fall back to comparing the full paths. */
    return strcmp(file_a, file_b);
}
|
|||
|
|
|
|||
|
|
/**
 * Removes leading and trailing spaces, tabs, carriage returns and line
 * feeds from `str`, in place.
 *
 * The remaining text is moved to the start of the buffer, so the returned
 * pointer is always `str` itself.
 *
 * @param str NUL-terminated, writable string; may be NULL.
 * @return `str`, or NULL if `str` is NULL.
 */
char* trim_whitespace(char* str) {
    if (!str) return NULL;

    /* Number of leading whitespace characters. */
    size_t lead = strspn(str, " \t\r\n");

    /* Length of what remains, shortened past any trailing whitespace.
     * Working with lengths avoids forming a pointer before the buffer
     * when the string is empty. */
    size_t len = strlen(str + lead);
    while (len > 0) {
        char c = str[lead + len - 1];
        if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
            break;
        }
        len--;
    }

    /* Shift the kept text to the front and terminate it. */
    if (lead > 0) {
        memmove(str, str + lead, len);
    }
    str[len] = '\0';

    return str;
}
|