SlideCombine/BookmarkExtractor.cs
yuuko 505715c05e 实现PDF书签合并功能
- 添加BookmarkExtractor类用于从FreePic2Pdf_bkmk文件提取书签内容
- 添加ContentFormatter类实现内容格式化处理
- 添加FileMerger类实现文件智能合并功能
- 更新主界面支持路径选择和处理进度显示
- 支持按文件名前缀自动合并(如CH-875 1-3和CH-875 4-6合并为CH-875.txt)
- 输出格式符合需求:tableOfContents与subject之间插入格式化内容
- 支持UTF-8和GBK编码自动检测
- 添加详细的使用说明文档

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-24 15:44:37 +08:00

97 lines
3.0 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SlideCombine
{
/// <summary>
/// One bookmark entry: a title, the page label it points at, and an optional
/// formatted rendering of the entry.
/// </summary>
public class BookmarkItem
{
    /// <summary>Bookmark title text.</summary>
    public string Title { get; set; }

    /// <summary>
    /// Page label kept as text rather than a number, because
    /// <c>BookmarkExtractor</c> accepts Roman-numeral letters as well as digits.
    /// </summary>
    public string Page { get; set; }

    /// <summary>
    /// Formatted text for this entry. <c>BookmarkExtractor</c> leaves it unset (null);
    /// presumably it is filled in by a later formatting step — confirm against callers.
    /// </summary>
    public string FormattedContent { get; set; }
}
/// <summary>
/// Reads a FreePic2Pdf bookmark ("bkmk") text file and converts every line that
/// ends in a page number into a <see cref="BookmarkItem"/>.
/// </summary>
public class BookmarkExtractor
{
    // Characters that terminate a line in the bookmark file.
    private static readonly char[] LineBreakChars = { '\r', '\n' };

    // Characters that separate the tokens of a single bookmark line.
    private static readonly char[] FieldSeparators = { ' ', '\t', ':' };

    // A page label is either all digits or made only of Roman-numeral letters
    // (either case). Built once instead of on every call.
    private static readonly System.Text.RegularExpressions.Regex PageNumberPattern =
        new System.Text.RegularExpressions.Regex(@"^(?:\d+|[IVXLCDMivxlcdm]+)$");

    /// <summary>
    /// Extracts all bookmarks from the given bookmark file.
    /// </summary>
    /// <param name="bkmkFilePath">Path of the bookmark text file to read.</param>
    /// <returns>
    /// The parsed bookmarks in file order. Lines that do not end in a page number
    /// are skipped, so the list may be empty.
    /// </returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="Exception">
    /// Reading or decoding the file failed; the original error is available as
    /// <see cref="Exception.InnerException"/>.
    /// </exception>
    public static List<BookmarkItem> ExtractBookmarksFromBkmk(string bkmkFilePath)
    {
        if (!File.Exists(bkmkFilePath))
        {
            throw new FileNotFoundException($"FreePic2Pdf_bkmk文件不存在: {bkmkFilePath}");
        }

        var bookmarks = new List<BookmarkItem>();
        try
        {
            var content = ReadAllTextWithFallback(bkmkFilePath);

            foreach (var line in content.Split(LineBreakChars, StringSplitOptions.RemoveEmptyEntries))
            {
                var trimmedLine = line.Trim();
                if (string.IsNullOrEmpty(trimmedLine))
                {
                    continue;
                }

                // Expected shape: "title page" or "title:page".
                var bookmark = ParseBookmarkLine(trimmedLine);
                if (bookmark != null)
                {
                    bookmarks.Add(bookmark);
                }
            }
        }
        catch (Exception ex)
        {
            // Keep the original exception as InnerException so its type and stack
            // trace are not lost; the thrown type and message are unchanged.
            throw new Exception($"读取书签文件失败: {ex.Message}", ex);
        }

        return bookmarks;
    }

    /// <summary>
    /// Reads the whole file as UTF-8 and falls back to GBK when the bytes are not
    /// valid UTF-8.
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>The decoded file content.</returns>
    private static string ReadAllTextWithFallback(string path)
    {
        try
        {
            // Encoding.UTF8 silently replaces invalid bytes with U+FFFD and never
            // throws, so it can never trigger a fallback. A strict decoder throws
            // instead, which is what makes the GBK retry reachable.
            var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
            return File.ReadAllText(path, strictUtf8);
        }
        catch (DecoderFallbackException)
        {
            // NOTE(review): on .NET Core / .NET 5+ "GBK" is only available after
            // a code-pages encoding provider has been registered at startup —
            // confirm for this project's target framework.
            return File.ReadAllText(path, Encoding.GetEncoding("GBK"));
        }
    }

    /// <summary>
    /// Parses one trimmed bookmark line into a <see cref="BookmarkItem"/>.
    /// </summary>
    /// <param name="line">A single non-empty line of the bookmark file.</param>
    /// <returns>
    /// The parsed bookmark, or <c>null</c> when the line has fewer than two tokens
    /// or its last token is not a page number.
    /// </returns>
    private static BookmarkItem ParseBookmarkLine(string line)
    {
        var parts = line.Split(FieldSeparators, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length < 2)
        {
            return null;
        }

        // The last token is taken to be the page label.
        var pagePart = parts[parts.Length - 1];
        if (!IsPageNumber(pagePart))
        {
            // No recognizable page label: skip this line.
            return null;
        }

        // The title is the remaining tokens joined by single spaces. Separator
        // characters inside the title (including ':') and repeated whitespace are
        // therefore not preserved.
        return new BookmarkItem
        {
            Page = pagePart,
            Title = string.Join(" ", parts, 0, parts.Length - 1)
        };
    }

    /// <summary>
    /// Tells whether <paramref name="text"/> looks like a page label: all digits,
    /// or only Roman-numeral letters in either case.
    /// </summary>
    /// <param name="text">The token to test.</param>
    /// <returns><c>true</c> when the token matches one of the two forms.</returns>
    private static bool IsPageNumber(string text)
    {
        // The Roman-numeral form only checks the letters used, not whether they
        // form a well-formed numeral, so words such as "mix" also match.
        return PageNumberPattern.IsMatch(text);
    }
}
}