- 添加BookmarkExtractor类用于从FreePic2Pdf_bkmk文件提取书签内容 - 添加ContentFormatter类实现内容格式化处理 - 添加FileMerger类实现文件智能合并功能 - 更新主界面支持路径选择和处理进度显示 - 支持按文件名前缀自动合并(如CH-875 1-3和CH-875 4-6合并为CH-875.txt) - 输出格式符合需求:tableOfContents与subject之间插入格式化内容 - 支持UTF-8和GBK编码自动检测 - 添加详细的使用说明文档 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
97 lines
3.0 KiB
C#
97 lines
3.0 KiB
C#
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace SlideCombine
{
    /// <summary>
    /// One bookmark entry parsed from a FreePic2Pdf_bkmk file.
    /// </summary>
    public class BookmarkItem
    {
        /// <summary>Title text: everything on the line before the page token.</summary>
        public string Title { get; set; }

        /// <summary>
        /// Page token exactly as written in the file (decimal digits or Roman-numeral letters).
        /// </summary>
        public string Page { get; set; }

        /// <summary>
        /// Formatted text for this bookmark. <see cref="BookmarkExtractor"/> leaves it unset.
        /// </summary>
        public string FormattedContent { get; set; }
    }

    /// <summary>
    /// Reads a FreePic2Pdf_bkmk file and turns every "title page" line into a
    /// <see cref="BookmarkItem"/>.
    /// </summary>
    public class BookmarkExtractor
    {
        // Characters that may separate the title from the page token on a line.
        private static readonly char[] FieldSeparators = { ' ', '\t', ':' };

        private static readonly char[] LineBreaks = { '\r', '\n' };

        // UTF-8 decoder that throws on invalid byte sequences. Encoding.UTF8 silently
        // substitutes U+FFFD instead, which would make the GBK fallback unreachable.
        private static readonly Encoding StrictUtf8 = new UTF8Encoding(false, true);

        // A run of decimal digits, or a run of Roman-numeral letters in either case.
        // The letters are not checked for being a well-formed Roman numeral.
        private static readonly Regex PageNumberPattern =
            new Regex(@"^(?:\d+|[IVXLCDMivxlcdm]+)$");

        /// <summary>
        /// Extracts all bookmark entries from a FreePic2Pdf_bkmk file.
        /// </summary>
        /// <param name="bkmkFilePath">Path of the bookmark file to read.</param>
        /// <returns>
        /// The bookmarks in file order. Lines that do not end in a page token are skipped,
        /// so the list may be empty.
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="Exception">
        /// Reading or decoding failed; the cause is available as <c>InnerException</c>.
        /// </exception>
        public static List<BookmarkItem> ExtractBookmarksFromBkmk(string bkmkFilePath)
        {
            if (!File.Exists(bkmkFilePath))
            {
                throw new FileNotFoundException($"FreePic2Pdf_bkmk文件不存在: {bkmkFilePath}");
            }

            var bookmarks = new List<BookmarkItem>();

            try
            {
                string content = ReadBookmarkText(bkmkFilePath);

                foreach (var rawLine in content.Split(LineBreaks, StringSplitOptions.RemoveEmptyEntries))
                {
                    var trimmedLine = rawLine.Trim();
                    if (trimmedLine.Length == 0)
                    {
                        continue;
                    }

                    // Expected shape: "title page" or "title:page".
                    var bookmark = ParseBookmarkLine(trimmedLine);
                    if (bookmark != null)
                    {
                        bookmarks.Add(bookmark);
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the cause as InnerException so its type and stack trace are not lost.
                throw new Exception($"读取书签文件失败: {ex.Message}", ex);
            }

            return bookmarks;
        }

        /// <summary>
        /// Reads the whole file as UTF-8 and retries as GBK when the bytes are not valid UTF-8.
        /// </summary>
        private static string ReadBookmarkText(string path)
        {
            try
            {
                return File.ReadAllText(path, StrictUtf8);
            }
            catch (DecoderFallbackException)
            {
                // Only a decoding failure triggers the retry; I/O errors propagate unchanged.
                // NOTE(review): "GBK" must be an encoding known to the runtime; on runtimes
                // without code-page encodings registered this call throws - confirm the target.
                return File.ReadAllText(path, Encoding.GetEncoding("GBK"));
            }
        }

        /// <summary>
        /// Splits one trimmed line into title and page, where the page is the last token.
        /// </summary>
        /// <returns>The parsed bookmark, or <c>null</c> when the line has no title/page pair.</returns>
        private static BookmarkItem ParseBookmarkLine(string line)
        {
            // Ignore separators dangling after the page token ("Title 5:").
            var body = line.TrimEnd(FieldSeparators);

            int lastSeparator = body.LastIndexOfAny(FieldSeparators);
            if (lastSeparator < 0)
            {
                // A single token (or nothing) cannot hold both a title and a page.
                return null;
            }

            var pagePart = body.Substring(lastSeparator + 1);
            if (!IsPageNumber(pagePart))
            {
                // No recognizable page token: skip the line.
                return null;
            }

            // Take the title verbatim from the line so that ':' and tabs inside it survive;
            // only separators at its edges are removed.
            var title = body.Substring(0, lastSeparator).Trim(FieldSeparators);
            if (title.Length == 0)
            {
                return null;
            }

            return new BookmarkItem { Title = title, Page = pagePart };
        }

        /// <summary>
        /// Tells whether <paramref name="text"/> looks like a page token: all decimal digits,
        /// or all Roman-numeral letters (upper or lower case).
        /// </summary>
        private static bool IsPageNumber(string text)
        {
            return PageNumberPattern.IsMatch(text);
        }
    }
}