97 lines
3.0 KiB
C#
97 lines
3.0 KiB
C#
|
|
using System;
|
|||
|
|
using System.Collections.Generic;
|
|||
|
|
using System.IO;
|
|||
|
|
using System.Text;
|
|||
|
|
|
|||
|
|
namespace SlideCombine
|
|||
|
|
{
|
|||
|
|
/// <summary>
/// One bookmark entry: a title together with the page label it points to.
/// </summary>
public class BookmarkItem
{
    /// <summary>Text of the bookmark, without the trailing page label.</summary>
    public string Title { get; set; }

    /// <summary>
    /// Page label kept as text rather than a number, so that labels made of
    /// roman-numeral letters (for example "iv") can be stored as well as digits.
    /// </summary>
    public string Page { get; set; }

    /// <summary>
    /// Formatted representation of the entry. Nothing in this file assigns it;
    /// presumably a later formatting step fills it in (TODO confirm against callers).
    /// </summary>
    public string FormattedContent { get; set; }
}
|
|||
|
|
|
|||
|
|
/// <summary>
/// Reads a FreePic2Pdf bookmark ("bkmk") text file and converts every line that
/// ends in a page label into a <see cref="BookmarkItem"/>.
/// </summary>
public class BookmarkExtractor
{
    /// <summary>Characters that terminate a line in the bookmark file.</summary>
    private static readonly char[] LineBreaks = { '\r', '\n' };

    /// <summary>Characters that separate the tokens of a single bookmark line.</summary>
    private static readonly char[] FieldSeparators = { ' ', '\t', ':' };

    /// <summary>Matches a page label made only of decimal digits.</summary>
    private static readonly System.Text.RegularExpressions.Regex ArabicPagePattern =
        new System.Text.RegularExpressions.Regex(@"^\d+$");

    /// <summary>
    /// Matches a page label made only of roman-numeral letters (either case).
    /// The pattern checks the alphabet only, not numeral structure, so ordinary
    /// words spelled with these letters (for example "mix") also match.
    /// </summary>
    private static readonly System.Text.RegularExpressions.Regex RomanPagePattern =
        new System.Text.RegularExpressions.Regex(@"^[IVXLCDMivxlcdm]+$");

    /// <summary>
    /// Loads the bookmark file at <paramref name="bkmkFilePath"/> and returns one
    /// <see cref="BookmarkItem"/> per line that could be parsed as "title + page".
    /// Lines without a recognizable trailing page label are skipped.
    /// </summary>
    /// <param name="bkmkFilePath">Path of the bookmark text file to read.</param>
    /// <returns>The parsed bookmarks in file order; empty when no line matches.</returns>
    /// <exception cref="FileNotFoundException">
    /// The file does not exist (also raised for a null or empty path, because
    /// <see cref="File.Exists(string)"/> reports <c>false</c> for those).
    /// </exception>
    /// <exception cref="Exception">
    /// Reading or parsing failed; the original failure is available through
    /// <see cref="Exception.InnerException"/>.
    /// </exception>
    public static List<BookmarkItem> ExtractBookmarksFromBkmk(string bkmkFilePath)
    {
        if (!File.Exists(bkmkFilePath))
        {
            throw new FileNotFoundException($"FreePic2Pdf_bkmk文件不存在: {bkmkFilePath}", bkmkFilePath);
        }

        var bookmarks = new List<BookmarkItem>();

        try
        {
            var content = ReadBookmarkText(bkmkFilePath);

            foreach (var line in content.Split(LineBreaks, StringSplitOptions.RemoveEmptyEntries))
            {
                // Whitespace-only lines survive RemoveEmptyEntries, so trim before testing.
                var trimmedLine = line.Trim();
                if (trimmedLine.Length == 0)
                {
                    continue;
                }

                // Expected shape is "title page" or "title:page"; anything else yields null.
                var bookmark = ParseBookmarkLine(trimmedLine);
                if (bookmark != null)
                {
                    bookmarks.Add(bookmark);
                }
            }
        }
        catch (Exception ex)
        {
            // Same exception type and message as before, but keep the cause attached
            // so the real failure and its stack trace are not lost.
            throw new Exception($"读取书签文件失败: {ex.Message}", ex);
        }

        return bookmarks;
    }

    /// <summary>
    /// Reads the whole file as text, trying strict UTF-8 first and GBK second.
    /// </summary>
    /// <remarks>
    /// <see cref="Encoding.UTF8"/> replaces invalid bytes with U+FFFD instead of
    /// throwing, which would make the GBK fallback unreachable. A UTF-8 decoder
    /// created with <c>throwOnInvalidBytes: true</c> raises
    /// <see cref="DecoderFallbackException"/>, which is the signal to retry.
    /// </remarks>
    private static string ReadBookmarkText(string path)
    {
        try
        {
            return File.ReadAllText(path, new UTF8Encoding(false, true));
        }
        catch (DecoderFallbackException)
        {
            return File.ReadAllText(path, GetGbkOrLenientUtf8());
        }
    }

    /// <summary>
    /// Returns the GBK encoding when the runtime provides it, otherwise lenient
    /// UTF-8 so that the file still loads (with replacement characters).
    /// </summary>
    private static Encoding GetGbkOrLenientUtf8()
    {
        try
        {
            return Encoding.GetEncoding("GBK");
        }
        catch (ArgumentException)
        {
            // Raised when the "GBK" name is unknown, e.g. the code-page provider
            // has not been registered on this runtime.
            return Encoding.UTF8;
        }
        catch (NotSupportedException)
        {
            return Encoding.UTF8;
        }
    }

    /// <summary>
    /// Splits one non-empty line into tokens and, when the last token looks like a
    /// page label, builds a bookmark from it.
    /// </summary>
    /// <param name="line">A trimmed, non-empty line from the bookmark file.</param>
    /// <returns>
    /// The bookmark, or <c>null</c> when the line has fewer than two tokens or its
    /// last token is not a page label. The title is the remaining tokens joined
    /// with single spaces, so original separators (tabs, colons, repeated spaces)
    /// are not preserved.
    /// </returns>
    private static BookmarkItem ParseBookmarkLine(string line)
    {
        var parts = line.Split(FieldSeparators, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length < 2)
        {
            return null;
        }

        // The page label is assumed to be the final token of the line.
        var pagePart = parts[parts.Length - 1];
        if (!IsPageNumber(pagePart))
        {
            return null;
        }

        return new BookmarkItem
        {
            Page = pagePart,
            Title = string.Join(" ", parts, 0, parts.Length - 1)
        };
    }

    /// <summary>
    /// Tells whether <paramref name="text"/> consists solely of digits or solely
    /// of roman-numeral letters.
    /// </summary>
    private static bool IsPageNumber(string text)
    {
        return ArabicPagePattern.IsMatch(text) || RomanPagePattern.IsMatch(text);
    }
}
|
|||
|
|
}
|