using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace SlideCombine
{
    /// <summary>
    /// A single bookmark entry parsed from a bookmark text file.
    /// </summary>
    public class BookmarkItem
    {
        /// <summary>
        /// Bookmark title: the tokens that precede the page token, joined with single spaces.
        /// </summary>
        public string Title { get; set; }

        /// <summary>
        /// Page token exactly as it appeared in the file (digits or Roman-numeral letters).
        /// </summary>
        public string Page { get; set; }

        /// <summary>
        /// Formatted representation of the bookmark. Not populated by <see cref="BookmarkExtractor"/>.
        /// </summary>
        public string FormattedContent { get; set; }
    }

    /// <summary>
    /// Reads a FreePic2Pdf_bkmk text file and extracts "title page" style bookmark lines from it.
    /// </summary>
    public class BookmarkExtractor
    {
        // Characters that terminate a line in the bookmark file.
        private static readonly char[] LineSeparators = { '\r', '\n' };

        // Characters that separate tokens inside one bookmark line.
        private static readonly char[] TokenSeparators = { ' ', '\t', ':' };

        // UTF-8 decoder that throws on invalid byte sequences instead of silently substituting
        // U+FFFD. Without this, a GBK-encoded file would be "successfully" decoded as garbage and
        // the GBK fallback would never run.
        private static readonly Encoding StrictUtf8 = new UTF8Encoding(false, true);

        private static readonly Regex ArabicPageNumber = new Regex(@"^\d+$");

        // NOTE(review): this accepts any run of Roman-numeral letters, so ordinary words made only
        // of those letters (e.g. "mix", "did", "civil") are also treated as page numbers.
        private static readonly Regex RomanPageNumber = new Regex(@"^[IVXLCDMivxlcdm]+$");

        /// <summary>
        /// Extracts bookmarks from the given FreePic2Pdf_bkmk file.
        /// </summary>
        /// <param name="bkmkFilePath">Path of the bookmark file to read.</param>
        /// <returns>
        /// The bookmarks found, in file order. Lines that do not end in a page token are skipped,
        /// so the list may be empty.
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="Exception">
        /// Reading or parsing failed; the original error is available as <c>InnerException</c>.
        /// </exception>
        public static List<BookmarkItem> ExtractBookmarksFromBkmk(string bkmkFilePath)
        {
            if (!File.Exists(bkmkFilePath))
            {
                throw new FileNotFoundException($"FreePic2Pdf_bkmk文件不存在: {bkmkFilePath}");
            }

            var bookmarks = new List<BookmarkItem>();

            try
            {
                var content = ReadAllTextWithFallback(bkmkFilePath);

                // Split into lines; blank lines are dropped by RemoveEmptyEntries.
                var lines = content.Split(LineSeparators, StringSplitOptions.RemoveEmptyEntries);

                foreach (var line in lines)
                {
                    var trimmedLine = line.Trim();
                    if (string.IsNullOrEmpty(trimmedLine))
                    {
                        // Whitespace-only line.
                        continue;
                    }

                    // Expected shape is "title page" or "title:page".
                    var bookmark = ParseBookmarkLine(trimmedLine);
                    if (bookmark != null)
                    {
                        bookmarks.Add(bookmark);
                    }
                }
            }
            catch (Exception ex)
            {
                // Same exception type and message as before, but keep the cause for diagnostics.
                throw new Exception($"读取书签文件失败: {ex.Message}", ex);
            }

            return bookmarks;
        }

        /// <summary>
        /// Reads the whole file as UTF-8, falling back to GBK when the bytes are not valid UTF-8.
        /// </summary>
        private static string ReadAllTextWithFallback(string path)
        {
            try
            {
                return File.ReadAllText(path, StrictUtf8);
            }
            catch (DecoderFallbackException)
            {
                // Not valid UTF-8: fall through to the legacy code page below.
            }

            Encoding fallback;
            try
            {
                fallback = Encoding.GetEncoding("GBK");
            }
            catch (ArgumentException)
            {
                // GBK is not available in this runtime (code-page provider presumably not
                // registered); decode leniently as UTF-8 rather than failing outright.
                fallback = Encoding.UTF8;
            }
            catch (NotSupportedException)
            {
                fallback = Encoding.UTF8;
            }

            return File.ReadAllText(path, fallback);
        }

        /// <summary>
        /// Parses one trimmed line into a bookmark, or returns <c>null</c> when the line has fewer
        /// than two tokens or its last token is not a page number.
        /// </summary>
        /// <remarks>
        /// Simple heuristic; adjust to the actual FreePic2Pdf_bkmk format if needed. Because ':' is
        /// a token separator, colons inside the title are replaced by spaces in the result.
        /// </remarks>
        private static BookmarkItem ParseBookmarkLine(string line)
        {
            var parts = line.Split(TokenSeparators, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
            {
                return null;
            }

            // The last token is assumed to be the page number.
            var pagePart = parts[parts.Length - 1];
            if (!IsPageNumber(pagePart))
            {
                // No recognizable page token: skip this line.
                return null;
            }

            return new BookmarkItem
            {
                Page = pagePart,
                Title = string.Join(" ", parts, 0, parts.Length - 1)
            };
        }

        /// <summary>
        /// Returns true when <paramref name="text"/> consists solely of digits or solely of
        /// Roman-numeral letters (either case).
        /// </summary>
        private static bool IsPageNumber(string text)
        {
            return ArabicPageNumber.IsMatch(text) || RomanPageNumber.IsMatch(text);
        }
    }
}