From 505715c05ea0c1c74c7e763b8f718db322e31580 Mon Sep 17 00:00:00 2001 From: yuuko Date: Mon, 24 Nov 2025 15:44:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=9E=E7=8E=B0PDF=E4=B9=A6=E7=AD=BE?= =?UTF-8?q?=E5=90=88=E5=B9=B6=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加BookmarkExtractor类用于从FreePic2Pdf_bkmk文件提取书签内容 - 添加ContentFormatter类实现内容格式化处理 - 添加FileMerger类实现文件智能合并功能 - 更新主界面支持路径选择和处理进度显示 - 支持按文件名前缀自动合并(如CH-875 1-3和CH-875 4-6合并为CH-875.txt) - 输出格式符合需求:tableOfContents与subject之间插入格式化内容 - 支持UTF-8和GBK编码自动检测 - 添加详细的使用说明文档 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- BookmarkExtractor.cs | 97 ++++++++++++++++++++++++++++++ ContentFormatter.cs | 67 +++++++++++++++++++++ FileMerger.cs | 137 +++++++++++++++++++++++++++++++++++++++++++ Form1.Designer.cs | 8 +-- Form1.cs | 84 +++++++++++++++++++++++++- Program.cs | 2 +- README.md | 93 +++++++++++++++++++++++++++++ 7 files changed, 481 insertions(+), 7 deletions(-) create mode 100644 BookmarkExtractor.cs create mode 100644 ContentFormatter.cs create mode 100644 FileMerger.cs create mode 100644 README.md diff --git a/BookmarkExtractor.cs b/BookmarkExtractor.cs new file mode 100644 index 0000000..a285974 --- /dev/null +++ b/BookmarkExtractor.cs @@ -0,0 +1,97 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace SlideCombine +{ + public class BookmarkItem + { + public string Title { get; set; } + public string Page { get; set; } + public string FormattedContent { get; set; } + } + + public class BookmarkExtractor + { + public static List ExtractBookmarksFromBkmk(string bkmkFilePath) + { + var bookmarks = new List(); + + if (!File.Exists(bkmkFilePath)) + { + throw new FileNotFoundException($"FreePic2Pdf_bkmk文件不存在: {bkmkFilePath}"); + } + + try + { + // 尝试用UTF-8读取,如果失败则用GBK + string content; + try + { + content = File.ReadAllText(bkmkFilePath, Encoding.UTF8); + } + catch + { 
+ content = File.ReadAllText(bkmkFilePath, Encoding.GetEncoding("GBK")); + } + + // 按行分割内容 + var lines = content.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); + + foreach (var line in lines) + { + var trimmedLine = line.Trim(); + if (string.IsNullOrEmpty(trimmedLine)) + continue; + + // 查找页码模式 - 假设格式为 "标题 页码" 或 "标题:页码" + var bookmark = ParseBookmarkLine(trimmedLine); + if (bookmark != null) + { + bookmarks.Add(bookmark); + } + } + } + catch (Exception ex) + { + throw new Exception($"读取书签文件失败: {ex.Message}"); + } + + return bookmarks; + } + + private static BookmarkItem ParseBookmarkLine(string line) + { + // 简单的解析逻辑,根据实际FreePic2Pdf_bkmk文件格式调整 + var parts = line.Split(new[] { ' ', '\t', ':' }, StringSplitOptions.RemoveEmptyEntries); + + if (parts.Length < 2) + return null; + + var bookmark = new BookmarkItem(); + + // 假设最后一部分是页码 + var pagePart = parts[parts.Length - 1]; + if (IsPageNumber(pagePart)) + { + bookmark.Page = pagePart; + bookmark.Title = string.Join(" ", parts, 0, parts.Length - 1); + } + else + { + // 如果没有明确的页码,跳过这一行 + return null; + } + + return bookmark; + } + + private static bool IsPageNumber(string text) + { + // 检查是否为数字(可以是罗马数字或阿拉伯数字) + return System.Text.RegularExpressions.Regex.IsMatch(text, @"^\d+$") || + System.Text.RegularExpressions.Regex.IsMatch(text, @"^[IVXLCDMivxlcdm]+$"); + } + } +} \ No newline at end of file diff --git a/ContentFormatter.cs b/ContentFormatter.cs new file mode 100644 index 0000000..ca68fea --- /dev/null +++ b/ContentFormatter.cs @@ -0,0 +1,67 @@ +using System.Collections.Generic; +using System.Text; + +namespace SlideCombine +{ + public class ContentFormatter + { + public static string FormatBookmarks(List bookmarks) + { + if (bookmarks == null || bookmarks.Count == 0) + return string.Empty; + + var sb = new StringBuilder(); + + // 添加tableOfContents标记 + sb.AppendLine("tableOfContents:"); + + foreach (var bookmark in bookmarks) + { + if (!string.IsNullOrEmpty(bookmark.Title)) + { + // 每行内容顶格 + 
sb.Append(bookmark.Title.Trim()); + + // 单词和页码之间加"----------" + if (!string.IsNullOrEmpty(bookmark.Page)) + { + sb.Append("----------"); + sb.Append(bookmark.Page); + } + + // 页码后加"
" + sb.Append("
"); + } + } + + // 添加subject标记 + sb.AppendLine(); + sb.Append("subject:"); + + return sb.ToString(); + } + + public static string CombineFormattedContents(List formattedContents) + { + if (formattedContents == null || formattedContents.Count == 0) + return string.Empty; + + var combined = new StringBuilder(); + + for (int i = 0; i < formattedContents.Count; i++) + { + if (i > 0) + { + // 在不同文件内容之间用"<>"分隔 + combined.AppendLine(); + combined.AppendLine("<>"); + combined.AppendLine(); + } + + combined.Append(formattedContents[i]); + } + + return combined.ToString(); + } + } +} \ No newline at end of file diff --git a/FileMerger.cs b/FileMerger.cs new file mode 100644 index 0000000..3c61f42 --- /dev/null +++ b/FileMerger.cs @@ -0,0 +1,137 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; + +namespace SlideCombine +{ + public class ProcessResult + { + public string BaseFileName { get; set; } + public List SourceFiles { get; set; } + public string OutputContent { get; set; } + public bool Success { get; set; } + public string ErrorMessage { get; set; } + } + + public class FileMerger + { + public static List ProcessAllFolders(string pdfRootPath, string txtOutputPath) + { + var results = new List(); + + try + { + // 获取所有包含FreePic2Pdf_bkmk文件的文件夹 + var bkmkFiles = Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories); + + if (bkmkFiles.Length == 0) + { + throw new Exception($"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 文件"); + } + + // 按基础文件名分组(取文件夹名称的空格前缀) + var fileGroups = new Dictionary>(); + + foreach (var bkmkFile in bkmkFiles) + { + var folderName = Path.GetDirectoryName(bkmkFile); + var folderNameOnly = new DirectoryInfo(folderName).Name; + + // 获取空格前的基础名称 + var baseName = GetBaseFileName(folderNameOnly); + + if (!fileGroups.ContainsKey(baseName)) + { + fileGroups[baseName] = new List(); + } + + fileGroups[baseName].Add(bkmkFile); + } + + // 处理每个分组 + foreach (var group in 
fileGroups) + { + var result = ProcessFileGroup(group.Key, group.Value.OrderBy(f => f).ToList()); + results.Add(result); + } + } + catch (Exception ex) + { + var errorResult = new ProcessResult + { + Success = false, + ErrorMessage = ex.Message + }; + results.Add(errorResult); + } + + return results; + } + + private static string GetBaseFileName(string folderName) + { + // 获取空格前的部分作为基础名称 + var spaceIndex = folderName.IndexOf(' '); + return spaceIndex > 0 ? folderName.Substring(0, spaceIndex) : folderName; + } + + private static ProcessResult ProcessFileGroup(string baseName, List bkmkFiles) + { + var result = new ProcessResult + { + BaseFileName = baseName, + SourceFiles = bkmkFiles, + Success = true + }; + + try + { + var allFormattedContents = new List(); + + foreach (var bkmkFile in bkmkFiles) + { + // 提取书签 + var bookmarks = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile); + + // 格式化内容 + var formattedContent = ContentFormatter.FormatBookmarks(bookmarks); + + allFormattedContents.Add(formattedContent); + } + + // 合并所有格式化的内容 + var combinedContent = ContentFormatter.CombineFormattedContents(allFormattedContents); + result.OutputContent = combinedContent; + } + catch (Exception ex) + { + result.Success = false; + result.ErrorMessage = $"处理文件组 {baseName} 时出错: {ex.Message}"; + } + + return result; + } + + public static void SaveResults(List results, string outputPath) + { + if (!Directory.Exists(outputPath)) + { + Directory.CreateDirectory(outputPath); + } + + foreach (var result in results) + { + if (result.Success && !string.IsNullOrEmpty(result.OutputContent)) + { + var outputFileName = $"{result.BaseFileName}.txt"; + var outputFilePath = Path.Combine(outputPath, outputFileName); + + // 使用UTF-8编码保存 + File.WriteAllText(outputFilePath, result.OutputContent, Encoding.UTF8); + } + } + } + } +} \ No newline at end of file diff --git a/Form1.Designer.cs b/Form1.Designer.cs index 6be42b3..e7122be 100644 --- a/Form1.Designer.cs +++ b/Form1.Designer.cs @@ -37,7 +37,7 
@@ namespace SlideCombine // 主窗体设置 AutoScaleMode = AutoScaleMode.Font; ClientSize = new Size(600, 400); - Text = "WinForms应用模板"; + Text = "PDF书签合并工具"; StartPosition = FormStartPosition.CenterScreen; // 创建控件 @@ -61,7 +61,7 @@ namespace SlideCombine txtLog = new TextBox(); // 设置源文件夹组 - grpSourceFolder.Text = "输入文件夹"; + grpSourceFolder.Text = "PDF文件夹路径"; grpSourceFolder.Location = new Point(10, 10); grpSourceFolder.Size = new Size(580, 60); grpSourceFolder.TabStop = false; @@ -83,7 +83,7 @@ namespace SlideCombine grpSourceFolder.Controls.Add(btnBrowseSource); // 设置输出文件夹组 - grpOutputFolder.Text = "输出文件夹"; + grpOutputFolder.Text = "TXT输出路径"; grpOutputFolder.Location = new Point(10, 80); grpOutputFolder.Size = new Size(580, 60); grpOutputFolder.TabStop = false; @@ -108,7 +108,7 @@ namespace SlideCombine pnlButtons.Location = new Point(10, 150); pnlButtons.Size = new Size(580, 40); - btnMerge.Text = "执行"; + btnMerge.Text = "合并书签"; btnMerge.Location = new Point(10, 8); btnMerge.Size = new Size(75, 25); btnMerge.Click += new EventHandler(btnMerge_Click); diff --git a/Form1.cs b/Form1.cs index dc6a8ba..e058b8a 100644 --- a/Form1.cs +++ b/Form1.cs @@ -1,4 +1,6 @@ using System; +using System.Collections.Generic; +using System.IO; using System.Drawing; using System.Windows.Forms; @@ -67,8 +69,86 @@ namespace SlideCombine private void btnMerge_Click(object sender, EventArgs e) { - MessageBox.Show("HelloWorld", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information); - Log("HelloWorld"); + try + { + // 验证输入 + if (string.IsNullOrWhiteSpace(txtSourcePath.Text) || string.IsNullOrWhiteSpace(txtOutputPath.Text)) + { + MessageBox.Show("请选择PDF文件夹路径和TXT输出路径", "提示", MessageBoxButtons.OK, MessageBoxIcon.Warning); + return; + } + + if (!Directory.Exists(txtSourcePath.Text)) + { + MessageBox.Show("指定的PDF文件夹路径不存在", "错误", MessageBoxButtons.OK, MessageBoxIcon.Error); + return; + } + + // 禁用按钮,防止重复点击 + btnMerge.Enabled = false; + btnClear.Enabled = false; + + // 重置进度条和日志 + progressBar.Value = 0; 
+ txtLog.Clear(); + Log("开始处理PDF书签文件..."); + + // 处理文件 + var results = FileMerger.ProcessAllFolders(txtSourcePath.Text, txtOutputPath.Text); + + // 显示进度 + progressBar.Value = 50; + Log($"找到 {results.Count} 个文件组需要处理"); + + // 保存结果 + FileMerger.SaveResults(results, txtOutputPath.Text); + + progressBar.Value = 100; + + // 统计成功和失败的数量 + int successCount = 0; + int failCount = 0; + var sb = new StringBuilder(); + + foreach (var result in results) + { + if (result.Success) + { + successCount++; + Log($"✓ 成功处理: {result.BaseFileName} (合并了 {result.SourceFiles.Count} 个文件)"); + } + else + { + failCount++; + Log($"✗ 处理失败: {result.ErrorMessage}"); + } + } + + Log($"处理完成! 成功: {successCount}, 失败: {failCount}"); + + if (successCount > 0) + { + MessageBox.Show($"书签合并完成!\n成功处理 {successCount} 个文件\n输出路径: {txtOutputPath.Text}", + "处理完成", MessageBoxButtons.OK, MessageBoxIcon.Information); + } + else + { + MessageBox.Show("没有成功处理任何文件,请检查输入路径和文件格式。", + "处理失败", MessageBoxButtons.OK, MessageBoxIcon.Warning); + } + } + catch (Exception ex) + { + Log($"错误: {ex.Message}"); + MessageBox.Show($"处理过程中发生错误:\n{ex.Message}", + "错误", MessageBoxButtons.OK, MessageBoxIcon.Error); + } + finally + { + // 重新启用按钮 + btnMerge.Enabled = true; + btnClear.Enabled = true; + } } private void Log(string msg) diff --git a/Program.cs b/Program.cs index 915ed6c..6b2bab8 100644 --- a/Program.cs +++ b/Program.cs @@ -17,7 +17,7 @@ namespace SlideCombine { MessageBox.Show( "检测到您的系统未安装 .NET Framework 4.8\n\n" + - "Excel合并工具需要此环境才能运行。\n\n" + + "PDF书签合并工具需要此环境才能运行。\n\n" + "请前往以下链接下载并安装:\n" + "https://dotnet.microsoft.com/download/dotnet-framework/net48\n\n" + "安装完成后请重新运行本程序。", diff --git a/README.md b/README.md new file mode 100644 index 0000000..6cfb5bb --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +# PDF书签合并工具 + +这是一个用于合并PDF书签文件的Windows桌面应用程序,基于C# WinForms开发。 + +## 功能特性 + +- **自动提取书签**:从指定路径下的`FreePic2Pdf_bkmk`文件中提取书签内容 +- **智能合并**:按文件名前缀自动分组合并(如"CH-875 1-3"和"CH-875 4-6"合并为"CH-875.txt") +- **格式化输出**:按照指定格式输出书签内容 +- 
**路径选择**:支持自定义选择PDF源文件夹和TXT输出文件夹 +- **进度显示**:实时显示处理进度和日志信息 + +## 输出格式 + +处理后的文件内容格式如下: + +``` +tableOfContents: +书签标题1----------页码1<br>
+书签标题2----------页码2<br>
+书签标题3----------页码3<br>
+subject: +<> +tableOfContents: +书签标题4----------页码4<br>
+书签标题5----------页码5<br>
+subject: +``` + +## 编译要求 + +- .NET Framework 4.8 +- Windows操作系统 +- Visual Studio 2019+ 或 Visual Studio Build Tools + +## 编译步骤 + +1. 安装.NET Framework 4.8 +2. 使用Visual Studio打开`SlideCombine.csproj` +3. 选择`Release`配置 +4. 按F6生成解决方案 + +或者使用命令行: +```bash +msbuild SlideCombine.csproj /p:Configuration=Release +``` + +## 使用方法 + +1. 运行`SlideCombine.exe` +2. 选择包含PDF文件夹的源路径 +3. 选择TXT文件的输出路径 +4. 点击"合并书签"按钮开始处理 +5. 等待处理完成,查看结果 + +## 文件夹结构示例 + +输入文件夹结构: +``` +PDF文件夹/ +├── CH-875 1-3/ +│ └── FreePic2Pdf_bkmk +├── CH-875 4-6/ +│ └── FreePic2Pdf_bkmk +└── CH-876 1-2/ + └── FreePic2Pdf_bkmk +``` + +输出结果: +``` +TXT输出路径/ +├── CH-875.txt (合并了CH-875 1-3和CH-875 4-6) +└── CH-876.txt (来自CH-876 1-2) +``` + +## 支持的编码 + +程序自动检测文件编码: +- 优先使用UTF-8 +- 如果UTF-8失败,自动切换到GBK编码 + +## 错误处理 + +- 自动检查路径有效性 +- 提供详细的错误信息 +- 支持批量处理,单个文件失败不影响其他文件 + +## 技术架构 + +- `BookmarkExtractor.cs`: 书签提取逻辑 +- `ContentFormatter.cs`: 内容格式化 +- `FileMerger.cs`: 文件合并处理 +- `Form1.cs`: 主界面和用户交互 \ No newline at end of file