实现PDF书签合并功能

- 添加BookmarkExtractor类用于从FreePic2Pdf_bkmk文件提取书签内容
- 添加ContentFormatter类实现内容格式化处理
- 添加FileMerger类实现文件智能合并功能
- 更新主界面支持路径选择和处理进度显示
- 支持按文件名前缀自动合并(如CH-875 1-3和CH-875 4-6合并为CH-875.txt)
- 输出格式符合需求:tableOfContents与subject之间插入格式化内容
- 支持UTF-8和GBK编码自动检测
- 添加详细的使用说明文档

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
yuuko 2025-11-24 15:44:37 +08:00
parent d03f4595ec
commit 505715c05e
7 changed files with 481 additions and 7 deletions

97
BookmarkExtractor.cs Normal file
View File

@ -0,0 +1,97 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SlideCombine
{
    /// <summary>
    /// One bookmark entry parsed from a FreePic2Pdf_bkmk file.
    /// </summary>
    public class BookmarkItem
    {
        /// <summary>Bookmark title: the tokens preceding the page token, joined by single spaces.</summary>
        public string Title { get; set; }

        /// <summary>Page token exactly as it appears in the file (digits or Roman-numeral letters).</summary>
        public string Page { get; set; }

        /// <summary>Free slot for formatted output; the extractor in this file never assigns it.</summary>
        public string FormattedContent { get; set; }
    }

    /// <summary>
    /// Reads a FreePic2Pdf_bkmk text file and turns every line of the form
    /// "title page" (separated by space, tab or ':') into a <see cref="BookmarkItem"/>.
    /// </summary>
    public class BookmarkExtractor
    {
        // UTF-8 decoder that throws on invalid byte sequences. Encoding.UTF8 silently
        // substitutes U+FFFD instead of throwing, which would make the GBK fallback unreachable.
        private static readonly Encoding StrictUtf8 = new UTF8Encoding(false, true);

        // Cached so the patterns are not re-parsed for every line.
        private static readonly System.Text.RegularExpressions.Regex ArabicPagePattern =
            new System.Text.RegularExpressions.Regex(@"^\d+$");

        private static readonly System.Text.RegularExpressions.Regex RomanPagePattern =
            new System.Text.RegularExpressions.Regex(@"^[IVXLCDMivxlcdm]+$");

        /// <summary>
        /// Extracts all bookmarks from the given file.
        /// </summary>
        /// <param name="bkmkFilePath">Path of the FreePic2Pdf_bkmk file.</param>
        /// <returns>
        /// The parsed bookmarks in file order. Lines that do not end in a page token are skipped,
        /// so the list may be empty.
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="Exception">
        /// Reading or decoding failed; the original error is available through <c>InnerException</c>.
        /// </exception>
        public static List<BookmarkItem> ExtractBookmarksFromBkmk(string bkmkFilePath)
        {
            if (!File.Exists(bkmkFilePath))
            {
                throw new FileNotFoundException($"FreePic2Pdf_bkmk文件不存在: {bkmkFilePath}");
            }

            var bookmarks = new List<BookmarkItem>();
            try
            {
                string content = ReadTextWithEncodingFallback(bkmkFilePath);

                // Split on either line-ending character; empty entries are dropped.
                var lines = content.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                foreach (var line in lines)
                {
                    var trimmedLine = line.Trim();
                    if (string.IsNullOrEmpty(trimmedLine))
                    {
                        continue;
                    }

                    var bookmark = ParseBookmarkLine(trimmedLine);
                    if (bookmark != null)
                    {
                        bookmarks.Add(bookmark);
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so the stack trace is not lost.
                throw new Exception($"读取书签文件失败: {ex.Message}", ex);
            }

            return bookmarks;
        }

        /// <summary>
        /// Reads the file as UTF-8 (a byte-order mark, if present, still selects the encoding)
        /// and retries as GBK when the bytes are not valid UTF-8.
        /// </summary>
        /// <remarks>
        /// This is a heuristic: a GBK file whose bytes happen to form valid UTF-8 is still read as UTF-8.
        /// </remarks>
        private static string ReadTextWithEncodingFallback(string path)
        {
            try
            {
                return File.ReadAllText(path, StrictUtf8);
            }
            catch (DecoderFallbackException)
            {
                return File.ReadAllText(path, Encoding.GetEncoding("GBK"));
            }
        }

        /// <summary>
        /// Parses one trimmed line. Returns null when the line has fewer than two tokens
        /// or its last token is not a page number.
        /// </summary>
        private static BookmarkItem ParseBookmarkLine(string line)
        {
            var parts = line.Split(new[] { ' ', '\t', ':' }, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
            {
                return null;
            }

            // The last token is taken to be the page; everything before it forms the title.
            var pagePart = parts[parts.Length - 1];
            if (!IsPageNumber(pagePart))
            {
                return null;
            }

            return new BookmarkItem
            {
                Page = pagePart,
                Title = string.Join(" ", parts, 0, parts.Length - 1)
            };
        }

        /// <summary>
        /// True when the text consists only of digits or only of Roman-numeral letters (either case).
        /// The Roman check is purely lexical, so ordinary words such as "mix" also match.
        /// </summary>
        private static bool IsPageNumber(string text)
        {
            return ArabicPagePattern.IsMatch(text) || RomanPagePattern.IsMatch(text);
        }
    }
}

67
ContentFormatter.cs Normal file
View File

@ -0,0 +1,67 @@
using System.Collections.Generic;
using System.Text;
namespace SlideCombine
{
/// <summary>
/// Turns extracted bookmarks into the "tableOfContents: ... subject:" text layout
/// and joins several such sections into one document.
/// </summary>
public class ContentFormatter
{
    /// <summary>
    /// Formats one list of bookmarks as a single section.
    /// </summary>
    /// <param name="bookmarks">Bookmarks to format; may be null or empty.</param>
    /// <returns>
    /// An empty string for a null or empty list. Otherwise a "tableOfContents:" line, then every
    /// bookmark with a non-empty title written as <c>title----------page&lt;br/&gt;</c> (the
    /// separator and page are omitted when the page is empty) with no line break between entries,
    /// then a line break and "subject:".
    /// </returns>
    public static string FormatBookmarks(List<BookmarkItem> bookmarks)
    {
        if (bookmarks == null || bookmarks.Count == 0)
        {
            return string.Empty;
        }

        var sb = new StringBuilder();
        sb.AppendLine("tableOfContents:");

        foreach (var bookmark in bookmarks)
        {
            // A null element is skipped instead of raising NullReferenceException.
            if (bookmark == null || string.IsNullOrEmpty(bookmark.Title))
            {
                continue;
            }

            sb.Append(bookmark.Title.Trim());
            if (!string.IsNullOrEmpty(bookmark.Page))
            {
                sb.Append("----------");
                sb.Append(bookmark.Page);
            }
            sb.Append("<br/>");
        }

        sb.AppendLine();
        sb.Append("subject:");
        return sb.ToString();
    }

    /// <summary>
    /// Joins formatted sections, placing a line break, a "&lt;&gt;" line and an empty line
    /// between consecutive sections.
    /// </summary>
    /// <param name="formattedContents">Sections to join; may be null or empty.</param>
    /// <returns>
    /// The joined text, or an empty string when there is nothing to join. Null or empty
    /// sections are ignored so they do not produce a dangling separator.
    /// </returns>
    public static string CombineFormattedContents(List<string> formattedContents)
    {
        if (formattedContents == null || formattedContents.Count == 0)
        {
            return string.Empty;
        }

        var combined = new StringBuilder();
        foreach (var section in formattedContents)
        {
            if (string.IsNullOrEmpty(section))
            {
                continue;
            }

            // Separator only between two real sections, never before the first one.
            if (combined.Length > 0)
            {
                combined.AppendLine();
                combined.AppendLine("<>");
                combined.AppendLine();
            }
            combined.Append(section);
        }
        return combined.ToString();
    }
}
}

137
FileMerger.cs Normal file
View File

@ -0,0 +1,137 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace SlideCombine
{
/// <summary>
/// Outcome of processing one group of bookmark files that share a base name.
/// </summary>
public class ProcessResult
{
/// <summary>Group name; also used as the output file name (with ".txt" appended) when results are saved.</summary>
public string BaseFileName { get; set; }
/// <summary>Paths of the bookmark files that belong to the group.</summary>
public List<string> SourceFiles { get; set; }
/// <summary>Merged, formatted text produced for the group.</summary>
public string OutputContent { get; set; }
/// <summary>True when the group was processed without an exception.</summary>
public bool Success { get; set; }
/// <summary>Failure description; only set when processing failed.</summary>
public string ErrorMessage { get; set; }
}
/// <summary>
/// Finds FreePic2Pdf_bkmk files below a root folder, groups them by the part of their
/// folder name before the first space, and merges each group into one formatted text.
/// </summary>
public class FileMerger
{
    /// <summary>
    /// Processes every FreePic2Pdf_bkmk file found (recursively) under <paramref name="pdfRootPath"/>.
    /// </summary>
    /// <param name="pdfRootPath">Root folder that is searched recursively.</param>
    /// <param name="txtOutputPath">Not used by this method; kept for interface compatibility.</param>
    /// <returns>
    /// One result per group. When no file is found, or the search itself fails, the list
    /// contains a single failed result carrying the error message. This method does not throw.
    /// </returns>
    public static List<ProcessResult> ProcessAllFolders(string pdfRootPath, string txtOutputPath)
    {
        var results = new List<ProcessResult>();
        try
        {
            var bkmkFiles = Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories);
            if (bkmkFiles.Length == 0)
            {
                // Reported as a failed result directly instead of throwing and catching locally.
                results.Add(new ProcessResult
                {
                    Success = false,
                    ErrorMessage = $"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 文件"
                });
                return results;
            }

            // Group by base name, i.e. the containing folder's name up to the first space.
            var fileGroups = new Dictionary<string, List<string>>();
            foreach (var bkmkFile in bkmkFiles)
            {
                var folderNameOnly = new DirectoryInfo(Path.GetDirectoryName(bkmkFile)).Name;
                var baseName = GetBaseFileName(folderNameOnly);

                List<string> members;
                if (!fileGroups.TryGetValue(baseName, out members))
                {
                    members = new List<string>();
                    fileGroups[baseName] = members;
                }
                members.Add(bkmkFile);
            }

            foreach (var group in fileGroups)
            {
                // Natural order so that "CH-875 10-12" is merged after "CH-875 4-6";
                // a plain string sort would place "10-12" first.
                var ordered = new List<string>(group.Value);
                ordered.Sort(CompareNatural);
                results.Add(ProcessFileGroup(group.Key, ordered));
            }
        }
        catch (Exception ex)
        {
            results.Add(new ProcessResult
            {
                Success = false,
                ErrorMessage = ex.Message
            });
        }
        return results;
    }

    /// <summary>
    /// Returns the part of the folder name before its first space, or the whole name
    /// when there is no space or the name starts with one.
    /// </summary>
    private static string GetBaseFileName(string folderName)
    {
        var spaceIndex = folderName.IndexOf(' ');
        return spaceIndex > 0 ? folderName.Substring(0, spaceIndex) : folderName;
    }

    /// <summary>
    /// Compares two paths so that runs of ASCII digits are ordered by numeric value and all
    /// other characters case-insensitively; ties are broken by an ordinal comparison.
    /// </summary>
    private static int CompareNatural(string left, string right)
    {
        int i = 0;
        int j = 0;
        while (i < left.Length && j < right.Length)
        {
            if (IsAsciiDigit(left[i]) && IsAsciiDigit(right[j]))
            {
                int startLeft = i;
                while (i < left.Length && IsAsciiDigit(left[i]))
                {
                    i++;
                }
                int startRight = j;
                while (j < right.Length && IsAsciiDigit(right[j]))
                {
                    j++;
                }

                // Without leading zeros, a longer digit run is always the larger number;
                // equal-length runs compare correctly character by character.
                string leftDigits = left.Substring(startLeft, i - startLeft).TrimStart('0');
                string rightDigits = right.Substring(startRight, j - startRight).TrimStart('0');
                if (leftDigits.Length != rightDigits.Length)
                {
                    return leftDigits.Length < rightDigits.Length ? -1 : 1;
                }
                int byDigits = string.CompareOrdinal(leftDigits, rightDigits);
                if (byDigits != 0)
                {
                    return byDigits;
                }
            }
            else
            {
                char leftChar = char.ToUpperInvariant(left[i]);
                char rightChar = char.ToUpperInvariant(right[j]);
                if (leftChar != rightChar)
                {
                    return leftChar < rightChar ? -1 : 1;
                }
                i++;
                j++;
            }
        }

        int byRemainder = (left.Length - i).CompareTo(right.Length - j);
        return byRemainder != 0 ? byRemainder : string.CompareOrdinal(left, right);
    }

    private static bool IsAsciiDigit(char c)
    {
        return c >= '0' && c <= '9';
    }

    /// <summary>
    /// Extracts, formats and merges the bookmarks of all files in one group.
    /// Any exception is recorded in the returned result instead of being thrown.
    /// </summary>
    private static ProcessResult ProcessFileGroup(string baseName, List<string> bkmkFiles)
    {
        var result = new ProcessResult
        {
            BaseFileName = baseName,
            SourceFiles = bkmkFiles,
            Success = true
        };
        try
        {
            var allFormattedContents = new List<string>();
            foreach (var bkmkFile in bkmkFiles)
            {
                var bookmarks = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
                allFormattedContents.Add(ContentFormatter.FormatBookmarks(bookmarks));
            }
            result.OutputContent = ContentFormatter.CombineFormattedContents(allFormattedContents);
        }
        catch (Exception ex)
        {
            result.Success = false;
            result.ErrorMessage = $"处理文件组 {baseName} 时出错: {ex.Message}";
        }
        return result;
    }

    /// <summary>
    /// Writes every successful, non-empty result to "&lt;BaseFileName&gt;.txt" in
    /// <paramref name="outputPath"/>, creating the folder when it does not exist.
    /// </summary>
    /// <param name="results">Results to save; failed or empty ones are skipped.</param>
    /// <param name="outputPath">Destination folder.</param>
    public static void SaveResults(List<ProcessResult> results, string outputPath)
    {
        if (!Directory.Exists(outputPath))
        {
            Directory.CreateDirectory(outputPath);
        }

        foreach (var result in results)
        {
            if (!result.Success || string.IsNullOrEmpty(result.OutputContent))
            {
                continue;
            }

            var outputFilePath = Path.Combine(outputPath, $"{result.BaseFileName}.txt");
            File.WriteAllText(outputFilePath, result.OutputContent, Encoding.UTF8);
        }
    }
}
}

8
Form1.Designer.cs generated
View File

@ -37,7 +37,7 @@ namespace SlideCombine
// 主窗体设置 // 主窗体设置
AutoScaleMode = AutoScaleMode.Font; AutoScaleMode = AutoScaleMode.Font;
ClientSize = new Size(600, 400); ClientSize = new Size(600, 400);
Text = "WinForms应用模板"; Text = "PDF书签合并工具";
StartPosition = FormStartPosition.CenterScreen; StartPosition = FormStartPosition.CenterScreen;
// 创建控件 // 创建控件
@ -61,7 +61,7 @@ namespace SlideCombine
txtLog = new TextBox(); txtLog = new TextBox();
// 设置源文件夹组 // 设置源文件夹组
grpSourceFolder.Text = "输入文件夹"; grpSourceFolder.Text = "PDF文件夹路径";
grpSourceFolder.Location = new Point(10, 10); grpSourceFolder.Location = new Point(10, 10);
grpSourceFolder.Size = new Size(580, 60); grpSourceFolder.Size = new Size(580, 60);
grpSourceFolder.TabStop = false; grpSourceFolder.TabStop = false;
@ -83,7 +83,7 @@ namespace SlideCombine
grpSourceFolder.Controls.Add(btnBrowseSource); grpSourceFolder.Controls.Add(btnBrowseSource);
// 设置输出文件夹组 // 设置输出文件夹组
grpOutputFolder.Text = "输出文件夹"; grpOutputFolder.Text = "TXT输出路径";
grpOutputFolder.Location = new Point(10, 80); grpOutputFolder.Location = new Point(10, 80);
grpOutputFolder.Size = new Size(580, 60); grpOutputFolder.Size = new Size(580, 60);
grpOutputFolder.TabStop = false; grpOutputFolder.TabStop = false;
@ -108,7 +108,7 @@ namespace SlideCombine
pnlButtons.Location = new Point(10, 150); pnlButtons.Location = new Point(10, 150);
pnlButtons.Size = new Size(580, 40); pnlButtons.Size = new Size(580, 40);
btnMerge.Text = "执行"; btnMerge.Text = "合并书签";
btnMerge.Location = new Point(10, 8); btnMerge.Location = new Point(10, 8);
btnMerge.Size = new Size(75, 25); btnMerge.Size = new Size(75, 25);
btnMerge.Click += new EventHandler(btnMerge_Click); btnMerge.Click += new EventHandler(btnMerge_Click);

View File

@ -1,4 +1,6 @@
using System; using System;
using System.Collections.Generic;
using System.IO;
using System.Drawing; using System.Drawing;
using System.Windows.Forms; using System.Windows.Forms;
@ -67,8 +69,86 @@ namespace SlideCombine
private void btnMerge_Click(object sender, EventArgs e) private void btnMerge_Click(object sender, EventArgs e)
{ {
MessageBox.Show("HelloWorld", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information); try
Log("HelloWorld"); {
// 验证输入
if (string.IsNullOrWhiteSpace(txtSourcePath.Text) || string.IsNullOrWhiteSpace(txtOutputPath.Text))
{
MessageBox.Show("请选择PDF文件夹路径和TXT输出路径", "提示", MessageBoxButtons.OK, MessageBoxIcon.Warning);
return;
}
if (!Directory.Exists(txtSourcePath.Text))
{
MessageBox.Show("指定的PDF文件夹路径不存在", "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
return;
}
// 禁用按钮,防止重复点击
btnMerge.Enabled = false;
btnClear.Enabled = false;
// 重置进度条和日志
progressBar.Value = 0;
txtLog.Clear();
Log("开始处理PDF书签文件...");
// 处理文件
var results = FileMerger.ProcessAllFolders(txtSourcePath.Text, txtOutputPath.Text);
// 显示进度
progressBar.Value = 50;
Log($"找到 {results.Count} 个文件组需要处理");
// 保存结果
FileMerger.SaveResults(results, txtOutputPath.Text);
progressBar.Value = 100;
// 统计成功和失败的数量
int successCount = 0;
int failCount = 0;
var sb = new StringBuilder();
foreach (var result in results)
{
if (result.Success)
{
successCount++;
Log($"✓ 成功处理: {result.BaseFileName} (合并了 {result.SourceFiles.Count} 个文件)");
}
else
{
failCount++;
Log($"✗ 处理失败: {result.ErrorMessage}");
}
}
Log($"处理完成! 成功: {successCount}, 失败: {failCount}");
if (successCount > 0)
{
MessageBox.Show($"书签合并完成!\n成功处理 {successCount} 个文件\n输出路径: {txtOutputPath.Text}",
"处理完成", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
else
{
MessageBox.Show("没有成功处理任何文件,请检查输入路径和文件格式。",
"处理失败", MessageBoxButtons.OK, MessageBoxIcon.Warning);
}
}
catch (Exception ex)
{
Log($"错误: {ex.Message}");
MessageBox.Show($"处理过程中发生错误:\n{ex.Message}",
"错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
finally
{
// 重新启用按钮
btnMerge.Enabled = true;
btnClear.Enabled = true;
}
} }
private void Log(string msg) private void Log(string msg)

View File

@ -17,7 +17,7 @@ namespace SlideCombine
{ {
MessageBox.Show( MessageBox.Show(
"检测到您的系统未安装 .NET Framework 4.8\n\n" + "检测到您的系统未安装 .NET Framework 4.8\n\n" +
"Excel合并工具需要此环境才能运行。\n\n" + "PDF书签合并工具需要此环境才能运行。\n\n" +
"请前往以下链接下载并安装:\n" + "请前往以下链接下载并安装:\n" +
"https://dotnet.microsoft.com/download/dotnet-framework/net48\n\n" + "https://dotnet.microsoft.com/download/dotnet-framework/net48\n\n" +
"安装完成后请重新运行本程序。", "安装完成后请重新运行本程序。",

93
README.md Normal file
View File

@ -0,0 +1,93 @@
# PDF书签合并工具
这是一个用于合并PDF书签文件的Windows桌面应用程序，基于C# WinForms开发。
## 功能特性
- **自动提取书签**:从指定路径下的`FreePic2Pdf_bkmk`文件中提取书签内容
- **智能合并**:按文件名前缀自动分组合并(如"CH-875 1-3"和"CH-875 4-6"合并为"CH-875.txt")
- **格式化输出**:按照指定格式输出书签内容
- **路径选择**:支持自定义选择PDF源文件夹和TXT输出文件夹
- **进度显示**:实时显示处理进度和日志信息
## 输出格式
处理后的文件内容格式如下:
```
tableOfContents:
书签标题1----------页码1<br/>
书签标题2----------页码2<br/>
书签标题3----------页码3<br/>
subject:
<>
tableOfContents:
书签标题4----------页码4<br/>
书签标题5----------页码5<br/>
subject:
```
## 编译要求
- .NET Framework 4.8
- Windows操作系统
- Visual Studio 2019+ 或 Visual Studio Build Tools
## 编译步骤
1. 安装.NET Framework 4.8
2. 使用Visual Studio打开`SlideCombine.csproj`
3. 选择`Release`配置
4. 按F6生成解决方案
或者使用命令行:
```bash
msbuild SlideCombine.csproj /p:Configuration=Release
```
## 使用方法
1. 运行`SlideCombine.exe`
2. 选择包含PDF文件夹的源路径
3. 选择TXT文件的输出路径
4. 点击"合并书签"按钮开始处理
5. 等待处理完成,查看结果
## 文件夹结构示例
输入文件夹结构:
```
PDF文件夹/
├── CH-875 1-3/
│ └── FreePic2Pdf_bkmk
├── CH-875 4-6/
│ └── FreePic2Pdf_bkmk
└── CH-876 1-2/
└── FreePic2Pdf_bkmk
```
输出结果:
```
TXT输出路径/
├── CH-875.txt (合并了CH-875 1-3和CH-875 4-6)
└── CH-876.txt (来自CH-876 1-2)
```
## 支持的编码
程序自动检测文件编码:
- 优先使用UTF-8
- 如果UTF-8失败，自动切换到GBK编码
## 错误处理
- 自动检查路径有效性
- 提供详细的错误信息
- 支持批量处理,单个文件失败不影响其他文件
## 技术架构
- `BookmarkExtractor.cs`: 书签提取逻辑
- `ContentFormatter.cs`: 内容格式化
- `FileMerger.cs`: 文件合并处理
- `Form1.cs`: 主界面和用户交互