实现PDF书签合并功能
- 添加BookmarkExtractor类用于从FreePic2Pdf_bkmk文件提取书签内容 - 添加ContentFormatter类实现内容格式化处理 - 添加FileMerger类实现文件智能合并功能 - 更新主界面支持路径选择和处理进度显示 - 支持按文件名前缀自动合并(如CH-875 1-3和CH-875 4-6合并为CH-875.txt) - 输出格式符合需求:tableOfContents与subject之间插入格式化内容 - 支持UTF-8和GBK编码自动检测 - 添加详细的使用说明文档 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
d03f4595ec
commit
505715c05e
97
BookmarkExtractor.cs
Normal file
97
BookmarkExtractor.cs
Normal file
@ -0,0 +1,97 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace SlideCombine
{
    /// <summary>
    /// One bookmark entry parsed from a FreePic2Pdf_bkmk file.
    /// </summary>
    public class BookmarkItem
    {
        /// <summary>Bookmark title: the text of the line that precedes the page token.</summary>
        public string Title { get; set; }

        /// <summary>Page token exactly as written in the file (digits or Roman-numeral letters).</summary>
        public string Page { get; set; }

        /// <summary>Optional pre-rendered text; the extractor does not populate this property.</summary>
        public string FormattedContent { get; set; }
    }

    /// <summary>
    /// Reads a FreePic2Pdf_bkmk text file and turns every "title + page" line into a
    /// <see cref="BookmarkItem"/>.
    /// </summary>
    public class BookmarkExtractor
    {
        private static readonly char[] LineBreaks = { '\r', '\n' };

        // Characters that may separate the title from the page token.
        private static readonly char[] FieldSeparators = { ' ', '\t', ':' };

        // Cached once instead of being re-parsed for every line.
        private static readonly System.Text.RegularExpressions.Regex PagePattern =
            new System.Text.RegularExpressions.Regex(@"^(?:\d+|[IVXLCDMivxlcdm]+)$");

        /// <summary>
        /// Extracts all bookmark lines from the given file.
        /// </summary>
        /// <param name="bkmkFilePath">Path of the FreePic2Pdf_bkmk file to read.</param>
        /// <returns>
        /// The parsed bookmarks in file order. Lines that do not end with a page token are skipped,
        /// so the list may be empty.
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="IOException">
        /// The file could not be read or decoded; the original failure is kept as the inner exception.
        /// </exception>
        public static List<BookmarkItem> ExtractBookmarksFromBkmk(string bkmkFilePath)
        {
            if (!File.Exists(bkmkFilePath))
            {
                throw new FileNotFoundException($"FreePic2Pdf_bkmk文件不存在: {bkmkFilePath}");
            }

            var bookmarks = new List<BookmarkItem>();

            try
            {
                var content = ReadTextWithGbkFallback(bkmkFilePath);

                foreach (var line in content.Split(LineBreaks, StringSplitOptions.RemoveEmptyEntries))
                {
                    var bookmark = ParseBookmarkLine(line.Trim());
                    if (bookmark != null)
                    {
                        bookmarks.Add(bookmark);
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the original exception attached so the root cause is not lost.
                throw new IOException($"读取书签文件失败: {ex.Message}", ex);
            }

            return bookmarks;
        }

        /// <summary>
        /// Reads the file as UTF-8 and falls back to GBK when the bytes are not valid UTF-8.
        /// </summary>
        /// <remarks>
        /// <see cref="Encoding.UTF8"/> silently replaces invalid bytes with U+FFFD and never throws,
        /// so a strict decoder is required for the fallback to be reachable. A byte-order mark,
        /// when present, is still honored by <see cref="File.ReadAllText(string, Encoding)"/>.
        /// </remarks>
        private static string ReadTextWithGbkFallback(string path)
        {
            try
            {
                var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
                return File.ReadAllText(path, strictUtf8);
            }
            catch (DecoderFallbackException)
            {
                return File.ReadAllText(path, Encoding.GetEncoding("GBK"));
            }
        }

        /// <summary>
        /// Splits a trimmed line into title and trailing page token.
        /// </summary>
        /// <returns>The parsed item, or <c>null</c> when the line has no title or no page token.</returns>
        private static BookmarkItem ParseBookmarkLine(string line)
        {
            // Only the LAST separator run divides title from page; separators inside the title
            // (e.g. "Chapter 1: Intro") are part of the title and must be kept verbatim.
            var text = line.TrimEnd(FieldSeparators);
            var lastSeparator = text.LastIndexOfAny(FieldSeparators);
            if (lastSeparator < 0)
            {
                return null;
            }

            var pagePart = text.Substring(lastSeparator + 1);
            if (!IsPageNumber(pagePart))
            {
                // No recognizable page token: skip the line.
                return null;
            }

            var title = text.Substring(0, lastSeparator).Trim(FieldSeparators);
            if (title.Length == 0)
            {
                return null;
            }

            return new BookmarkItem { Title = title, Page = pagePart };
        }

        /// <summary>
        /// Returns true when the token consists only of digits or only of Roman-numeral letters.
        /// </summary>
        private static bool IsPageNumber(string text)
        {
            return PagePattern.IsMatch(text);
        }
    }
}
|
||||
67
ContentFormatter.cs
Normal file
67
ContentFormatter.cs
Normal file
@ -0,0 +1,67 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace SlideCombine
{
    /// <summary>
    /// Renders parsed bookmarks as a "tableOfContents:" / "subject:" text section and joins
    /// several such sections into a single document.
    /// </summary>
    public class ContentFormatter
    {
        /// <summary>
        /// Builds one section: a "tableOfContents:" header line, then every titled bookmark as
        /// <c>title----------page&lt;br/&gt;</c> (the dash/page part is omitted when the page is
        /// empty), then a line break and the "subject:" marker.
        /// </summary>
        /// <param name="bookmarks">Bookmarks to render; entries without a title are skipped.</param>
        /// <returns>The section text, or an empty string when the list is null or empty.</returns>
        public static string FormatBookmarks(List<BookmarkItem> bookmarks)
        {
            if (bookmarks == null || bookmarks.Count == 0)
            {
                return string.Empty;
            }

            var output = new StringBuilder();
            output.AppendLine("tableOfContents:");

            foreach (var item in bookmarks)
            {
                if (string.IsNullOrEmpty(item.Title))
                {
                    continue;
                }

                // Title starts flush-left; ten dashes separate it from the page token.
                output.Append(item.Title.Trim());

                var page = item.Page;
                if (!string.IsNullOrEmpty(page))
                {
                    output.Append("----------").Append(page);
                }

                output.Append("<br/>");
            }

            output.AppendLine();
            output.Append("subject:");

            return output.ToString();
        }

        /// <summary>
        /// Concatenates the sections, placing a "&lt;&gt;" line (preceded by a line break and
        /// followed by an empty line) between consecutive sections.
        /// </summary>
        /// <param name="formattedContents">Sections produced by <see cref="FormatBookmarks"/>.</param>
        /// <returns>The joined text, or an empty string when the list is null or empty.</returns>
        public static string CombineFormattedContents(List<string> formattedContents)
        {
            if (formattedContents == null || formattedContents.Count == 0)
            {
                return string.Empty;
            }

            var newLine = System.Environment.NewLine;
            var separator = newLine + "<>" + newLine + newLine;

            return string.Join(separator, formattedContents);
        }
    }
}
|
||||
137
FileMerger.cs
Normal file
137
FileMerger.cs
Normal file
@ -0,0 +1,137 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
namespace SlideCombine
{
    /// <summary>
    /// Outcome of merging one group of bookmark files that share a base name.
    /// </summary>
    public class ProcessResult
    {
        /// <summary>Group key; also the output file name without the ".txt" extension.</summary>
        public string BaseFileName { get; set; }

        /// <summary>Bookmark file paths merged into this result, in merge order.</summary>
        public List<string> SourceFiles { get; set; }

        /// <summary>Combined, formatted text for the group.</summary>
        public string OutputContent { get; set; }

        /// <summary>False when the group (or the folder scan itself) failed.</summary>
        public bool Success { get; set; }

        /// <summary>Failure description; only set when <see cref="Success"/> is false.</summary>
        public string ErrorMessage { get; set; }
    }

    /// <summary>
    /// Finds FreePic2Pdf_bkmk files below a root folder, groups them by the part of their folder
    /// name before the first space, and merges each group into one text document.
    /// </summary>
    public class FileMerger
    {
        /// <summary>
        /// Scans <paramref name="pdfRootPath"/> recursively and processes every group found.
        /// </summary>
        /// <param name="pdfRootPath">Root folder to search for FreePic2Pdf_bkmk files.</param>
        /// <param name="txtOutputPath">
        /// Not used by this method; writing is done by <see cref="SaveResults"/>. Kept for
        /// compatibility with existing callers.
        /// </param>
        /// <returns>
        /// One result per group. Scan-level failures (including "no files found") are reported as an
        /// additional result with <c>Success == false</c> rather than thrown.
        /// </returns>
        public static List<ProcessResult> ProcessAllFolders(string pdfRootPath, string txtOutputPath)
        {
            var results = new List<ProcessResult>();

            try
            {
                var bkmkFiles = Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories);

                if (bkmkFiles.Length == 0)
                {
                    // Report directly instead of throwing and immediately catching our own exception.
                    results.Add(CreateFailure($"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 文件"));
                    return results;
                }

                // Group by base name (folder name up to the first space).
                var fileGroups = new Dictionary<string, List<string>>();

                foreach (var bkmkFile in bkmkFiles)
                {
                    var folderPath = Path.GetDirectoryName(bkmkFile);
                    var baseName = GetBaseFileName(new DirectoryInfo(folderPath).Name);

                    List<string> members;
                    if (!fileGroups.TryGetValue(baseName, out members))
                    {
                        members = new List<string>();
                        fileGroups[baseName] = members;
                    }

                    members.Add(bkmkFile);
                }

                // Natural ordering so that "CH-875 10-12" is merged after "CH-875 4-6".
                var naturalOrder = Comparer<string>.Create(CompareNatural);

                foreach (var group in fileGroups)
                {
                    var orderedFiles = group.Value.OrderBy(f => f, naturalOrder).ToList();
                    results.Add(ProcessFileGroup(group.Key, orderedFiles));
                }
            }
            catch (Exception ex)
            {
                results.Add(CreateFailure(ex.Message));
            }

            return results;
        }

        /// <summary>
        /// Builds a failed result that is not tied to a group. <c>SourceFiles</c> is an empty list
        /// so consumers can read its <c>Count</c> without a null check.
        /// </summary>
        private static ProcessResult CreateFailure(string message)
        {
            return new ProcessResult
            {
                SourceFiles = new List<string>(),
                Success = false,
                ErrorMessage = message
            };
        }

        /// <summary>
        /// Compares two strings treating runs of ASCII digits as numbers and everything else
        /// case-insensitively, character by character. Ordinal comparison breaks remaining ties so
        /// the ordering is total and deterministic.
        /// </summary>
        private static int CompareNatural(string left, string right)
        {
            int i = 0;
            int j = 0;

            while (i < left.Length && j < right.Length)
            {
                if (IsAsciiDigit(left[i]) && IsAsciiDigit(right[j]))
                {
                    int leftStart = i;
                    int rightStart = j;

                    while (i < left.Length && IsAsciiDigit(left[i]))
                    {
                        i++;
                    }

                    while (j < right.Length && IsAsciiDigit(right[j]))
                    {
                        j++;
                    }

                    // Compare numerically without parsing (no overflow): after stripping leading
                    // zeros the longer run is the larger number; equal lengths compare ordinally.
                    var leftDigits = left.Substring(leftStart, i - leftStart).TrimStart('0');
                    var rightDigits = right.Substring(rightStart, j - rightStart).TrimStart('0');

                    if (leftDigits.Length != rightDigits.Length)
                    {
                        return leftDigits.Length.CompareTo(rightDigits.Length);
                    }

                    var numeric = string.CompareOrdinal(leftDigits, rightDigits);
                    if (numeric != 0)
                    {
                        return numeric;
                    }
                }
                else
                {
                    var textual = char.ToUpperInvariant(left[i]).CompareTo(char.ToUpperInvariant(right[j]));
                    if (textual != 0)
                    {
                        return textual;
                    }

                    i++;
                    j++;
                }
            }

            var remaining = (left.Length - i).CompareTo(right.Length - j);
            return remaining != 0 ? remaining : string.CompareOrdinal(left, right);
        }

        private static bool IsAsciiDigit(char c)
        {
            return c >= '0' && c <= '9';
        }

        /// <summary>
        /// Returns the part of the folder name before its first space, or the whole name when it
        /// contains no space (or starts with one).
        /// </summary>
        private static string GetBaseFileName(string folderName)
        {
            var spaceIndex = folderName.IndexOf(' ');
            return spaceIndex > 0 ? folderName.Substring(0, spaceIndex) : folderName;
        }

        /// <summary>
        /// Extracts, formats and combines the bookmarks of every file in the group. Any exception
        /// is captured in the returned result instead of propagating, so one bad group does not
        /// stop the others.
        /// </summary>
        private static ProcessResult ProcessFileGroup(string baseName, List<string> bkmkFiles)
        {
            var result = new ProcessResult
            {
                BaseFileName = baseName,
                SourceFiles = bkmkFiles,
                Success = true
            };

            try
            {
                var allFormattedContents = new List<string>();

                foreach (var bkmkFile in bkmkFiles)
                {
                    var bookmarks = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
                    allFormattedContents.Add(ContentFormatter.FormatBookmarks(bookmarks));
                }

                result.OutputContent = ContentFormatter.CombineFormattedContents(allFormattedContents);
            }
            catch (Exception ex)
            {
                result.Success = false;
                result.ErrorMessage = $"处理文件组 {baseName} 时出错: {ex.Message}";
            }

            return result;
        }

        /// <summary>
        /// Writes every successful, non-empty result to "&lt;BaseFileName&gt;.txt" in
        /// <paramref name="outputPath"/> using UTF-8, creating the folder when needed.
        /// </summary>
        /// <param name="results">Results returned by <see cref="ProcessAllFolders"/>.</param>
        /// <param name="outputPath">Destination folder.</param>
        public static void SaveResults(List<ProcessResult> results, string outputPath)
        {
            // CreateDirectory is a no-op when the folder already exists.
            Directory.CreateDirectory(outputPath);

            foreach (var result in results)
            {
                if (!result.Success || string.IsNullOrEmpty(result.OutputContent))
                {
                    continue;
                }

                var outputFilePath = Path.Combine(outputPath, $"{result.BaseFileName}.txt");
                File.WriteAllText(outputFilePath, result.OutputContent, Encoding.UTF8);
            }
        }
    }
}
|
||||
8
Form1.Designer.cs
generated
8
Form1.Designer.cs
generated
@ -37,7 +37,7 @@ namespace SlideCombine
|
||||
// 主窗体设置
|
||||
AutoScaleMode = AutoScaleMode.Font;
|
||||
ClientSize = new Size(600, 400);
|
||||
Text = "WinForms应用模板";
|
||||
Text = "PDF书签合并工具";
|
||||
StartPosition = FormStartPosition.CenterScreen;
|
||||
|
||||
// 创建控件
|
||||
@ -61,7 +61,7 @@ namespace SlideCombine
|
||||
txtLog = new TextBox();
|
||||
|
||||
// 设置源文件夹组
|
||||
grpSourceFolder.Text = "输入文件夹";
|
||||
grpSourceFolder.Text = "PDF文件夹路径";
|
||||
grpSourceFolder.Location = new Point(10, 10);
|
||||
grpSourceFolder.Size = new Size(580, 60);
|
||||
grpSourceFolder.TabStop = false;
|
||||
@ -83,7 +83,7 @@ namespace SlideCombine
|
||||
grpSourceFolder.Controls.Add(btnBrowseSource);
|
||||
|
||||
// 设置输出文件夹组
|
||||
grpOutputFolder.Text = "输出文件夹";
|
||||
grpOutputFolder.Text = "TXT输出路径";
|
||||
grpOutputFolder.Location = new Point(10, 80);
|
||||
grpOutputFolder.Size = new Size(580, 60);
|
||||
grpOutputFolder.TabStop = false;
|
||||
@ -108,7 +108,7 @@ namespace SlideCombine
|
||||
pnlButtons.Location = new Point(10, 150);
|
||||
pnlButtons.Size = new Size(580, 40);
|
||||
|
||||
btnMerge.Text = "执行";
|
||||
btnMerge.Text = "合并书签";
|
||||
btnMerge.Location = new Point(10, 8);
|
||||
btnMerge.Size = new Size(75, 25);
|
||||
btnMerge.Click += new EventHandler(btnMerge_Click);
|
||||
|
||||
84
Form1.cs
84
Form1.cs
@ -1,4 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Drawing;
|
||||
using System.Windows.Forms;
|
||||
|
||||
@ -67,8 +69,86 @@ namespace SlideCombine
|
||||
|
||||
/// <summary>
/// Click handler for the merge button: validates the two paths, runs the bookmark merge,
/// writes the output files and reports per-group results in the log and a message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnMerge_Click(object sender, EventArgs e)
{
    try
    {
        // Validate input before touching any UI state.
        if (string.IsNullOrWhiteSpace(txtSourcePath.Text) || string.IsNullOrWhiteSpace(txtOutputPath.Text))
        {
            MessageBox.Show("请选择PDF文件夹路径和TXT输出路径", "提示", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            return;
        }

        if (!Directory.Exists(txtSourcePath.Text))
        {
            MessageBox.Show("指定的PDF文件夹路径不存在", "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        // Disable the buttons so the operation cannot be started twice.
        btnMerge.Enabled = false;
        btnClear.Enabled = false;

        // Reset progress bar and log.
        progressBar.Value = 0;
        txtLog.Clear();
        Log("开始处理PDF书签文件...");

        var results = FileMerger.ProcessAllFolders(txtSourcePath.Text, txtOutputPath.Text);

        progressBar.Value = 50;
        Log($"找到 {results.Count} 个文件组需要处理");

        FileMerger.SaveResults(results, txtOutputPath.Text);

        progressBar.Value = 100;

        // Tally successes and failures while logging each group.
        int successCount = 0;
        int failCount = 0;

        foreach (var result in results)
        {
            if (result.Success)
            {
                successCount++;
                Log($"✓ 成功处理: {result.BaseFileName} (合并了 {result.SourceFiles.Count} 个文件)");
            }
            else
            {
                failCount++;
                Log($"✗ 处理失败: {result.ErrorMessage}");
            }
        }

        Log($"处理完成! 成功: {successCount}, 失败: {failCount}");

        if (successCount > 0)
        {
            MessageBox.Show($"书签合并完成!\n成功处理 {successCount} 个文件\n输出路径: {txtOutputPath.Text}",
                "处理完成", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
        else
        {
            MessageBox.Show("没有成功处理任何文件,请检查输入路径和文件格式。",
                "处理失败", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        }
    }
    catch (Exception ex)
    {
        Log($"错误: {ex.Message}");
        MessageBox.Show($"处理过程中发生错误:\n{ex.Message}",
            "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    finally
    {
        // Always re-enable the buttons, including after an early return or an error.
        btnMerge.Enabled = true;
        btnClear.Enabled = true;
    }
}
|
||||
|
||||
private void Log(string msg)
|
||||
|
||||
@ -17,7 +17,7 @@ namespace SlideCombine
|
||||
{
|
||||
MessageBox.Show(
|
||||
"检测到您的系统未安装 .NET Framework 4.8\n\n" +
|
||||
"Excel合并工具需要此环境才能运行。\n\n" +
|
||||
"PDF书签合并工具需要此环境才能运行。\n\n" +
|
||||
"请前往以下链接下载并安装:\n" +
|
||||
"https://dotnet.microsoft.com/download/dotnet-framework/net48\n\n" +
|
||||
"安装完成后请重新运行本程序。",
|
||||
|
||||
93
README.md
Normal file
93
README.md
Normal file
@ -0,0 +1,93 @@
|
||||
# PDF书签合并工具
|
||||
|
||||
这是一个用于合并PDF书签文件的Windows桌面应用程序,基于C# WinForms开发。
|
||||
|
||||
## 功能特性
|
||||
|
||||
- **自动提取书签**:从指定路径下的`FreePic2Pdf_bkmk`文件中提取书签内容
|
||||
- **智能合并**:按文件名前缀自动分组合并(如"CH-875 1-3"和"CH-875 4-6"合并为"CH-875.txt")
|
||||
- **格式化输出**:按照指定格式输出书签内容
|
||||
- **路径选择**:支持自定义选择PDF源文件夹和TXT输出文件夹
|
||||
- **进度显示**:实时显示处理进度和日志信息
|
||||
|
||||
## 输出格式
|
||||
|
||||
处理后的文件内容格式如下:
|
||||
|
||||
```
|
||||
tableOfContents:
|
||||
书签标题1----------页码1<br/>
|
||||
书签标题2----------页码2<br/>
|
||||
书签标题3----------页码3<br/>
|
||||
subject:
|
||||
<>
|
||||
tableOfContents:
|
||||
书签标题4----------页码4<br/>
|
||||
书签标题5----------页码5<br/>
|
||||
subject:
|
||||
```
|
||||
|
||||
## 编译要求
|
||||
|
||||
- .NET Framework 4.8
|
||||
- Windows操作系统
|
||||
- Visual Studio 2019+ 或 Visual Studio Build Tools
|
||||
|
||||
## 编译步骤
|
||||
|
||||
1. 安装 .NET Framework 4.8 开发者工具包(Developer Pack,仅安装运行时无法编译)
|
||||
2. 使用Visual Studio打开`SlideCombine.csproj`
|
||||
3. 选择`Release`配置
|
||||
4. 按 Ctrl+Shift+B(C# 键盘方案下为 F6)生成解决方案
|
||||
|
||||
或者使用命令行:
|
||||
```bash
|
||||
msbuild SlideCombine.csproj /p:Configuration=Release
|
||||
```
|
||||
|
||||
## 使用方法
|
||||
|
||||
1. 运行`SlideCombine.exe`
|
||||
2. 选择包含PDF文件夹的源路径
|
||||
3. 选择TXT文件的输出路径
|
||||
4. 点击"合并书签"按钮开始处理
|
||||
5. 等待处理完成,查看结果
|
||||
|
||||
## 文件夹结构示例
|
||||
|
||||
输入文件夹结构:
|
||||
```
|
||||
PDF文件夹/
|
||||
├── CH-875 1-3/
|
||||
│ └── FreePic2Pdf_bkmk
|
||||
├── CH-875 4-6/
|
||||
│ └── FreePic2Pdf_bkmk
|
||||
└── CH-876 1-2/
|
||||
└── FreePic2Pdf_bkmk
|
||||
```
|
||||
|
||||
输出结果:
|
||||
```
|
||||
TXT输出路径/
|
||||
├── CH-875.txt (合并了CH-875 1-3和CH-875 4-6)
|
||||
└── CH-876.txt (来自CH-876 1-2)
|
||||
```
|
||||
|
||||
## 支持的编码
|
||||
|
||||
程序自动检测文件编码:
|
||||
- 优先使用UTF-8
|
||||
- 如果UTF-8失败,自动切换到GBK编码
|
||||
|
||||
## 错误处理
|
||||
|
||||
- 自动检查路径有效性
|
||||
- 提供详细的错误信息
|
||||
- 支持批量处理,单个文件失败不影响其他文件
|
||||
|
||||
## 技术架构
|
||||
|
||||
- `BookmarkExtractor.cs`: 书签提取逻辑
|
||||
- `ContentFormatter.cs`: 内容格式化
|
||||
- `FileMerger.cs`: 文件合并处理
|
||||
- `Form1.cs`: 主界面和用户交互
|
||||
Loading…
x
Reference in New Issue
Block a user