SlideCombine/FileMerger.cs
yuuko e2c894f229 实现完整的元数据格式支持
- 添加MetadataModel.cs支持完整的元数据字段
- 更新FileMerger.cs从TXT文件读取元数据,从bkmk文件读取目录
- 支持所有元数据字段:title, Other titles, Volume, ISBN, creator等
- 修正书签连接符为14个短横线(---------------)
- 添加UTF-8/GBK编码自动检测
- 更新ContentFormatter.cs支持元数据文档合并

现在程序能够:
1. 从TXT文件读取完整的元数据信息
2. 从FreePic2Pdf_bkmk.txt文件提取书签目录
3. 按照需求格式合并输出完整内容

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-24 15:51:08 +08:00

258 lines
9.3 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace SlideCombine
{
/// <summary>
/// Outcome of processing one group of bookmark files that share a base name.
/// </summary>
public class ProcessResult
{
    /// <summary>Base name of the group; SaveResults uses it as the output file name (without extension).</summary>
    public string BaseFileName { get; set; }

    /// <summary>
    /// Paths of the bookmark files that belong to the group.
    /// Defaults to an empty list so results created without it (e.g. error results) can still be enumerated.
    /// </summary>
    public List<string> SourceFiles { get; set; } = new List<string>();

    /// <summary>Merged text produced for the group; null when nothing was produced.</summary>
    public string OutputContent { get; set; }

    /// <summary>True when the group was processed without an exception.</summary>
    public bool Success { get; set; }

    /// <summary>Human-readable error description when <see cref="Success"/> is false.</summary>
    public string ErrorMessage { get; set; }

    /// <summary>
    /// Metadata documents built for the group, one per bookmark file.
    /// Defaults to an empty list for the same reason as <see cref="SourceFiles"/>.
    /// </summary>
    public List<DocumentMetadata> MetadataDocuments { get; set; } = new List<DocumentMetadata>();
}
public class FileMerger
{
/// <summary>
/// Recursively finds every FreePic2Pdf_bkmk / FreePic2Pdf_bkmk.txt file under
/// <paramref name="pdfRootPath"/>, groups the files by the base name of their containing
/// folder, and processes each group.
/// </summary>
/// <param name="pdfRootPath">Root directory that is searched recursively for bookmark files.</param>
/// <param name="txtOutputPath">Not used by this method.</param>
/// <returns>
/// One result per group. If discovery or grouping throws (including when no bookmark file
/// is found), a failed result carrying the exception message is appended instead.
/// </returns>
public static List<ProcessResult> ProcessAllFolders(string pdfRootPath, string txtOutputPath)
{
    var outcomes = new List<ProcessResult>();
    try
    {
        // Bookmark files may exist with or without the .txt extension; collect both,
        // extension-less matches first.
        var withoutExtension = Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories);
        var withExtension = Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk.txt", SearchOption.AllDirectories);
        var located = withoutExtension.Concat(withExtension).ToList();

        if (!located.Any())
        {
            throw new Exception($"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 或 FreePic2Pdf_bkmk.txt 文件");
        }

        // Group by the base name, i.e. the part of the containing folder's name before the first space.
        var buckets = located
            .GroupBy(path => GetBaseFileName(new DirectoryInfo(Path.GetDirectoryName(path)).Name))
            .ToList();

        // Process every group with its files in sorted order.
        foreach (var bucket in buckets)
        {
            var orderedFiles = bucket.OrderBy(path => path).ToList();
            outcomes.Add(ProcessFileGroup(bucket.Key, orderedFiles));
        }
    }
    catch (Exception failure)
    {
        outcomes.Add(new ProcessResult
        {
            Success = false,
            ErrorMessage = failure.Message
        });
    }
    return outcomes;
}
/// <summary>
/// Returns the part of <paramref name="folderName"/> that precedes its first space.
/// The whole name is returned when it contains no space or starts with one.
/// </summary>
private static string GetBaseFileName(string folderName)
{
    int firstSpace = folderName.IndexOf(' ');
    if (firstSpace <= 0)
    {
        // No space at all (-1) or a leading space (0): keep the name untouched.
        return folderName;
    }
    return folderName.Substring(0, firstSpace);
}
/// <summary>
/// Builds one metadata document per bookmark file in the group and merges them into a single output text.
/// </summary>
/// <param name="baseName">Base name shared by the group.</param>
/// <param name="bkmkFiles">Bookmark files that belong to the group.</param>
/// <returns>
/// A result flagged as successful with the merged content, or flagged as failed with an
/// error message when any step throws. Documents built before the failure stay in the result.
/// </returns>
private static ProcessResult ProcessFileGroup(string baseName, List<string> bkmkFiles)
{
    var outcome = new ProcessResult();
    outcome.BaseFileName = baseName;
    outcome.SourceFiles = bkmkFiles;
    outcome.Success = true;
    outcome.MetadataDocuments = new List<DocumentMetadata>();

    try
    {
        foreach (var bookmarkPath in bkmkFiles)
        {
            // Pair the bookmark file with its metadata TXT file and build a document from both.
            var document = CreateMetadataFromFiles(GetCorrespondingTxtFile(bookmarkPath), bookmarkPath);
            if (document == null)
            {
                continue;
            }
            outcome.MetadataDocuments.Add(document);
        }

        // Merge all metadata documents into the final output text.
        outcome.OutputContent = ContentFormatter.CombineFormattedMetadataDocuments(outcome.MetadataDocuments);
    }
    catch (Exception failure)
    {
        outcome.ErrorMessage = $"处理文件组 {baseName} 时出错: {failure.Message}";
        outcome.Success = false;
    }

    return outcome;
}
/// <summary>
/// Locates the metadata TXT file that belongs to a bookmark file. For a bookmark file in
/// <c>root/X/Folder/</c> the candidate is <c>root/TXT/Folder.txt</c>, i.e. a "TXT" directory
/// next to the parent of the bookmark folder.
/// </summary>
/// <param name="bkmkFile">Path of the bookmark file.</param>
/// <returns>
/// The path of the TXT file when it exists; otherwise null. Null is also returned when the
/// bookmark folder has no grandparent directory (it sits at or directly under a root).
/// </returns>
private static string GetCorrespondingTxtFile(string bkmkFile)
{
    var directory = Path.GetDirectoryName(bkmkFile);
    if (string.IsNullOrEmpty(directory))
    {
        return null;
    }

    var bookmarkFolder = new DirectoryInfo(directory);

    // DirectoryInfo.Parent is null for a root directory; guard both levels so a shallow
    // layout yields "no TXT file" instead of a NullReferenceException.
    var grandparent = bookmarkFolder.Parent?.Parent;
    if (grandparent == null)
    {
        return null;
    }

    // Look for <grandparent>/TXT/<folder name>.txt
    var txtFile = Path.Combine(grandparent.FullName, "TXT", $"{bookmarkFolder.Name}.txt");
    return File.Exists(txtFile) ? txtFile : null;
}
/// <summary>
/// Creates a metadata document, filling its fields from the TXT file and its table of
/// contents from the bookmark file. Each source is used only when the file exists.
/// </summary>
/// <param name="txtFile">Path of the metadata TXT file; may be null when none was found.</param>
/// <param name="bkmkFile">Path of the bookmark file.</param>
/// <returns>The populated document; never null.</returns>
private static DocumentMetadata CreateMetadataFromFiles(string txtFile, string bkmkFile)
{
    var document = new DocumentMetadata();

    // Metadata fields come from the TXT file.
    bool hasTxt = File.Exists(txtFile);
    if (hasTxt)
    {
        ReadMetadataFromTxt(txtFile, document);
    }

    // The table of contents comes from the bookmark file.
    bool hasBookmarks = File.Exists(bkmkFile);
    if (hasBookmarks)
    {
        document.TableOfContents = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
    }

    return document;
}
/// <summary>
/// Parses "key: value" lines from a metadata TXT file and stores every recognised field on
/// <paramref name="metadata"/>. Lines without a colon and lines with an unknown key are ignored.
/// Keys are matched case-sensitively after trimming.
/// </summary>
/// <param name="txtFile">Path of the TXT file to read.</param>
/// <param name="metadata">Document that receives the parsed values.</param>
/// <exception cref="Exception">
/// Thrown when the file cannot be read or decoded; the original error is kept as the inner exception.
/// </exception>
private static void ReadMetadataFromTxt(string txtFile, DocumentMetadata metadata)
{
    try
    {
        foreach (var line in ReadTxtLinesWithEncodingFallback(txtFile))
        {
            // Split on the first colon only, so values that contain colons (e.g. URLs) stay intact.
            var parts = line.Split(new[] { ':' }, 2);
            if (parts.Length != 2)
            {
                continue;
            }

            var key = parts[0].Trim();
            var value = parts[1].Trim();
            switch (key)
            {
                case "title":
                    metadata.Title = value;
                    break;
                case "Other titles":
                    metadata.OtherTitles = value;
                    break;
                case "Volume":
                    metadata.Volume = value;
                    break;
                case "ISBN":
                    metadata.ISBN = value;
                    break;
                case "creator":
                    metadata.Creator = value;
                    break;
                case "contributor":
                    metadata.Contributor = value;
                    break;
                case "issuedDate":
                    metadata.IssuedDate = value;
                    break;
                case "publisher":
                    metadata.Publisher = value;
                    break;
                case "place":
                    metadata.Place = value;
                    break;
                case "Classification number":
                    metadata.ClassificationNumber = value;
                    break;
                case "page":
                    metadata.Page = value;
                    break;
                case "subject":
                    metadata.Subject = value;
                    break;
                case "date":
                    metadata.Date = value;
                    break;
                case "spatial":
                    metadata.Spatial = value;
                    break;
                case "Other ISBN":
                    metadata.OtherISBN = value;
                    break;
                case "Other time":
                    metadata.OtherTime = value;
                    break;
                case "url":
                    metadata.Url = value;
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so the root cause and stack trace are not lost.
        throw new Exception($"读取TXT文件 {txtFile} 失败: {ex.Message}", ex);
    }
}

/// <summary>
/// Reads all lines of a text file as UTF-8, falling back to GBK when the bytes are not valid UTF-8.
/// </summary>
private static string[] ReadTxtLinesWithEncodingFallback(string txtFile)
{
    try
    {
        // Encoding.UTF8 silently replaces invalid bytes with U+FFFD and never throws, which
        // would leave the GBK fallback unreachable. A strict decoder makes invalid UTF-8 detectable.
        var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
        return File.ReadAllLines(txtFile, strictUtf8);
    }
    catch (DecoderFallbackException)
    {
        // Only a decoding failure triggers the fallback; I/O errors propagate to the caller.
        // NOTE(review): on .NET Core / .NET 5+ the GBK code page is only available after
        // registering CodePagesEncodingProvider — confirm the target runtime.
        return File.ReadAllLines(txtFile, Encoding.GetEncoding("GBK"));
    }
}
/// <summary>
/// Writes the merged content of every successful, non-empty result to
/// <c>{BaseFileName}.txt</c> inside <paramref name="outputPath"/>, creating the directory when needed.
/// Failed results and results without content are skipped.
/// </summary>
/// <param name="results">Results to persist.</param>
/// <param name="outputPath">Directory that receives the output files.</param>
public static void SaveResults(List<ProcessResult> results, string outputPath)
{
    bool outputDirMissing = !Directory.Exists(outputPath);
    if (outputDirMissing)
    {
        Directory.CreateDirectory(outputPath);
    }

    foreach (var entry in results)
    {
        bool nothingToWrite = !entry.Success || string.IsNullOrEmpty(entry.OutputContent);
        if (nothingToWrite)
        {
            continue;
        }

        // Save using UTF-8 encoding.
        var destination = Path.Combine(outputPath, entry.BaseFileName + ".txt");
        File.WriteAllText(destination, entry.OutputContent, Encoding.UTF8);
    }
}
}
}