实现完整的元数据格式支持

- 添加MetadataModel.cs支持完整的元数据字段
- 更新FileMerger.cs从TXT文件读取元数据,从bkmk文件读取目录
- 支持所有元数据字段:title, Other titles, Volume, ISBN, creator等
- 修正书签连接符为14个短横线(---------------)
- 添加UTF-8/GBK编码自动检测
- 更新ContentFormatter.cs支持元数据文档合并

现在程序能够:
1. 从TXT文件读取完整的元数据信息
2. 从FreePic2Pdf_bkmk.txt文件提取书签目录
3. 按照需求格式合并输出完整内容

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
yuuko 2025-11-24 15:51:08 +08:00
parent 0332dcd715
commit e2c894f229
3 changed files with 297 additions and 10 deletions

View File

@ -1,5 +1,6 @@
using System.Collections.Generic;
using System.Text;
using System.Linq;
namespace SlideCombine
{
@ -41,6 +42,39 @@ namespace SlideCombine
return sb.ToString();
}
public static string FormatMetadataDocument(DocumentMetadata metadata)
{
if (metadata == null)
return string.Empty;
return metadata.ToFormattedString();
}
public static string CombineFormattedMetadataDocuments(List<DocumentMetadata> documents)
{
if (documents == null || documents.Count == 0)
return string.Empty;
var combined = new StringBuilder();
for (int i = 0; i < documents.Count; i++)
{
var doc = documents[i];
if (i > 0)
{
// 在不同文档内容之间用"<>"分隔
combined.AppendLine();
combined.AppendLine("<>");
combined.AppendLine();
}
combined.Append(doc.ToFormattedString());
}
return combined.ToString();
}
public static string CombineFormattedContents(List<string> formattedContents)
{
if (formattedContents == null || formattedContents.Count == 0)

View File

@ -13,6 +13,7 @@ namespace SlideCombine
public string OutputContent { get; set; }
public bool Success { get; set; }
public string ErrorMessage { get; set; }
public List<DocumentMetadata> MetadataDocuments { get; set; }
}
public class FileMerger
@ -85,26 +86,28 @@ namespace SlideCombine
{
BaseFileName = baseName,
SourceFiles = bkmkFiles,
Success = true
Success = true,
MetadataDocuments = new List<DocumentMetadata>()
};
try
{
var allFormattedContents = new List<string>();
foreach (var bkmkFile in bkmkFiles)
{
// 提取书签
var bookmarks = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
// 获取对应的TXT文件路径
var txtFile = GetCorrespondingTxtFile(bkmkFile);
// 格式化内容
var formattedContent = ContentFormatter.FormatBookmarks(bookmarks);
// 创建元数据文档
var metadata = CreateMetadataFromFiles(txtFile, bkmkFile);
allFormattedContents.Add(formattedContent);
if (metadata != null)
{
result.MetadataDocuments.Add(metadata);
}
}
// 合并所有格式化的内容
var combinedContent = ContentFormatter.CombineFormattedContents(allFormattedContents);
// 合并所有元数据文档
var combinedContent = ContentFormatter.CombineFormattedMetadataDocuments(result.MetadataDocuments);
result.OutputContent = combinedContent;
}
catch (Exception ex)
@ -116,6 +119,122 @@ namespace SlideCombine
return result;
}
private static string GetCorrespondingTxtFile(string bkmkFile)
{
var directory = Path.GetDirectoryName(bkmkFile);
var folderName = new DirectoryInfo(directory).Name;
// 在TXT文件夹中查找对应的文件
var txtDirectory = Path.Combine(Directory.GetParent(directory).Parent.FullName, "TXT");
var txtFile = Path.Combine(txtDirectory, $"{folderName}.txt");
return File.Exists(txtFile) ? txtFile : null;
}
private static DocumentMetadata CreateMetadataFromFiles(string txtFile, string bkmkFile)
{
var metadata = new DocumentMetadata();
if (File.Exists(txtFile))
{
// 从TXT文件读取元数据
ReadMetadataFromTxt(txtFile, metadata);
}
// 从bkmk文件提取书签
if (File.Exists(bkmkFile))
{
metadata.TableOfContents = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
}
return metadata;
}
private static void ReadMetadataFromTxt(string txtFile, DocumentMetadata metadata)
{
try
{
string[] lines;
try
{
lines = File.ReadAllLines(txtFile, Encoding.UTF8);
}
catch
{
lines = File.ReadAllLines(txtFile, Encoding.GetEncoding("GBK"));
}
foreach (var line in lines)
{
var parts = line.Split(new[] { ':' }, 2);
if (parts.Length == 2)
{
var key = parts[0].Trim();
var value = parts[1].Trim();
switch (key)
{
case "title":
metadata.Title = value;
break;
case "Other titles":
metadata.OtherTitles = value;
break;
case "Volume":
metadata.Volume = value;
break;
case "ISBN":
metadata.ISBN = value;
break;
case "creator":
metadata.Creator = value;
break;
case "contributor":
metadata.Contributor = value;
break;
case "issuedDate":
metadata.IssuedDate = value;
break;
case "publisher":
metadata.Publisher = value;
break;
case "place":
metadata.Place = value;
break;
case "Classification number":
metadata.ClassificationNumber = value;
break;
case "page":
metadata.Page = value;
break;
case "subject":
metadata.Subject = value;
break;
case "date":
metadata.Date = value;
break;
case "spatial":
metadata.Spatial = value;
break;
case "Other ISBN":
metadata.OtherISBN = value;
break;
case "Other time":
metadata.OtherTime = value;
break;
case "url":
metadata.Url = value;
break;
}
}
}
}
catch (Exception ex)
{
throw new Exception($"读取TXT文件 {txtFile} 失败: {ex.Message}");
}
}
public static void SaveResults(List<ProcessResult> results, string outputPath)
{
if (!Directory.Exists(outputPath))

134
MetadataModel.cs Normal file
View File

@ -0,0 +1,134 @@
using System;
using System.Collections.Generic;
namespace SlideCombine
{
public class DocumentMetadata
{
public string Title { get; set; }
public string OtherTitles { get; set; }
public string Volume { get; set; }
public string ISBN { get; set; }
public string Creator { get; set; }
public string Contributor { get; set; }
public string IssuedDate { get; set; }
public string Publisher { get; set; }
public string Place { get; set; }
public string ClassificationNumber { get; set; }
public string Page { get; set; }
public List<BookmarkItem> TableOfContents { get; set; }
public string Subject { get; set; }
public string Date { get; set; }
public string Spatial { get; set; }
public string OtherISBN { get; set; }
public string OtherTime { get; set; }
public string Url { get; set; }
public DocumentMetadata()
{
TableOfContents = new List<BookmarkItem>();
}
public string ToFormattedString()
{
var result = new System.Text.StringBuilder();
// Title行
result.AppendLine($"title:{Title}");
// Other titles行如果有
if (!string.IsNullOrEmpty(OtherTitles))
{
result.AppendLine($"Other titles:{OtherTitles}");
}
// Volume行
result.AppendLine($"Volume:{Volume}");
// ISBN行
if (!string.IsNullOrEmpty(ISBN))
{
result.AppendLine($"ISBN:{ISBN}");
}
// Creator行
if (!string.IsNullOrEmpty(Creator))
{
result.AppendLine($"creator:{Creator}");
}
// Contributor行
if (!string.IsNullOrEmpty(Contributor))
{
result.AppendLine($"contributor:{Contributor}");
}
// IssuedDate行
result.AppendLine($"issuedDate:{IssuedDate}");
// Publisher行
result.AppendLine($"publisher:{Publisher}");
// Place行
result.AppendLine($"place:{Place}");
// Classification number行
result.AppendLine($"Classification number:{ClassificationNumber}");
// Page行
result.AppendLine($"page:{Page}");
// Table of contents
result.AppendLine("tableOfContents:");
foreach (var bookmark in TableOfContents)
{
if (!string.IsNullOrEmpty(bookmark.Title))
{
result.Append(bookmark.Title.Trim());
if (!string.IsNullOrEmpty(bookmark.Page))
{
// 注意使用14个短横线与需求示例一致
result.Append("---------------");
result.Append(bookmark.Page);
}
result.AppendLine("<br/>");
}
}
// Subject行
result.AppendLine($"subject:{Subject}");
// Date行
if (!string.IsNullOrEmpty(Date))
{
result.AppendLine($"date:{Date}");
}
// Spatial行
if (!string.IsNullOrEmpty(Spatial))
{
result.AppendLine($"spatial:{Spatial}");
}
// Other ISBN行
if (!string.IsNullOrEmpty(OtherISBN))
{
result.AppendLine($"Other ISBN:{OtherISBN}");
}
// Other time行
if (!string.IsNullOrEmpty(OtherTime))
{
result.AppendLine($"Other time:{OtherTime}");
}
// URL行
if (!string.IsNullOrEmpty(Url))
{
result.AppendLine($"url:{Url}");
}
return result.ToString();
}
}
}