diff --git a/ContentFormatter.cs b/ContentFormatter.cs index ca68fea..a9cf776 100644 --- a/ContentFormatter.cs +++ b/ContentFormatter.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.Text; +using System.Linq; namespace SlideCombine { @@ -41,6 +42,39 @@ namespace SlideCombine return sb.ToString(); } + public static string FormatMetadataDocument(DocumentMetadata metadata) + { + if (metadata == null) + return string.Empty; + + return metadata.ToFormattedString(); + } + + public static string CombineFormattedMetadataDocuments(List documents) + { + if (documents == null || documents.Count == 0) + return string.Empty; + + var combined = new StringBuilder(); + + for (int i = 0; i < documents.Count; i++) + { + var doc = documents[i]; + + if (i > 0) + { + // Separate the contents of consecutive documents with a "<>" line + combined.AppendLine(); + combined.AppendLine("<>"); + combined.AppendLine(); + } + + combined.Append(doc.ToFormattedString()); + } + + return combined.ToString(); + } + public static string CombineFormattedContents(List formattedContents) { if (formattedContents == null || formattedContents.Count == 0) diff --git a/FileMerger.cs b/FileMerger.cs index 14b984e..b1b3c4a 100644 --- a/FileMerger.cs +++ b/FileMerger.cs @@ -13,6 +13,7 @@ namespace SlideCombine public string OutputContent { get; set; } public bool Success { get; set; } public string ErrorMessage { get; set; } + public List MetadataDocuments { get; set; } } public class FileMerger @@ -85,26 +86,28 @@ namespace SlideCombine { BaseFileName = baseName, SourceFiles = bkmkFiles, - Success = true + Success = true, + MetadataDocuments = new List() }; try { - var allFormattedContents = new List(); - foreach (var bkmkFile in bkmkFiles) { - // 提取书签 - var bookmarks = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile); + // Get the path of the corresponding TXT file + var txtFile = GetCorrespondingTxtFile(bkmkFile); - // 格式化内容 - var formattedContent = ContentFormatter.FormatBookmarks(bookmarks); + // Create the metadata document + var metadata = CreateMetadataFromFiles(txtFile, 
bkmkFile); - allFormattedContents.Add(formattedContent); + if (metadata != null) + { + result.MetadataDocuments.Add(metadata); + } } - // 合并所有格式化的内容 - var combinedContent = ContentFormatter.CombineFormattedContents(allFormattedContents); + // Combine all metadata documents + var combinedContent = ContentFormatter.CombineFormattedMetadataDocuments(result.MetadataDocuments); result.OutputContent = combinedContent; } catch (Exception ex) @@ -116,6 +119,122 @@ namespace SlideCombine return result; } + private static string GetCorrespondingTxtFile(string bkmkFile) + { + var directory = Path.GetDirectoryName(bkmkFile); + var folderName = new DirectoryInfo(directory).Name; + + // Look up the matching file in the TXT folder + var txtDirectory = Path.Combine(Directory.GetParent(directory).Parent.FullName, "TXT"); + var txtFile = Path.Combine(txtDirectory, $"{folderName}.txt"); + + return File.Exists(txtFile) ? txtFile : null; + } + + private static DocumentMetadata CreateMetadataFromFiles(string txtFile, string bkmkFile) + { + var metadata = new DocumentMetadata(); + + if (File.Exists(txtFile)) + { + // Read metadata from the TXT file + ReadMetadataFromTxt(txtFile, metadata); + } + + // Extract bookmarks from the bkmk file + if (File.Exists(bkmkFile)) + { + metadata.TableOfContents = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile); + } + + return metadata; + } + + private static void ReadMetadataFromTxt(string txtFile, DocumentMetadata metadata) + { + try + { + string[] lines; + try + { + lines = File.ReadAllLines(txtFile, Encoding.UTF8); + } + catch + { + lines = File.ReadAllLines(txtFile, Encoding.GetEncoding("GBK")); + } + + foreach (var line in lines) + { + var parts = line.Split(new[] { ':' }, 2); + if (parts.Length == 2) + { + var key = parts[0].Trim(); + var value = parts[1].Trim(); + + switch (key) + { + case "title": + metadata.Title = value; + break; + case "Other titles": + metadata.OtherTitles = value; + break; + case "Volume": + metadata.Volume = value; + break; + case "ISBN": + metadata.ISBN = value; + break; + case "creator": + metadata.Creator = 
value; + break; + case "contributor": + metadata.Contributor = value; + break; + case "issuedDate": + metadata.IssuedDate = value; + break; + case "publisher": + metadata.Publisher = value; + break; + case "place": + metadata.Place = value; + break; + case "Classification number": + metadata.ClassificationNumber = value; + break; + case "page": + metadata.Page = value; + break; + case "subject": + metadata.Subject = value; + break; + case "date": + metadata.Date = value; + break; + case "spatial": + metadata.Spatial = value; + break; + case "Other ISBN": + metadata.OtherISBN = value; + break; + case "Other time": + metadata.OtherTime = value; + break; + case "url": + metadata.Url = value; + break; + } + } + } + } + catch (Exception ex) + { + throw new Exception($"读取TXT文件 {txtFile} 失败: {ex.Message}"); + } + } + public static void SaveResults(List results, string outputPath) { if (!Directory.Exists(outputPath)) diff --git a/MetadataModel.cs b/MetadataModel.cs new file mode 100644 index 0000000..655e355 --- /dev/null +++ b/MetadataModel.cs @@ -0,0 +1,134 @@ +using System; +using System.Collections.Generic; + +namespace SlideCombine +{ + public class DocumentMetadata + { + public string Title { get; set; } + public string OtherTitles { get; set; } + public string Volume { get; set; } + public string ISBN { get; set; } + public string Creator { get; set; } + public string Contributor { get; set; } + public string IssuedDate { get; set; } + public string Publisher { get; set; } + public string Place { get; set; } + public string ClassificationNumber { get; set; } + public string Page { get; set; } + public List TableOfContents { get; set; } + public string Subject { get; set; } + public string Date { get; set; } + public string Spatial { get; set; } + public string OtherISBN { get; set; } + public string OtherTime { get; set; } + public string Url { get; set; } + + public DocumentMetadata() + { + TableOfContents = new List(); + } + + public string ToFormattedString() + { 
+ var result = new System.Text.StringBuilder(); + + // Title line + result.AppendLine($"title:{Title}"); + + // Other titles line (only when present) + if (!string.IsNullOrEmpty(OtherTitles)) + { + result.AppendLine($"Other titles:{OtherTitles}"); + } + + // Volume line + result.AppendLine($"Volume:{Volume}"); + + // ISBN line + if (!string.IsNullOrEmpty(ISBN)) + { + result.AppendLine($"ISBN:{ISBN}"); + } + + // Creator line + if (!string.IsNullOrEmpty(Creator)) + { + result.AppendLine($"creator:{Creator}"); + } + + // Contributor line + if (!string.IsNullOrEmpty(Contributor)) + { + result.AppendLine($"contributor:{Contributor}"); + } + + // IssuedDate line + result.AppendLine($"issuedDate:{IssuedDate}"); + + // Publisher line + result.AppendLine($"publisher:{Publisher}"); + + // Place line + result.AppendLine($"place:{Place}"); + + // Classification number line + result.AppendLine($"Classification number:{ClassificationNumber}"); + + // Page line + result.AppendLine($"page:{Page}"); + + // Table of contents + result.AppendLine("tableOfContents:"); + foreach (var bookmark in TableOfContents) + { + if (!string.IsNullOrEmpty(bookmark.Title)) + { + result.Append(bookmark.Title.Trim()); + if (!string.IsNullOrEmpty(bookmark.Page)) + { + // NOTE: said to use 14 dashes to match the requirement example; NOTE(review): the literal below appears to contain 15, confirm which is intended + result.Append("---------------"); + result.Append(bookmark.Page); + } + result.AppendLine("
"); + } + } + + // Subject line + result.AppendLine($"subject:{Subject}"); + + // Date line + if (!string.IsNullOrEmpty(Date)) + { + result.AppendLine($"date:{Date}"); + } + + // Spatial line + if (!string.IsNullOrEmpty(Spatial)) + { + result.AppendLine($"spatial:{Spatial}"); + } + + // Other ISBN line + if (!string.IsNullOrEmpty(OtherISBN)) + { + result.AppendLine($"Other ISBN:{OtherISBN}"); + } + + // Other time line + if (!string.IsNullOrEmpty(OtherTime)) + { + result.AppendLine($"Other time:{OtherTime}"); + } + + // URL line + if (!string.IsNullOrEmpty(Url)) + { + result.AppendLine($"url:{Url}"); + } + + return result.ToString(); + } + } +} \ No newline at end of file