using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace SlideCombine
{
    /// <summary>
    /// Outcome of processing one group of bookmark files that share a base name.
    /// </summary>
    public class ProcessResult
    {
        /// <summary>Base name of the group; also used as the output file name (without extension).</summary>
        public string BaseFileName { get; set; }

        /// <summary>Paths of the bookmark files that belong to this group.</summary>
        public List<string> SourceFiles { get; set; }

        /// <summary>Combined text produced for the group.</summary>
        public string OutputContent { get; set; }

        /// <summary><c>true</c> when the group was processed without an exception.</summary>
        public bool Success { get; set; }

        /// <summary>Failure description; only set when <see cref="Success"/> is <c>false</c>.</summary>
        public string ErrorMessage { get; set; }

        /// <summary>One metadata document per bookmark file in the group.</summary>
        public List<DocumentMetadata> MetadataDocuments { get; set; }
    }

    /// <summary>
    /// Finds FreePic2Pdf bookmark files, pairs each one with a metadata TXT file,
    /// and merges them into one output text per base name.
    /// </summary>
    public class FileMerger
    {
        /// <summary>
        /// Scans <paramref name="pdfRootPath"/> recursively for bookmark files, groups them by the
        /// base name of their containing folder, and processes every group.
        /// </summary>
        /// <param name="pdfRootPath">Root directory searched (all sub-directories) for bookmark files.</param>
        /// <param name="txtSourcePath">Directory holding the metadata TXT files, named after the bookmark folders.</param>
        /// <param name="txtOutputPath">Not read by this method; results are written separately via <see cref="SaveResults"/>.</param>
        /// <returns>
        /// One result per group. If scanning or validation fails, the exception is not propagated;
        /// instead a failed <see cref="ProcessResult"/> carrying the message is appended to the list.
        /// </returns>
        public static List<ProcessResult> ProcessAllFolders(string pdfRootPath, string txtSourcePath, string txtOutputPath)
        {
            var results = new List<ProcessResult>();

            try
            {
                // Collect every bookmark file, with or without the .txt extension.
                // NOTE(review): a folder that contains both variants contributes two entries,
                // so its bookmarks are processed twice — confirm this is intended.
                var bkmkFiles = new List<string>();
                bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories));
                bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk.txt", SearchOption.AllDirectories));

                if (bkmkFiles.Count == 0)
                {
                    throw new FileNotFoundException($"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 或 FreePic2Pdf_bkmk.txt 文件");
                }

                // The TXT source directory must exist before any group is processed.
                if (!Directory.Exists(txtSourcePath))
                {
                    throw new DirectoryNotFoundException($"TXT源文件路径不存在: {txtSourcePath}");
                }

                // Group bookmark files by base name (the part of the folder name before the first space).
                var fileGroups = new Dictionary<string, List<string>>();
                foreach (var bkmkFile in bkmkFiles)
                {
                    var baseName = GetBaseFileName(GetContainingFolderName(bkmkFile));

                    List<string> groupFiles;
                    if (!fileGroups.TryGetValue(baseName, out groupFiles))
                    {
                        groupFiles = new List<string>();
                        fileGroups[baseName] = groupFiles;
                    }

                    groupFiles.Add(bkmkFile);
                }

                // Process each group with its files in sorted path order.
                foreach (var group in fileGroups)
                {
                    var orderedFiles = group.Value.OrderBy(f => f).ToList();
                    results.Add(ProcessFileGroup(group.Key, orderedFiles, txtSourcePath));
                }
            }
            catch (Exception ex)
            {
                // Report the failure as a result entry rather than throwing to the caller.
                results.Add(new ProcessResult
                {
                    Success = false,
                    ErrorMessage = ex.Message
                });
            }

            return results;
        }

        /// <summary>Returns the name (last path segment) of the directory that contains <paramref name="filePath"/>.</summary>
        private static string GetContainingFolderName(string filePath)
        {
            var directory = Path.GetDirectoryName(filePath);
            return new DirectoryInfo(directory).Name;
        }

        /// <summary>
        /// Returns the part of <paramref name="folderName"/> before its first space, or the whole
        /// name when there is no space or the space is the first character.
        /// </summary>
        private static string GetBaseFileName(string folderName)
        {
            var spaceIndex = folderName.IndexOf(' ');
            return spaceIndex > 0
                ? folderName.Substring(0, spaceIndex)
                : folderName;
        }

        /// <summary>
        /// Builds a metadata document for every bookmark file in the group and combines them
        /// through <c>ContentFormatter.CombineFormattedMetadataDocuments</c>.
        /// </summary>
        /// <returns>
        /// A result for the group; on any exception <see cref="ProcessResult.Success"/> is
        /// <c>false</c> and <see cref="ProcessResult.ErrorMessage"/> describes the failure.
        /// </returns>
        private static ProcessResult ProcessFileGroup(string baseName, List<string> bkmkFiles, string txtSourcePath)
        {
            var result = new ProcessResult
            {
                BaseFileName = baseName,
                SourceFiles = bkmkFiles,
                Success = true,
                MetadataDocuments = new List<DocumentMetadata>()
            };

            try
            {
                foreach (var bkmkFile in bkmkFiles)
                {
                    // The matching TXT path is null when no such file exists.
                    var txtFile = GetCorrespondingTxtFile(bkmkFile, txtSourcePath);

                    var metadata = CreateMetadataFromFiles(txtFile, bkmkFile);
                    if (metadata != null)
                    {
                        result.MetadataDocuments.Add(metadata);
                    }
                }

                // Merge all metadata documents of the group into one output text.
                result.OutputContent = ContentFormatter.CombineFormattedMetadataDocuments(result.MetadataDocuments);
            }
            catch (Exception ex)
            {
                result.Success = false;
                result.ErrorMessage = $"处理文件组 {baseName} 时出错: {ex.Message}";
            }

            return result;
        }

        /// <summary>
        /// Looks in <paramref name="txtSourcePath"/> for "&lt;folder name&gt;.txt", where the folder
        /// name is that of the directory containing <paramref name="bkmkFile"/>.
        /// </summary>
        /// <returns>The TXT file path, or <c>null</c> when the file does not exist.</returns>
        private static string GetCorrespondingTxtFile(string bkmkFile, string txtSourcePath)
        {
            var folderName = GetContainingFolderName(bkmkFile);
            var txtFile = Path.Combine(txtSourcePath, $"{folderName}.txt");
            return File.Exists(txtFile) ? txtFile : null;
        }

        /// <summary>
        /// Creates a metadata document, filling fields from <paramref name="txtFile"/> and the
        /// table of contents from <paramref name="bkmkFile"/>; either source is skipped when missing.
        /// </summary>
        private static DocumentMetadata CreateMetadataFromFiles(string txtFile, string bkmkFile)
        {
            var metadata = new DocumentMetadata();

            // File.Exists returns false for a null path, so a missing TXT file is simply skipped.
            if (File.Exists(txtFile))
            {
                ReadMetadataFromTxt(txtFile, metadata);
            }

            if (File.Exists(bkmkFile))
            {
                metadata.TableOfContents = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
            }

            return metadata;
        }

        /// <summary>
        /// Returns the GB2312 encoding, falling back to the system default encoding when
        /// GB2312 is not available on the current platform.
        /// </summary>
        private static Encoding GetTxtEncoding()
        {
            try
            {
                return Encoding.GetEncoding("GB2312");
            }
            catch (ArgumentException)
            {
                return Encoding.Default;
            }
            catch (NotSupportedException)
            {
                return Encoding.Default;
            }
        }

        /// <summary>
        /// Parses "key: value" lines from <paramref name="txtFile"/> and copies the recognised keys
        /// onto <paramref name="metadata"/>. Lines without a colon and unknown keys are ignored.
        /// </summary>
        /// <exception cref="IOException">Reading or parsing failed; the original exception is the inner exception.</exception>
        private static void ReadMetadataFromTxt(string txtFile, DocumentMetadata metadata)
        {
            try
            {
                var lines = File.ReadAllLines(txtFile, GetTxtEncoding());

                foreach (var line in lines)
                {
                    // Split on the first colon only, so values that contain ':' (e.g. URLs) stay intact.
                    var parts = line.Split(new[] { ':' }, 2);
                    if (parts.Length != 2)
                    {
                        continue;
                    }

                    var key = parts[0].Trim();
                    var value = parts[1].Trim();

                    switch (key)
                    {
                        case "title": metadata.Title = value; break;
                        case "Other titles": metadata.OtherTitles = value; break;
                        case "Volume": metadata.Volume = value; break;
                        case "ISBN": metadata.ISBN = value; break;
                        case "creator": metadata.Creator = value; break;
                        case "contributor": metadata.Contributor = value; break;
                        case "issuedDate": metadata.IssuedDate = value; break;
                        case "publisher": metadata.Publisher = value; break;
                        case "place": metadata.Place = value; break;
                        case "Classification number": metadata.ClassificationNumber = value; break;
                        case "page": metadata.Page = value; break;
                        case "subject": metadata.Subject = value; break;
                        case "date": metadata.Date = value; break;
                        case "spatial": metadata.Spatial = value; break;
                        case "Other ISBN": metadata.OtherISBN = value; break;
                        case "Other time": metadata.OtherTime = value; break;
                        case "url": metadata.Url = value; break;
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so the root cause is not lost.
                throw new IOException($"读取TXT文件 {txtFile} 失败: {ex.Message}", ex);
            }
        }

        /// <summary>
        /// Writes every successful, non-empty result to "&lt;BaseFileName&gt;.txt" inside
        /// <paramref name="outputPath"/>, creating the directory when needed.
        /// </summary>
        /// <param name="results">Results to persist; failed or empty entries are skipped.</param>
        /// <param name="outputPath">Target directory for the output files.</param>
        public static void SaveResults(List<ProcessResult> results, string outputPath)
        {
            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(outputPath);

            // UTF-8 with BOM, so special characters survive.
            var utf8WithBom = new UTF8Encoding(true);

            foreach (var result in results)
            {
                if (!result.Success || string.IsNullOrEmpty(result.OutputContent))
                {
                    continue;
                }

                var outputFilePath = Path.Combine(outputPath, $"{result.BaseFileName}.txt");
                File.WriteAllText(outputFilePath, result.OutputContent, utf8WithBom);
            }
        }
    }
}