using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SlideCombine
{
    /// <summary>
    /// Orders bookmark file paths by the first number found in the name of the
    /// folder that contains each file, so that e.g. "CH-875 4-6" sorts before
    /// "CH-875 10-12".
    /// </summary>
    public class BkmkFileComparer : IComparer<string>
    {
        // Optional "word + whitespace" prefix followed by the digits to capture.
        // Intended for folder names such as "CH-875 1-3", "CH-875 10-12",
        // "Volume 2" or "Part 1". Built once instead of on every comparison.
        private static readonly Regex FolderNumberRegex =
            new Regex(@"(?:[\w-]+\s+)?(\d+)", RegexOptions.IgnoreCase);

        /// <summary>
        /// Compares two file paths by the number embedded in their parent folder name.
        /// </summary>
        /// <param name="x">First file path; may be null.</param>
        /// <param name="y">Second file path; may be null.</param>
        /// <returns>
        /// A negative value, zero, or a positive value when <paramref name="x"/> sorts
        /// before, equal to, or after <paramref name="y"/>. Null sorts first; a path
        /// whose folder has a number sorts before one without; ties fall back to a
        /// case-insensitive ordinal comparison of the full paths.
        /// </returns>
        public int Compare(string x, string y)
        {
            if (x == null && y == null)
            {
                return 0;
            }

            if (x == null)
            {
                return -1;
            }

            if (y == null)
            {
                return 1;
            }

            // Name of the folder that directly contains each file.
            var xFolder = Path.GetFileName(Path.GetDirectoryName(x));
            var yFolder = Path.GetFileName(Path.GetDirectoryName(y));

            var xNumber = ExtractNumberFromFolder(xFolder);
            var yNumber = ExtractNumberFromFolder(yFolder);

            if (xNumber.HasValue && yNumber.HasValue)
            {
                // Both numbered: numeric order decides unless the numbers are equal.
                int byNumber = xNumber.Value.CompareTo(yNumber.Value);
                if (byNumber != 0)
                {
                    return byNumber;
                }
            }
            else if (xNumber.HasValue)
            {
                // Only one side is numbered: the numbered one goes first.
                return -1;
            }
            else if (yNumber.HasValue)
            {
                return 1;
            }

            // Neither is numbered, or the numbers tie: compare the full paths.
            return string.Compare(x, y, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Extracts the first number matched by <see cref="FolderNumberRegex"/>.
        /// </summary>
        /// <param name="folderName">Folder name; may be null or empty.</param>
        /// <returns>The parsed number, or null when there is none.</returns>
        private static long? ExtractNumberFromFolder(string folderName)
        {
            // Path.GetDirectoryName returns null for a root path; Regex.Match would
            // throw on a null input, so treat it as "no number".
            if (string.IsNullOrEmpty(folderName))
            {
                return null;
            }

            var match = FolderNumberRegex.Match(folderName);
            if (!match.Success)
            {
                return null;
            }

            // long rather than int so that long digit runs (e.g. timestamps) still
            // sort numerically instead of being treated as "no number".
            if (long.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out long number))
            {
                return number;
            }

            return null;
        }
    }

    /// <summary>
    /// Outcome of processing one group of bookmark files that share a base name.
    /// </summary>
    public class ProcessResult
    {
        /// <summary>Base name of the group; also used as the output file name.</summary>
        public string BaseFileName { get; set; }

        /// <summary>Bookmark file paths that belong to the group, in processing order.</summary>
        public List<string> SourceFiles { get; set; }

        /// <summary>Combined text produced for the group.</summary>
        public string OutputContent { get; set; }

        /// <summary>True when the group was processed without an exception.</summary>
        public bool Success { get; set; }

        /// <summary>Error description when <see cref="Success"/> is false.</summary>
        public string ErrorMessage { get; set; }

        /// <summary>One metadata document per processed bookmark file.</summary>
        public List<DocumentMetadata> MetadataDocuments { get; set; }
    }

    /// <summary>
    /// Finds FreePic2Pdf bookmark files under a root folder, pairs each with a
    /// metadata TXT file, and merges them into one output text per group.
    /// </summary>
    public class FileMerger
    {
        /// <summary>
        /// Processes every bookmark file found under <paramref name="pdfRootPath"/>.
        /// </summary>
        /// <param name="pdfRootPath">Root folder searched recursively for bookmark files.</param>
        /// <param name="txtSourcePath">Folder that holds the metadata TXT files.</param>
        /// <param name="txtOutputPath">Not used by this method.</param>
        /// <returns>
        /// One result per group. If discovery or grouping fails, the list contains a
        /// single failed result carrying the exception message; no exception escapes.
        /// </returns>
        public static List<ProcessResult> ProcessAllFolders(string pdfRootPath, string txtSourcePath, string txtOutputPath)
        {
            var results = new List<ProcessResult>();

            try
            {
                // The bookmark file may exist with or without a .txt extension.
                // NOTE(review): a folder that contains both variants is processed twice — confirm this is intended.
                var bkmkFiles = new List<string>();
                bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories));
                bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk.txt", SearchOption.AllDirectories));

                if (bkmkFiles.Count == 0)
                {
                    throw new FileNotFoundException($"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 或 FreePic2Pdf_bkmk.txt 文件");
                }

                if (!Directory.Exists(txtSourcePath))
                {
                    throw new DirectoryNotFoundException($"TXT源文件路径不存在: {txtSourcePath}");
                }

                // Group by the part of the containing folder's name before the first space.
                var fileGroups = new Dictionary<string, List<string>>();
                foreach (var bkmkFile in bkmkFiles)
                {
                    var folderPath = Path.GetDirectoryName(bkmkFile);
                    var folderNameOnly = new DirectoryInfo(folderPath).Name;
                    var baseName = GetBaseFileName(folderNameOnly);

                    if (!fileGroups.TryGetValue(baseName, out var groupFiles))
                    {
                        groupFiles = new List<string>();
                        fileGroups[baseName] = groupFiles;
                    }

                    groupFiles.Add(bkmkFile);
                }

                foreach (var group in fileGroups)
                {
                    var orderedFiles = group.Value.OrderBy(f => f, new BkmkFileComparer()).ToList();
                    results.Add(ProcessFileGroup(group.Key, orderedFiles, txtSourcePath));
                }
            }
            catch (Exception ex)
            {
                // Failures are reported as a result entry rather than thrown to the caller.
                results.Add(new ProcessResult
                {
                    Success = false,
                    ErrorMessage = ex.Message
                });
            }

            return results;
        }

        /// <summary>
        /// Returns the part of <paramref name="folderName"/> before its first space,
        /// or the whole name when there is no space or the name starts with one.
        /// </summary>
        private static string GetBaseFileName(string folderName)
        {
            var spaceIndex = folderName.IndexOf(' ');
            return spaceIndex > 0
                ? folderName.Substring(0, spaceIndex)
                : folderName;
        }

        /// <summary>
        /// Builds a metadata document for each bookmark file of a group and combines them.
        /// </summary>
        /// <param name="baseName">Base name shared by the group.</param>
        /// <param name="bkmkFiles">Bookmark file paths, already ordered.</param>
        /// <param name="txtSourcePath">Folder that holds the metadata TXT files.</param>
        /// <returns>A result whose Success flag is false if any step threw.</returns>
        private static ProcessResult ProcessFileGroup(string baseName, List<string> bkmkFiles, string txtSourcePath)
        {
            var result = new ProcessResult
            {
                BaseFileName = baseName,
                SourceFiles = bkmkFiles,
                Success = true,
                MetadataDocuments = new List<DocumentMetadata>()
            };

            try
            {
                foreach (var bkmkFile in bkmkFiles)
                {
                    var txtFile = GetCorrespondingTxtFile(bkmkFile, txtSourcePath);
                    result.MetadataDocuments.Add(CreateMetadataFromFiles(txtFile, bkmkFile));
                }

                result.OutputContent = ContentFormatter.CombineFormattedMetadataDocuments(result.MetadataDocuments);
            }
            catch (Exception ex)
            {
                result.Success = false;
                result.ErrorMessage = $"处理文件组 {baseName} 时出错: {ex.Message}";
            }

            return result;
        }

        /// <summary>
        /// Locates "&lt;folder name&gt;.txt" in <paramref name="txtSourcePath"/>, where the
        /// folder name is that of the folder containing <paramref name="bkmkFile"/>.
        /// </summary>
        /// <returns>The TXT file path, or null when that file does not exist.</returns>
        private static string GetCorrespondingTxtFile(string bkmkFile, string txtSourcePath)
        {
            var directory = Path.GetDirectoryName(bkmkFile);
            var folderName = new DirectoryInfo(directory).Name;
            var txtFile = Path.Combine(txtSourcePath, $"{folderName}.txt");

            return File.Exists(txtFile) ? txtFile : null;
        }

        /// <summary>
        /// Creates a metadata document from an optional TXT file and a bookmark file.
        /// </summary>
        /// <param name="txtFile">Metadata TXT path; null or missing files are skipped.</param>
        /// <param name="bkmkFile">Bookmark file path; a missing file is skipped.</param>
        /// <returns>A new document; never null.</returns>
        private static DocumentMetadata CreateMetadataFromFiles(string txtFile, string bkmkFile)
        {
            var metadata = new DocumentMetadata();

            if (!string.IsNullOrEmpty(txtFile) && File.Exists(txtFile))
            {
                ReadMetadataFromTxt(txtFile, metadata);
            }

            if (File.Exists(bkmkFile))
            {
                metadata.TableOfContents = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
            }

            return metadata;
        }

        /// <summary>
        /// Reads "key: value" lines from <paramref name="txtFile"/> into <paramref name="metadata"/>.
        /// </summary>
        /// <exception cref="IOException">
        /// Reading or parsing failed; the original exception is kept as InnerException.
        /// </exception>
        private static void ReadMetadataFromTxt(string txtFile, DocumentMetadata metadata)
        {
            try
            {
                var lines = File.ReadAllLines(txtFile, ResolveMetadataEncoding());

                foreach (var line in lines)
                {
                    // Split on the first colon only, so values such as URLs keep their own colons.
                    var parts = line.Split(new[] { ':' }, 2);
                    if (parts.Length != 2)
                    {
                        continue;
                    }

                    ApplyMetadataField(metadata, parts[0].Trim(), parts[1].Trim());
                }
            }
            catch (Exception ex)
            {
                throw new IOException($"读取TXT文件 {txtFile} 失败: {ex.Message}", ex);
            }
        }

        /// <summary>
        /// Returns the GB2312 encoding, or the platform default encoding when GB2312
        /// is not available on this runtime.
        /// </summary>
        private static Encoding ResolveMetadataEncoding()
        {
            try
            {
                return Encoding.GetEncoding("GB2312");
            }
            catch (Exception ex) when (ex is ArgumentException || ex is NotSupportedException)
            {
                // Only an unavailable code page triggers the fallback; I/O errors
                // while reading the file are no longer masked by it.
                return Encoding.Default;
            }
        }

        /// <summary>
        /// Stores <paramref name="value"/> in the metadata property that corresponds to
        /// <paramref name="key"/>. Keys are case-sensitive; unknown keys are ignored.
        /// </summary>
        private static void ApplyMetadataField(DocumentMetadata metadata, string key, string value)
        {
            switch (key)
            {
                case "title":
                    metadata.Title = value;
                    break;
                case "Other titles":
                    metadata.OtherTitles = value;
                    break;
                case "Volume":
                    metadata.Volume = value;
                    break;
                case "ISBN":
                    metadata.ISBN = value;
                    break;
                case "creator":
                    metadata.Creator = value;
                    break;
                case "contributor":
                    metadata.Contributor = value;
                    break;
                case "issuedDate":
                    metadata.IssuedDate = value;
                    break;
                case "publisher":
                    metadata.Publisher = value;
                    break;
                case "place":
                    metadata.Place = value;
                    break;
                case "Classification number":
                    metadata.ClassificationNumber = value;
                    break;
                case "page":
                    metadata.Page = value;
                    break;
                case "subject":
                    metadata.Subject = value;
                    break;
                case "date":
                    metadata.Date = value;
                    break;
                case "spatial":
                    metadata.Spatial = value;
                    break;
                case "Other ISBN":
                    metadata.OtherISBN = value;
                    break;
                case "Other time":
                    metadata.OtherTime = value;
                    break;
                case "url":
                    metadata.Url = value;
                    break;
            }
        }

        /// <summary>
        /// Writes each successful, non-empty result to "&lt;BaseFileName&gt;.txt" in
        /// <paramref name="outputPath"/>, creating the folder if needed.
        /// </summary>
        /// <param name="results">Results to save; failed or empty ones are skipped.</param>
        /// <param name="outputPath">Destination folder.</param>
        /// <exception cref="ArgumentNullException"><paramref name="results"/> is null.</exception>
        public static void SaveResults(List<ProcessResult> results, string outputPath)
        {
            if (results == null)
            {
                throw new ArgumentNullException(nameof(results));
            }

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(outputPath);

            // UTF-8 with a byte order mark.
            var utf8WithBom = new UTF8Encoding(true);

            foreach (var result in results)
            {
                if (!result.Success || string.IsNullOrEmpty(result.OutputContent))
                {
                    continue;
                }

                var outputFilePath = Path.Combine(outputPath, $"{result.BaseFileName}.txt");
                File.WriteAllText(outputFilePath, result.OutputContent, utf8WithBom);
            }
        }
    }
}