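// Source: SlideCombine/FileMerger.cs (reported by the repository web viewer as 266 lines, 9.8 KiB, C#).
// The viewer's "Raw / Permalink / Normal View / History" toolbar text was captured with the file and is not part of the code.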

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace SlideCombine
{
/// <summary>
/// Outcome of processing one group of bookmark files that share a base file name.
/// </summary>
public class ProcessResult
{
    /// <summary>Base name of the group; SaveResults uses it (plus ".txt") as the output file name.</summary>
    public string BaseFileName { get; set; }

    /// <summary>
    /// Bookmark files that belong to the group. Starts as an empty list so that results
    /// created without source files (e.g. a global failure result) never expose null.
    /// </summary>
    public List<string> SourceFiles { get; set; } = new List<string>();

    /// <summary>Combined, formatted text produced for the group; null when processing failed.</summary>
    public string OutputContent { get; set; }

    /// <summary>True when the group was processed without an exception.</summary>
    public bool Success { get; set; }

    /// <summary>Human-readable failure description; only set when <see cref="Success"/> is false.</summary>
    public string ErrorMessage { get; set; }

    /// <summary>
    /// Metadata documents built for the group, one per processed bookmark file.
    /// Starts as an empty list so consumers can enumerate it without a null check.
    /// </summary>
    public List<DocumentMetadata> MetadataDocuments { get; set; } = new List<DocumentMetadata>();
}
public class FileMerger
{
/// <summary>
/// Recursively finds every FreePic2Pdf bookmark file below <paramref name="pdfRootPath"/>,
/// groups the containing folders by base name (the part of the folder name before the
/// first space) and processes each group into one <see cref="ProcessResult"/>.
/// </summary>
/// <param name="pdfRootPath">Root directory searched for <c>FreePic2Pdf_bkmk</c> and <c>FreePic2Pdf_bkmk.txt</c>.</param>
/// <param name="txtSourcePath">Directory containing the metadata TXT files; must exist.</param>
/// <param name="txtOutputPath">Not used by this method; retained so existing callers keep compiling.</param>
/// <returns>
/// One result per group. If discovery or validation fails, the list instead ends with a
/// single failed result carrying the error message; this method does not throw.
/// </returns>
public static List<ProcessResult> ProcessAllFolders(string pdfRootPath, string txtSourcePath, string txtOutputPath)
{
    var results = new List<ProcessResult>();
    try
    {
        // Collect every bookmark file; both the extension-less and the ".txt" variant are supported.
        var bkmkFiles = new List<string>();
        bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories));
        bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk.txt", SearchOption.AllDirectories));
        if (bkmkFiles.Count == 0)
        {
            // Deliberately funnelled into the catch below so it becomes a failed result.
            throw new Exception($"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 或 FreePic2Pdf_bkmk.txt 文件");
        }

        // The TXT source directory must exist before any group is processed.
        if (!Directory.Exists(txtSourcePath))
        {
            throw new Exception($"TXT源文件路径不存在: {txtSourcePath}");
        }

        // Group by base name (prefix of the folder name up to the first space).
        var fileGroups = new Dictionary<string, List<string>>();

        // A folder that contains BOTH bookmark variants must only be processed once;
        // otherwise the same folder would yield two metadata documents. The extension-less
        // file wins because it is enumerated first.
        var seenFolders = new HashSet<string>(StringComparer.Ordinal);

        foreach (var bkmkFile in bkmkFiles)
        {
            var folderPath = Path.GetDirectoryName(bkmkFile);
            if (!seenFolders.Add(folderPath))
            {
                continue;
            }

            var folderNameOnly = new DirectoryInfo(folderPath).Name;
            var baseName = GetBaseFileName(folderNameOnly);

            // Single lookup instead of ContainsKey followed by the indexer.
            List<string> members;
            if (!fileGroups.TryGetValue(baseName, out members))
            {
                members = new List<string>();
                fileGroups[baseName] = members;
            }
            members.Add(bkmkFile);
        }

        // Process each group with its files sorted by path.
        foreach (var group in fileGroups)
        {
            var result = ProcessFileGroup(group.Key, group.Value.OrderBy(f => f).ToList(), txtSourcePath);
            results.Add(result);
        }
    }
    catch (Exception ex)
    {
        // Report discovery/validation problems as a failed result instead of throwing.
        var errorResult = new ProcessResult
        {
            Success = false,
            ErrorMessage = ex.Message
        };
        results.Add(errorResult);
    }
    return results;
}
/// <summary>
/// Returns the part of <paramref name="folderName"/> that precedes its first space.
/// The name is returned unchanged when it has no space or when the first space is
/// the very first character.
/// </summary>
private static string GetBaseFileName(string folderName)
{
    int cut = folderName.IndexOf(' ');

    // cut == -1: no space at all; cut == 0: leading space. Either way keep the full name.
    if (cut <= 0)
    {
        return folderName;
    }

    return folderName.Substring(0, cut);
}
/// <summary>
/// Builds the result for one group: creates a metadata document for every bookmark file
/// and combines all of them into the group's output content.
/// </summary>
/// <param name="baseName">Base name shared by the group.</param>
/// <param name="bkmkFiles">Bookmark files of the group, processed in the given order.</param>
/// <param name="txtSourcePath">Directory searched for the matching metadata TXT files.</param>
/// <returns>
/// A result with <c>Success</c> set to true, or to false with an error message when any
/// step throws.
/// </returns>
private static ProcessResult ProcessFileGroup(string baseName, List<string> bkmkFiles, string txtSourcePath)
{
    var outcome = new ProcessResult();
    outcome.BaseFileName = baseName;
    outcome.SourceFiles = bkmkFiles;
    outcome.Success = true;
    outcome.MetadataDocuments = new List<DocumentMetadata>();

    try
    {
        foreach (var bookmarkPath in bkmkFiles)
        {
            // Locate the TXT file that belongs to this bookmark file (may be null).
            var metadataTxtPath = GetCorrespondingTxtFile(bookmarkPath, txtSourcePath);

            // Build the metadata document from both inputs.
            var document = CreateMetadataFromFiles(metadataTxtPath, bookmarkPath);
            if (document == null)
            {
                continue;
            }

            outcome.MetadataDocuments.Add(document);
        }

        // Merge every collected document into the final text.
        outcome.OutputContent = ContentFormatter.CombineFormattedMetadataDocuments(outcome.MetadataDocuments);
    }
    catch (Exception ex)
    {
        outcome.Success = false;
        outcome.ErrorMessage = $"处理文件组 {baseName} 时出错: {ex.Message}";
    }

    return outcome;
}
/// <summary>
/// Resolves the metadata TXT file for a bookmark file: the file in
/// <paramref name="txtSourcePath"/> named after the bookmark file's parent folder, with a
/// ".txt" extension.
/// </summary>
/// <returns>The TXT file path when that file exists; otherwise null.</returns>
private static string GetCorrespondingTxtFile(string bkmkFile, string txtSourcePath)
{
    // Name of the folder that directly contains the bookmark file.
    var folderName = new DirectoryInfo(Path.GetDirectoryName(bkmkFile)).Name;

    // Candidate path inside the configured TXT source directory.
    var candidate = Path.Combine(txtSourcePath, $"{folderName}.txt");
    if (!File.Exists(candidate))
    {
        return null;
    }

    return candidate;
}
/// <summary>
/// Creates a metadata document, filling its fields from the TXT file and its table of
/// contents from the bookmark file. Each input is only used if the file exists.
/// </summary>
/// <param name="txtFile">Path of the metadata TXT file.</param>
/// <param name="bkmkFile">Path of the bookmark file.</param>
/// <returns>A new document; it is returned even when neither file exists.</returns>
private static DocumentMetadata CreateMetadataFromFiles(string txtFile, string bkmkFile)
{
    var document = new DocumentMetadata();

    // Descriptive fields come from the TXT file.
    if (File.Exists(txtFile))
    {
        ReadMetadataFromTxt(txtFile, document);
    }

    // Without a bookmark file there is no table of contents to extract.
    if (!File.Exists(bkmkFile))
    {
        return document;
    }

    document.TableOfContents = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
    return document;
}
/// <summary>
/// Reads "key: value" lines from <paramref name="txtFile"/> and copies the recognised keys
/// into <paramref name="metadata"/>.
/// </summary>
/// <remarks>
/// Each line is split at its first ':' only, so values may themselves contain colons.
/// Lines without a colon and unrecognised keys are ignored. Keys are matched
/// case-sensitively after trimming. When a key occurs more than once the last value wins.
/// </remarks>
/// <param name="txtFile">Path of the metadata TXT file.</param>
/// <param name="metadata">Document that receives the parsed values.</param>
/// <exception cref="Exception">
/// The file could not be read; the original failure is available as <c>InnerException</c>.
/// </exception>
private static void ReadMetadataFromTxt(string txtFile, DocumentMetadata metadata)
{
    try
    {
        // Prefer GB2312 so Chinese text is decoded correctly. Only the encoding lookup is
        // guarded: falling back is for "GB2312 is not available", not for I/O errors,
        // which previously triggered a pointless second read of the same file.
        Encoding encoding;
        try
        {
            encoding = Encoding.GetEncoding("GB2312");
        }
        catch (ArgumentException)
        {
            encoding = Encoding.Default;
        }
        catch (NotSupportedException)
        {
            encoding = Encoding.Default;
        }

        string[] lines = File.ReadAllLines(txtFile, encoding);

        foreach (var line in lines)
        {
            // Split at the first colon only; the remainder is the value.
            var parts = line.Split(new[] { ':' }, 2);
            if (parts.Length != 2)
            {
                continue;
            }

            var key = parts[0].Trim();
            var value = parts[1].Trim();
            switch (key)
            {
                case "title":
                    metadata.Title = value;
                    break;
                case "Other titles":
                    metadata.OtherTitles = value;
                    break;
                case "Volume":
                    metadata.Volume = value;
                    break;
                case "ISBN":
                    metadata.ISBN = value;
                    break;
                case "creator":
                    metadata.Creator = value;
                    break;
                case "contributor":
                    metadata.Contributor = value;
                    break;
                case "issuedDate":
                    metadata.IssuedDate = value;
                    break;
                case "publisher":
                    metadata.Publisher = value;
                    break;
                case "place":
                    metadata.Place = value;
                    break;
                case "Classification number":
                    metadata.ClassificationNumber = value;
                    break;
                case "page":
                    metadata.Page = value;
                    break;
                case "subject":
                    metadata.Subject = value;
                    break;
                case "date":
                    metadata.Date = value;
                    break;
                case "spatial":
                    metadata.Spatial = value;
                    break;
                case "Other ISBN":
                    metadata.OtherISBN = value;
                    break;
                case "Other time":
                    metadata.OtherTime = value;
                    break;
                case "url":
                    metadata.Url = value;
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but the cause is now preserved.
        throw new Exception($"读取TXT文件 {txtFile} 失败: {ex.Message}", ex);
    }
}
/// <summary>
/// Writes the output content of every successful, non-empty result to
/// "&lt;BaseFileName&gt;.txt" inside <paramref name="outputPath"/>, creating the directory
/// first if it does not exist. Files are written as UTF-8 with a byte order mark.
/// </summary>
/// <param name="results">Results to persist; failed or empty ones are skipped.</param>
/// <param name="outputPath">Target directory for the generated files.</param>
public static void SaveResults(List<ProcessResult> results, string outputPath)
{
    bool targetExists = Directory.Exists(outputPath);
    if (!targetExists)
    {
        Directory.CreateDirectory(outputPath);
    }

    // UTF-8 with BOM so special characters survive.
    var bomUtf8 = new UTF8Encoding(true);

    foreach (var entry in results)
    {
        // Skip anything that failed or produced no text.
        if (!entry.Success || string.IsNullOrEmpty(entry.OutputContent))
        {
            continue;
        }

        var destination = Path.Combine(outputPath, $"{entry.BaseFileName}.txt");
        File.WriteAllText(destination, entry.OutputContent, bomUtf8);
    }
}
}
}