// File: openarchival/OpenArchival.DataAccess/Providers/ArtifactGroupingProvider.cs
// Repository listing metadata: 536 lines, 22 KiB, C#

using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using System.Diagnostics.CodeAnalysis;
namespace OpenArchival.DataAccess;
public class ArtifactGroupingProvider : IArtifactGroupingProvider
{
private readonly IDbContextFactory<ApplicationDbContext> _context;
private readonly ILogger<ArtifactGroupingProvider> _logger;
/// <summary>
/// Creates the provider, storing the context factory and logger it is given.
/// </summary>
/// <param name="context">Factory from which database contexts are created.</param>
/// <param name="logger">Logger for this provider.</param>
[SetsRequiredMembers]
public ArtifactGroupingProvider(IDbContextFactory<ApplicationDbContext> context, ILogger<ArtifactGroupingProvider> logger)
    => (_context, _logger) = (context, logger);
/// <summary>
/// Loads the grouping with the given Id together with its category, identifier fields,
/// type and child entries (each with its storage location, type, files, tags, listed
/// names and defects).
/// </summary>
/// <param name="id">Id of the grouping to load.</param>
/// <returns>The matching grouping, or <c>null</c> when no grouping has that Id.</returns>
public async Task<ArtifactGrouping?> GetGroupingAsync(int id)
{
    await using var db = await _context.CreateDbContextAsync();

    // Build the fully-included query first, then apply the Id filter as the terminal predicate.
    var groupingsWithRelations = db.ArtifactGroupings
        .Include(grouping => grouping.Category)
        .Include(grouping => grouping.IdentifierFields)
        .Include(grouping => grouping.Type)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.StorageLocation)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Type)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Files)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Tags)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.ListedNames)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Defects);

    return await groupingsWithRelations.FirstOrDefaultAsync(grouping => grouping.Id == id);
}
/// <summary>
/// Loads the grouping whose <c>ArtifactGroupingIdentifier</c> equals the given value,
/// together with its category, identifier fields, type and child entries (each with its
/// storage location, type, files, tags, listed names and defects).
/// </summary>
/// <param name="artifactGroupingIdentifier">Identifier string to match.</param>
/// <returns>The first matching grouping, or <c>null</c> when there is none.</returns>
public async Task<ArtifactGrouping?> GetGroupingAsync(string artifactGroupingIdentifier)
{
    await using var db = await _context.CreateDbContextAsync();

    // Build the fully-included query first, then apply the identifier filter as the terminal predicate.
    var groupingsWithRelations = db.ArtifactGroupings
        .Include(grouping => grouping.Category)
        .Include(grouping => grouping.IdentifierFields)
        .Include(grouping => grouping.Type)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.StorageLocation)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Type)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Files)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Tags)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.ListedNames)
        .Include(grouping => grouping.ChildArtifactEntries)
            .ThenInclude(entry => entry.Defects);

    return await groupingsWithRelations
        .FirstOrDefaultAsync(grouping => grouping.ArtifactGroupingIdentifier == artifactGroupingIdentifier);
}
/// <summary>
/// Inserts a new grouping with its child entries, re-using existing rows for shared
/// related entities instead of inserting duplicates.
/// </summary>
/// <remarks>
/// Types are matched by name, storage locations by location, tags by name, listed names by
/// value and defects by description. Each distinct value is resolved once per call and the
/// resulting instance is shared by every entry that references it, so a value that is new to
/// the database but used by several entries is inserted only once. File paths are only
/// matched against rows that already exist (by path).
/// </remarks>
/// <param name="grouping">The detached grouping graph to insert.</param>
/// <exception cref="ArgumentNullException"><paramref name="grouping"/> is <c>null</c>.</exception>
public async Task CreateGroupingAsync(ArtifactGrouping grouping)
{
    ArgumentNullException.ThrowIfNull(grouping);

    await using var context = await _context.CreateDbContextAsync();

    // Attach the category before the graph is walked: with a key set it is tracked as an
    // existing row, otherwise it is tracked as new.
    context.Attach(grouping.Category);

    // One instance per (entity kind, natural key) for the duration of this call.
    var sharedInstances = new Dictionary<(Type Kind, object Key), object>();

    // Returns the instance to use for a candidate: the one already chosen for this key,
    // else the matching database row, else the candidate itself.
    async Task<TEntity> ResolveSharedAsync<TEntity>(
        object? key,
        TEntity candidate,
        Func<Task<TEntity?>> findExistingAsync) where TEntity : class
    {
        if (key is null)
        {
            // A null key cannot be cached; fall back to a plain database lookup.
            return await findExistingAsync() ?? candidate;
        }

        var slot = (typeof(TEntity), key);
        if (sharedInstances.TryGetValue(slot, out var known))
        {
            return (TEntity)known;
        }

        var resolved = await findExistingAsync() ?? candidate;
        sharedInstances[slot] = resolved;
        return resolved;
    }

    // Resolves every candidate of a collection, keeping each resolved instance only once.
    async Task<List<TEntity>> ResolveAllAsync<TEntity>(
        IEnumerable<TEntity> candidates,
        Func<TEntity, object?> keyOf,
        Func<TEntity, Task<TEntity?>> findExistingAsync) where TEntity : class
    {
        var resolved = new List<TEntity>();
        foreach (var candidate in candidates)
        {
            var match = await ResolveSharedAsync(keyOf(candidate), candidate, () => findExistingAsync(candidate));
            if (!resolved.Contains(match))
            {
                resolved.Add(match);
            }
        }

        return resolved;
    }

    // De-duplicates an artifact type by name; a null or unnamed type is passed through untouched.
    async Task<ArtifactType> GetUniqueTypeAsync(ArtifactType typeToProcess)
    {
        if (string.IsNullOrEmpty(typeToProcess?.Name))
        {
            return typeToProcess!;
        }

        var typeName = typeToProcess.Name;
        return await ResolveSharedAsync(
            typeName,
            typeToProcess,
            () => context.ArtifactTypes.FirstOrDefaultAsync(t => t.Name == typeName));
    }

    grouping.Type = await GetUniqueTypeAsync(grouping.Type);

    foreach (var entry in grouping.ChildArtifactEntries)
    {
        entry.Type = await GetUniqueTypeAsync(entry.Type);

        // Storage location: an entry without one is left as it is.
        if (entry.StorageLocation is { } location)
        {
            var locationKey = location.Location;
            entry.StorageLocation = await ResolveSharedAsync(
                locationKey,
                location,
                () => context.ArtifactStorageLocations.FirstOrDefaultAsync(l => l.Location == locationKey));
        }

        entry.Tags = await ResolveAllAsync(
            entry.Tags,
            tag => tag.Name,
            tag => context.ArtifactEntryTags.FirstOrDefaultAsync(t => t.Name == tag.Name));

        entry.ListedNames = await ResolveAllAsync(
            entry.ListedNames,
            name => name.Value,
            name => context.ArtifactAssociatedNames.FirstOrDefaultAsync(n => n.Value == name.Value));

        entry.Defects = await ResolveAllAsync(
            entry.Defects,
            defect => defect.Description,
            defect => context.ArtifactDefects.FirstOrDefaultAsync(d => d.Description == defect.Description));

        // File paths: swap in the stored row when the path already exists, otherwise keep
        // the incoming instance. New file instances are deliberately not shared between entries.
        var managedFilePaths = new List<FilePathListing>();
        foreach (var filepath in entry.Files)
        {
            var existingFilePath = await context.ArtifactFilePaths.FirstOrDefaultAsync(f => f.Path == filepath.Path);
            managedFilePaths.Add(existingFilePath ?? filepath);
        }

        entry.Files = managedFilePaths;
    }

    // Concatenate all of the text so it is searchable by Postgres.
    grouping.GenerateSearchIndex();

    // Walk the graph: anything that already has a key is an existing row, everything else is inserted.
    context.ChangeTracker.TrackGraph(grouping, node =>
    {
        node.Entry.State = node.Entry.IsKeySet ? EntityState.Unchanged : EntityState.Added;
    });

    await context.SaveChangesAsync();
}
/// <summary>
/// Applies the state of a detached grouping to the stored grouping that has the same Id:
/// title, description, visibility, identifier fields, type, category and child entries.
/// </summary>
/// <remarks>
/// Stored child entries whose Id is absent from <paramref name="updatedGrouping"/> are removed
/// from the grouping, entries whose Id matches a stored entry are updated in place, and all
/// other entries are added. When no grouping with the given Id exists, a warning is logged
/// and nothing is saved.
/// </remarks>
/// <param name="updatedGrouping">The detached grouping carrying the desired state.</param>
/// <exception cref="ArgumentNullException"><paramref name="updatedGrouping"/> is <c>null</c>.</exception>
public async Task UpdateGroupingAsync(ArtifactGrouping updatedGrouping)
{
    ArgumentNullException.ThrowIfNull(updatedGrouping);

    await using var context = await _context.CreateDbContextAsync();

    // 1. Load the stored grouping with every relation that is synchronized below.
    var existingGrouping = await context.ArtifactGroupings
        .Include(g => g.Category)
        .Include(g => g.IdentifierFields)
        .Include(g => g.Type)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.StorageLocation)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Type)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Files)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Tags)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.ListedNames)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Defects)
        .Where(g => g.Id == updatedGrouping.Id)
        .FirstOrDefaultAsync();

    if (existingGrouping == null)
    {
        // Keep the original "do nothing" contract, but leave a trace instead of failing silently.
        _logger.LogWarning("Artifact grouping {GroupingId} was not found; update skipped.", updatedGrouping.Id);
        return;
    }

    // 2. Copy the top-level values.
    existingGrouping.Title = updatedGrouping.Title;
    existingGrouping.Description = updatedGrouping.Description;
    existingGrouping.IsPublicallyVisible = updatedGrouping.IsPublicallyVisible;
    existingGrouping.IdentifierFields = updatedGrouping.IdentifierFields;

    // Type: re-use the stored type with the same name when there is one.
    existingGrouping.Type =
        await context.ArtifactTypes.FirstOrDefaultAsync(t => t.Name == updatedGrouping.Type.Name)
        ?? updatedGrouping.Type;

    // Category: only swapped when the name changed. The incoming category is tracked with the
    // same rule the create path uses (key set => existing row, otherwise new) rather than
    // always being marked for insertion.
    if (existingGrouping.Category.Name != updatedGrouping.Category.Name)
    {
        var categoryEntry = context.Entry(updatedGrouping.Category);
        categoryEntry.State = categoryEntry.IsKeySet ? EntityState.Unchanged : EntityState.Added;
        existingGrouping.Category = updatedGrouping.Category;
    }

    // 3. Synchronize the child entries.
    // Remove stored entries that are no longer present in the update.
    var updatedEntryIds = updatedGrouping.ChildArtifactEntries.Select(e => e.Id).ToHashSet();
    var entriesToRemove = existingGrouping.ChildArtifactEntries
        .Where(e => !updatedEntryIds.Contains(e.Id))
        .ToList();
    foreach (var entryToRemove in entriesToRemove)
    {
        existingGrouping.ChildArtifactEntries.Remove(entryToRemove);
    }

    // Snapshot the stored entries by Id BEFORE anything is added. Looking entries up in the
    // live collection would let a second new entry (same default Id) match the first new
    // entry that was just added and overwrite it.
    var storedEntriesById = existingGrouping.ChildArtifactEntries.ToDictionary(e => e.Id);

    foreach (var updatedEntry in updatedGrouping.ChildArtifactEntries)
    {
        // Swap the incoming entry's related entities for their stored counterparts first.
        await DeDuplicateEntryRelationsAsync(context, updatedEntry);

        if (storedEntriesById.TryGetValue(updatedEntry.Id, out var existingEntry))
        {
            // Stored entry: copy the values across.
            existingEntry.Title = updatedEntry.Title;
            existingEntry.Description = updatedEntry.Description;
            existingEntry.ArtifactNumber = updatedEntry.ArtifactNumber;
            existingEntry.IsPubliclyVisible = updatedEntry.IsPubliclyVisible;
            existingEntry.AssociatedDates = updatedEntry.AssociatedDates;
            existingEntry.FileTextContent = updatedEntry.FileTextContent;
            existingEntry.Files = updatedEntry.Files;

            // The relations on updatedEntry were de-duplicated above, so assign them directly.
            existingEntry.StorageLocation = updatedEntry.StorageLocation;
            existingEntry.Type = updatedEntry.Type;

            // Collections: replace the old content with the de-duplicated content.
            existingEntry.Tags.Clear();
            foreach (var tag in updatedEntry.Tags)
            {
                existingEntry.Tags.Add(tag);
            }

            existingEntry.ListedNames.Clear();
            foreach (var name in updatedEntry.ListedNames)
            {
                existingEntry.ListedNames.Add(name);
            }

            existingEntry.Defects.Clear();
            foreach (var defect in updatedEntry.Defects)
            {
                existingEntry.Defects.Add(defect);
            }
        }
        else
        {
            // Not stored yet: add it with its already de-duplicated relations.
            existingGrouping.ChildArtifactEntries.Add(updatedEntry);
        }
    }

    existingGrouping.GenerateSearchIndex();

    // 4. Persist everything in one save.
    await context.SaveChangesAsync();
}
/// <summary>
/// Replaces the related entities on an incoming entry with the matching rows from the
/// database where such rows exist, so that saving the entry does not insert duplicates.
/// </summary>
/// <remarks>
/// Storage location is matched by location, type by name, tags by name, listed names by
/// value, defects by description and files by Id. Relations that are <c>null</c> are left
/// untouched; a type without a name is not looked up.
/// </remarks>
/// <param name="context">The context that tracks the replacement entities.</param>
/// <param name="entry">The entry whose relations are rewritten in place.</param>
private async Task DeDuplicateEntryRelationsAsync(ApplicationDbContext context, ArtifactEntry entry)
{
    // --- Single-valued relations ---
    if (entry.StorageLocation is not null)
    {
        var existingLocation = await context.ArtifactStorageLocations
            .FirstOrDefaultAsync(l => l.Location == entry.StorageLocation.Location);
        if (existingLocation != null)
        {
            entry.StorageLocation = existingLocation;
        }
    }

    // A null or unnamed type is passed through, matching how the create path treats types.
    if (!string.IsNullOrEmpty(entry.Type?.Name))
    {
        var typeName = entry.Type.Name;
        var existingType = await context.ArtifactTypes.FirstOrDefaultAsync(t => t.Name == typeName);
        if (existingType != null)
        {
            entry.Type = existingType;
        }
    }

    // --- Collection relations ---
    if (entry.Tags != null)
    {
        var processedTags = new List<ArtifactEntryTag>();
        foreach (var tag in entry.Tags)
        {
            var existingTag = await context.ArtifactEntryTags.FirstOrDefaultAsync(t => t.Name == tag.Name) ?? tag;
            processedTags.Add(existingTag);
        }

        entry.Tags = processedTags;
    }

    if (entry.ListedNames != null)
    {
        var processedNames = new List<ListedName>();
        foreach (var name in entry.ListedNames)
        {
            var existingName = await context.ArtifactAssociatedNames.FirstOrDefaultAsync(n => n.Value == name.Value) ?? name;
            processedNames.Add(existingName);
        }

        entry.ListedNames = processedNames;
    }

    if (entry.Defects != null)
    {
        var processedDefects = new List<ArtifactDefect>();
        foreach (var defect in entry.Defects)
        {
            var existingDefect = await context.ArtifactDefects.FirstOrDefaultAsync(d => d.Description == defect.Description) ?? defect;
            processedDefects.Add(existingDefect);
        }

        entry.Defects = processedDefects;
    }

    if (entry.Files != null && entry.Files.Any())
    {
        // Swap the incoming file objects for the tracked rows with the same Ids.
        // NOTE(review): incoming files whose Id has no matching row are dropped from the
        // entry here - confirm that files are always stored before the entry is saved.
        var inputFileIds = entry.Files.Select(f => f.Id).ToList();
        var trackedFiles = await context.ArtifactFilePaths
            .Where(dbFile => inputFileIds.Contains(dbFile.Id))
            .ToListAsync();

        entry.Files = trackedFiles;
    }
}
/// <summary>
/// Synchronizes a collection with an updated one by key: items whose key is missing from
/// <paramref name="updatedItems"/> are removed, and items whose key is new are added.
/// </summary>
/// <remarks>
/// For <see cref="ArtifactEntryTag"/>, <see cref="ListedName"/> and <see cref="ArtifactDefect"/>
/// the new keys are looked up in the database in one query (by name, value and description
/// respectively; the keys must be strings) and the stored entity is added when one exists.
/// For any other entity type no lookup is made and the incoming item is added as it is.
/// Items whose key is present on both sides are left untouched.
/// </remarks>
/// <param name="context">Context used for the database lookup.</param>
/// <param name="existingItems">The collection to modify in place.</param>
/// <param name="updatedItems">The collection describing the desired content.</param>
/// <param name="keySelector">Extracts the comparison key from an item.</param>
private async Task SyncCollectionAsync<TEntity, TKey>(
    DbContext context,
    ICollection<TEntity> existingItems,
    ICollection<TEntity> updatedItems,
    Func<TEntity, TKey> keySelector) where TEntity : class
{
    var existingKeys = existingItems.Select(keySelector).ToHashSet();
    var updatedKeys = updatedItems.Select(keySelector).ToHashSet();

    // 1. Remove items that are no longer in the updated collection.
    //    Materialized before removing so the collection is not modified while enumerated.
    var itemsToRemove = existingItems.Where(item => !updatedKeys.Contains(keySelector(item))).ToList();
    foreach (var item in itemsToRemove)
    {
        existingItems.Remove(item);
    }

    // 2. Identify keys for brand new items (as a set, for constant-time membership tests below).
    var keysToAdd = updatedKeys.Where(key => !existingKeys.Contains(key)).ToHashSet();
    if (keysToAdd.Count == 0)
    {
        return; // Nothing to add
    }

    // 3. Batch-fetch the stored entities that match the new keys. The per-type branches keep
    //    the filter a plain property comparison so it can be translated to SQL.
    Dictionary<TKey, TEntity> existingDbItemsMap = [];
    if (typeof(TEntity) == typeof(ArtifactEntryTag))
    {
        var tagKeys = keysToAdd.Cast<string>().ToList();
        var tags = await context.Set<ArtifactEntryTag>()
            .Where(t => tagKeys.Contains(t.Name))
            .ToListAsync();
        existingDbItemsMap = tags.ToDictionary(t => (TKey)(object)t.Name, t => (TEntity)(object)t);
    }
    else if (typeof(TEntity) == typeof(ListedName))
    {
        var nameKeys = keysToAdd.Cast<string>().ToList();
        var names = await context.Set<ListedName>()
            .Where(n => nameKeys.Contains(n.Value))
            .ToListAsync();
        existingDbItemsMap = names.ToDictionary(n => (TKey)(object)n.Value, n => (TEntity)(object)n);
    }
    else if (typeof(TEntity) == typeof(ArtifactDefect))
    {
        var defectKeys = keysToAdd.Cast<string>().ToList();
        var defects = await context.Set<ArtifactDefect>()
            .Where(d => defectKeys.Contains(d.Description))
            .ToListAsync();
        existingDbItemsMap = defects.ToDictionary(d => (TKey)(object)d.Description, d => (TEntity)(object)d);
    }

    // 4. Add the items, preferring the stored entity when one exists for the key.
    foreach (var updatedItem in updatedItems)
    {
        var key = keySelector(updatedItem);
        if (!keysToAdd.Contains(key))
        {
            continue;
        }

        existingItems.Add(existingDbItemsMap.TryGetValue(key, out var dbItem) ? dbItem : updatedItem);
    }
}
/// <summary>
/// Deletes the grouping with the given Id directly in the database.
/// </summary>
/// <param name="id">Id of the grouping to delete. Nothing happens when no grouping has this Id.</param>
public async Task DeleteGroupingAsync(int id)
{
    await using var context = await _context.CreateDbContextAsync();

    // ExecuteDeleteAsync issues the DELETE immediately and bypasses the change tracker,
    // so there is nothing left for SaveChangesAsync to persist.
    await context.ArtifactGroupings
        .Where(p => p.Id == id)
        .ExecuteDeleteAsync();
}
/// <summary>
/// Marks the given grouping for removal and saves the change.
/// </summary>
/// <param name="grouping">The grouping to delete.</param>
public async Task DeleteGroupingAsync(ArtifactGrouping grouping)
{
    await using (var db = await _context.CreateDbContextAsync())
    {
        var groupings = db.ArtifactGroupings;
        groupings.Remove(grouping);

        await db.SaveChangesAsync();
    }
}
/// <summary>
/// Returns one page of groupings ordered by Id, each with its child entries and category.
/// </summary>
/// <param name="pageNumber">1-based page number.</param>
/// <param name="resultsCount">Maximum number of groupings in the page; must be at least 1.</param>
/// <returns>The groupings on the requested page; empty when the page is past the end.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="pageNumber"/> or <paramref name="resultsCount"/> is less than 1.
/// </exception>
public async Task<List<ArtifactGrouping>> GetGroupingsPaged(int pageNumber, int resultsCount)
{
    // Validate before a context is created. The three-argument constructor is used because
    // the single-string overload interprets its argument as the parameter name, not the message.
    if (pageNumber < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(pageNumber),
            pageNumber,
            $"Either page number or number of results was less than or equal to 0. {nameof(pageNumber)}={pageNumber} {nameof(resultsCount)}={resultsCount}");
    }

    if (resultsCount < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(resultsCount),
            resultsCount,
            $"Either page number or number of results was less than or equal to 0. {nameof(pageNumber)}={pageNumber} {nameof(resultsCount)}={resultsCount}");
    }

    await using var context = await _context.CreateDbContextAsync();

    // A stable ordering is required for Skip/Take paging to be deterministic.
    return await context.ArtifactGroupings
        .Include(g => g.ChildArtifactEntries)
        .Include(g => g.Category)
        .OrderBy(g => g.Id)
        .Skip((pageNumber - 1) * resultsCount)
        .Take(resultsCount)
        .ToListAsync();
}
/// <summary>
/// Counts all stored groupings.
/// </summary>
/// <returns>The number of groupings.</returns>
public async Task<int> GetTotalCount()
{
    await using var context = await _context.CreateDbContextAsync();

    // Use the asynchronous count so the calling thread is not blocked on the database.
    return await context.ArtifactGroupings.CountAsync();
}
}