Files
openarchival/OpenArchival.DataAccess/Providers/ArtifactGroupingProvider.cs

525 lines
21 KiB
C#

using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using System.Diagnostics.CodeAnalysis;
namespace OpenArchival.DataAccess;
public class ArtifactGroupingProvider : IArtifactGroupingProvider
{
/// <summary>Factory from which each operation creates its own <see cref="ApplicationDbContext"/>.</summary>
private readonly IDbContextFactory<ApplicationDbContext> _context;

/// <summary>Logger injected for this provider.</summary>
private readonly ILogger<ArtifactGroupingProvider> _logger;

/// <summary>
/// Creates the provider from its injected dependencies.
/// </summary>
/// <param name="context">Factory used to create database contexts.</param>
/// <param name="logger">Logger for this provider.</param>
[SetsRequiredMembers]
public ArtifactGroupingProvider(IDbContextFactory<ApplicationDbContext> context, ILogger<ArtifactGroupingProvider> logger)
{
    (_context, _logger) = (context, logger);
}
/// <summary>
/// Loads a single grouping by its database id, including its category, identifier fields,
/// type, view count and all child entries with their related data.
/// </summary>
/// <param name="id">Primary key of the grouping.</param>
/// <returns>The matching grouping, or <c>null</c> when none exists.</returns>
public async Task<ArtifactGrouping?> GetGroupingAsync(int id)
{
    await using var context = await _context.CreateDbContextAsync();
    return await IncludeFullGraph(context.ArtifactGroupings)
        .FirstOrDefaultAsync(g => g.Id == id);
}

/// <summary>
/// Loads a single grouping by its artifact grouping identifier, including the same related
/// data as the id-based overload.
/// </summary>
/// <param name="artifactGroupingIdentifier">Value compared against <c>ArtifactGroupingIdentifier</c>.</param>
/// <returns>The matching grouping, or <c>null</c> when none exists.</returns>
public async Task<ArtifactGrouping?> GetGroupingAsync(string artifactGroupingIdentifier)
{
    await using var context = await _context.CreateDbContextAsync();
    return await IncludeFullGraph(context.ArtifactGroupings)
        .FirstOrDefaultAsync(g => g.ArtifactGroupingIdentifier == artifactGroupingIdentifier);
}

/// <summary>
/// Applies the eager-loading chain shared by both <c>GetGroupingAsync</c> overloads so the
/// two lookups cannot drift apart.
/// </summary>
private static IQueryable<ArtifactGrouping> IncludeFullGraph(IQueryable<ArtifactGrouping> groupings)
{
    return groupings
        .Include(g => g.Category)
        .Include(g => g.IdentifierFields)
        .Include(g => g.Type)
        .Include(g => g.ViewCount)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.StorageLocation)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Type)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Tags)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.ListedNames)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Defects);
}
/// <summary>
/// Persists a new grouping together with its child entries. Related types, storage
/// locations, tags, listed names and defects are matched by their text value against rows
/// already in the database (and against each other within this call) so that an existing
/// or already-seen instance is re-used instead of a duplicate being inserted.
/// </summary>
/// <param name="grouping">The grouping to insert, including its child entries.</param>
/// <exception cref="ArgumentNullException"><paramref name="grouping"/> is <c>null</c>.</exception>
public async Task CreateGroupingAsync(ArtifactGrouping grouping)
{
    ArgumentNullException.ThrowIfNull(grouping);

    await using var context = await _context.CreateDbContextAsync();

    // Attach the Category to the context. If it has a key, it will be tracked.
    context.Attach(grouping.Category);

    // Per-call caches: the first instance seen for a given text value wins, so the same
    // new tag/name/etc. used by several entries ends up as one shared instance.
    var processedTypes = new Dictionary<string, ArtifactType>();
    var processedLocations = new Dictionary<string, ArtifactStorageLocation>();
    var processedTags = new Dictionary<string, ArtifactEntryTag>();
    var processedNames = new Dictionary<string, ListedName>();
    var processedDefects = new Dictionary<string, ArtifactDefect>();

    // Single resolution routine shared by every related-entity kind:
    // cache hit -> cached instance; otherwise database row if one exists; otherwise the candidate.
    static async Task<T> ResolveAsync<T>(
        T candidate,
        string? key,
        Dictionary<string, T> cache,
        Func<string, Task<T?>> findInDatabaseAsync) where T : class
    {
        // A null candidate or a candidate without a usable key cannot be matched; pass it through.
        if (string.IsNullOrEmpty(key))
        {
            return candidate;
        }

        if (cache.TryGetValue(key, out var cached))
        {
            return cached;
        }

        var resolved = await findInDatabaseAsync(key) ?? candidate;
        cache[key] = resolved;
        return resolved;
    }

    // Resolves every item of a collection sequentially (a DbContext must not run queries
    // concurrently). A null collection yields an empty list.
    static async Task<List<T>> ResolveAllAsync<T>(IEnumerable<T>? items, Func<T, Task<T>> resolveAsync)
    {
        var resolved = new List<T>();
        if (items is null)
        {
            return resolved;
        }

        foreach (var item in items)
        {
            resolved.Add(await resolveAsync(item));
        }

        return resolved;
    }

    Task<ArtifactType> GetUniqueTypeAsync(ArtifactType type) =>
        ResolveAsync(type, type?.Name, processedTypes,
            name => context.ArtifactTypes.FirstOrDefaultAsync(t => t.Name == name));

    Task<ArtifactStorageLocation> GetUniqueLocationAsync(ArtifactStorageLocation location) =>
        ResolveAsync(location, location?.Location, processedLocations,
            value => context.ArtifactStorageLocations.FirstOrDefaultAsync(l => l.Location == value));

    Task<ArtifactEntryTag> GetUniqueTagAsync(ArtifactEntryTag tag) =>
        ResolveAsync(tag, tag?.Name, processedTags,
            name => context.ArtifactEntryTags.FirstOrDefaultAsync(t => t.Name == name));

    Task<ListedName> GetUniqueNameAsync(ListedName listedName) =>
        ResolveAsync(listedName, listedName?.Value, processedNames,
            value => context.ArtifactAssociatedNames.FirstOrDefaultAsync(n => n.Value == value));

    Task<ArtifactDefect> GetUniqueDefectAsync(ArtifactDefect defect) =>
        ResolveAsync(defect, defect?.Description, processedDefects,
            description => context.ArtifactDefects.FirstOrDefaultAsync(d => d.Description == description));

    // De-duplicate the main grouping's type.
    grouping.Type = await GetUniqueTypeAsync(grouping.Type);

    // De-duplicate everything hanging off each child entry.
    foreach (var entry in grouping.ChildArtifactEntries)
    {
        entry.Type = await GetUniqueTypeAsync(entry.Type);
        entry.StorageLocation = await GetUniqueLocationAsync(entry.StorageLocation);
        entry.Tags = await ResolveAllAsync<ArtifactEntryTag>(entry.Tags, GetUniqueTagAsync);
        entry.ListedNames = await ResolveAllAsync<ListedName>(entry.ListedNames, GetUniqueNameAsync);
        entry.Defects = await ResolveAllAsync<ArtifactDefect>(entry.Defects, GetUniqueDefectAsync);
    }

    // Concatenate all of the text to be searchable by postgres.
    grouping.GenerateSearchIndex();

    // Walk the graph: an entity whose key is already set is treated as an existing,
    // unchanged row; anything else is new and must be inserted. Entities that are already
    // tracked (the rows fetched above) are not visited by TrackGraph.
    context.ChangeTracker.TrackGraph(grouping, node =>
    {
        node.Entry.State = node.Entry.IsKeySet ? EntityState.Unchanged : EntityState.Added;
    });

    await context.SaveChangesAsync();
}
/// <summary>
/// Applies the state of <paramref name="updatedGrouping"/> to the stored grouping with the
/// same id: scalar properties are copied, Type/Category/ViewCount are reconciled, and the
/// child entry collection is synchronized (missing entries removed, matching entries
/// updated, unmatched entries added).
/// </summary>
/// <param name="updatedGrouping">The desired state of the grouping.</param>
/// <remarks>
/// When no grouping with the given id exists nothing is changed; a warning is logged.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="updatedGrouping"/> is <c>null</c>.</exception>
public async Task UpdateGroupingAsync(ArtifactGrouping updatedGrouping)
{
    ArgumentNullException.ThrowIfNull(updatedGrouping);

    await using var context = await _context.CreateDbContextAsync();

    // 1. Retrieve the existing grouping from the database, eagerly loading all related data.
    var existingGrouping = await context.ArtifactGroupings
        .Include(g => g.Category)
        .Include(g => g.IdentifierFields)
        .Include(g => g.Type)
        .Include(g => g.ViewCount)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.StorageLocation)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Type)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Tags)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.ListedNames)
        .Include(g => g.ChildArtifactEntries)
            .ThenInclude(e => e.Defects)
        .FirstOrDefaultAsync(g => g.Id == updatedGrouping.Id);

    if (existingGrouping is null)
    {
        // Keep the original no-op contract, but make the miss visible instead of silent.
        _logger.LogWarning(
            "Cannot update artifact grouping {GroupingId}: no grouping with that id exists.",
            updatedGrouping.Id);
        return;
    }

    // 2. Copy over the directly assignable properties.
    existingGrouping.Title = updatedGrouping.Title;
    existingGrouping.Description = updatedGrouping.Description;
    existingGrouping.IsPublicallyVisible = updatedGrouping.IsPublicallyVisible;
    existingGrouping.IdentifierFields = updatedGrouping.IdentifierFields;

    // Type: re-use the stored type with the same name when there is one.
    var storedType = await context.ArtifactTypes.FirstOrDefaultAsync(t => t.Name == updatedGrouping.Type.Name);
    existingGrouping.Type = storedType ?? updatedGrouping.Type;

    // Category: only touched when the name differs.
    if (existingGrouping.Category.Name != updatedGrouping.Category.Name)
    {
        // Track ONLY the category itself. A category that already has a key is an existing
        // row and must not be inserted again (context.Add would mark it Added and try to
        // INSERT it); one without a key is new.
        var categoryEntry = context.Entry(updatedGrouping.Category);
        categoryEntry.State = categoryEntry.IsKeySet ? EntityState.Unchanged : EntityState.Added;
        existingGrouping.Category = updatedGrouping.Category;
    }

    // ViewCount: create on first use, otherwise overwrite the stored counter.
    if (updatedGrouping.ViewCount != null)
    {
        if (existingGrouping.ViewCount == null)
        {
            existingGrouping.ViewCount = new ArtifactGroupingViewCount
            {
                Grouping = existingGrouping,
                Views = updatedGrouping.ViewCount.Views
            };
        }
        else
        {
            existingGrouping.ViewCount.Views = updatedGrouping.ViewCount.Views;
        }
    }

    // TODO: Handle BlogPosts update (requires model definition & de-duplication).

    // 3. Synchronize the ChildArtifactEntries collection.
    var updatedEntryIds = updatedGrouping.ChildArtifactEntries.Select(e => e.Id).ToHashSet();
    var entriesToRemove = existingGrouping.ChildArtifactEntries
        .Where(e => !updatedEntryIds.Contains(e.Id))
        .ToList();
    foreach (var entryToRemove in entriesToRemove)
    {
        existingGrouping.ChildArtifactEntries.Remove(entryToRemove);
    }

    // Match incoming entries only against entries that were loaded from the database.
    // Searching the live collection would let a second brand-new entry (whose Id is still
    // the default value) "match" the first brand-new entry added earlier in this loop and
    // overwrite it instead of being added.
    var persistedEntries = existingGrouping.ChildArtifactEntries.ToList();

    foreach (var updatedEntry in updatedGrouping.ChildArtifactEntries)
    {
        // FIRST, de-duplicate all related entities on the incoming entry.
        await DeDuplicateEntryRelationsAsync(context, updatedEntry);

        var existingEntry = persistedEntries.FirstOrDefault(e => e.Id == updatedEntry.Id);
        if (existingEntry is null)
        {
            // The entry is new and its children are already de-duplicated, so just add it.
            existingGrouping.ChildArtifactEntries.Add(updatedEntry);
            continue;
        }

        // The entry exists, so copy its properties across.
        existingEntry.Title = updatedEntry.Title;
        existingEntry.Description = updatedEntry.Description;
        existingEntry.ArtifactNumber = updatedEntry.ArtifactNumber;
        existingEntry.IsPubliclyVisible = updatedEntry.IsPubliclyVisible;
        existingEntry.AssociatedDates = updatedEntry.AssociatedDates;
        existingEntry.FileTextContent = updatedEntry.FileTextContent;
        existingEntry.Quantity = updatedEntry.Quantity;
        existingEntry.Links = updatedEntry.Links;

        // The relations on updatedEntry are already de-duplicated, so just assign them.
        existingEntry.StorageLocation = updatedEntry.StorageLocation;
        existingEntry.Type = updatedEntry.Type;

        // Collections: replace the stored contents with the de-duplicated incoming ones.
        // A null incoming collection is treated as empty (the de-duplication step accepts
        // null ListedNames/Defects, so this step must not crash on them).
        existingEntry.Tags.Clear();
        updatedEntry.Tags?.ForEach(tag => existingEntry.Tags.Add(tag));
        existingEntry.ListedNames.Clear();
        updatedEntry.ListedNames?.ForEach(name => existingEntry.ListedNames.Add(name));
        existingEntry.Defects.Clear();
        updatedEntry.Defects?.ForEach(defect => existingEntry.Defects.Add(defect));
    }

    existingGrouping.GenerateSearchIndex();

    // 4. Save all changes.
    await context.SaveChangesAsync();
}
/// <summary>
/// Replaces the related entities on <paramref name="entry"/> (storage location, type, tags,
/// listed names, defects) with the instance already stored in the database when one with
/// the same text value exists, so saving the entry does not insert duplicates.
/// </summary>
/// <param name="context">The context used for the lookups.</param>
/// <param name="entry">The incoming entry whose relations are rewritten in place.</param>
/// <remarks>
/// Missing relations (a null reference, a null collection, or an item whose text value is
/// null or empty) are left as they are and are not looked up.
/// </remarks>
private async Task DeDuplicateEntryRelationsAsync(ApplicationDbContext context, ArtifactEntry entry)
{
    // Resolves each item of a collection: items sharing a text value within this collection
    // collapse to one instance; a database row wins over the incoming instance.
    static async Task<List<T>> ResolveAsync<T>(
        IEnumerable<T> items,
        Func<T, string?> keyOf,
        Func<string, Task<T?>> findInDatabaseAsync) where T : class
    {
        var resolved = new List<T>();
        var seen = new Dictionary<string, T>();
        foreach (var item in items)
        {
            var key = keyOf(item);
            if (string.IsNullOrEmpty(key))
            {
                // Nothing to match on; keep the item untouched.
                resolved.Add(item);
                continue;
            }

            if (!seen.TryGetValue(key, out var match))
            {
                match = await findInDatabaseAsync(key) ?? item;
                seen[key] = match;
            }

            resolved.Add(match);
        }

        return resolved;
    }

    // --- Single-valued relations ---
    var locationValue = entry.StorageLocation?.Location;
    if (!string.IsNullOrEmpty(locationValue))
    {
        var existingLocation = await context.ArtifactStorageLocations
            .FirstOrDefaultAsync(l => l.Location == locationValue);
        if (existingLocation is not null)
        {
            entry.StorageLocation = existingLocation;
        }
    }

    var typeName = entry.Type?.Name;
    if (!string.IsNullOrEmpty(typeName))
    {
        var existingType = await context.ArtifactTypes
            .FirstOrDefaultAsync(t => t.Name == typeName);
        if (existingType is not null)
        {
            entry.Type = existingType;
        }
    }

    // --- Collection relations ---
    if (entry.Tags is not null)
    {
        entry.Tags = await ResolveAsync(
            entry.Tags,
            tag => tag?.Name,
            name => context.ArtifactEntryTags.FirstOrDefaultAsync(t => t.Name == name));
    }

    if (entry.ListedNames is not null)
    {
        entry.ListedNames = await ResolveAsync(
            entry.ListedNames,
            listedName => listedName?.Value,
            value => context.ArtifactAssociatedNames.FirstOrDefaultAsync(n => n.Value == value));
    }

    if (entry.Defects is not null)
    {
        entry.Defects = await ResolveAsync(
            entry.Defects,
            defect => defect?.Description,
            description => context.ArtifactDefects.FirstOrDefaultAsync(d => d.Description == description));
    }
}
/// <summary>
/// A helper method to synchronize many-to-many collections: items whose key is absent from
/// <paramref name="updatedItems"/> are removed from <paramref name="existingItems"/>, and
/// items whose key is new are added, preferring the instance loaded from the database when
/// one with the same key exists.
/// </summary>
/// <param name="context">Context used to look up already-stored entities.</param>
/// <param name="existingItems">The collection to modify in place.</param>
/// <param name="updatedItems">The desired contents.</param>
/// <param name="keySelector">Extracts the comparison key from an item.</param>
/// <remarks>
/// Database lookups are only implemented for <c>ArtifactEntryTag</c>, <c>ListedName</c> and
/// <c>ArtifactDefect</c>, and for those the keys must be strings (they are cast to
/// <see cref="string"/>). For any other entity type every new item is added as-is.
/// </remarks>
private async Task SyncCollectionAsync<TEntity, TKey>(
    DbContext context,
    ICollection<TEntity> existingItems,
    ICollection<TEntity> updatedItems,
    Func<TEntity, TKey> keySelector) where TEntity : class
{
    // Materialize both key sets once so every membership test below is a hash lookup.
    var existingKeys = existingItems.Select(keySelector).ToHashSet();
    var updatedKeys = updatedItems.Select(keySelector).ToHashSet();

    // 1. Remove items that are no longer in the updated collection.
    var itemsToRemove = existingItems
        .Where(item => !updatedKeys.Contains(keySelector(item)))
        .ToList();
    foreach (var item in itemsToRemove)
    {
        existingItems.Remove(item);
    }

    // 2. Identify keys for brand new items.
    var keysToAdd = updatedKeys.Where(key => !existingKeys.Contains(key)).ToHashSet();
    if (keysToAdd.Count == 0)
    {
        return; // Nothing to add
    }

    // 3. Batch-fetch all entities from the DB that match the new keys.
    var existingDbItemsMap = new Dictionary<TKey, TEntity>();

    // Indexes fetched rows by their text value. The casts go through object because the
    // compiler cannot relate the concrete row type to TEntity; the typeof checks below
    // guarantee they succeed for the entity, and keys are required to be strings.
    void IndexRows<TRow>(IEnumerable<TRow> rows, Func<TRow, string> textOf) where TRow : class
    {
        foreach (var row in rows)
        {
            existingDbItemsMap.Add((TKey)(object)textOf(row), (TEntity)(object)row);
        }
    }

    if (typeof(TEntity) == typeof(ArtifactEntryTag))
    {
        var tagKeys = keysToAdd.Cast<string>().ToList();
        var tags = await context.Set<ArtifactEntryTag>()
            .Where(t => tagKeys.Contains(t.Name))
            .ToListAsync();
        IndexRows(tags, t => t.Name);
    }
    else if (typeof(TEntity) == typeof(ListedName))
    {
        var nameKeys = keysToAdd.Cast<string>().ToList();
        var names = await context.Set<ListedName>()
            .Where(n => nameKeys.Contains(n.Value))
            .ToListAsync();
        IndexRows(names, n => n.Value);
    }
    else if (typeof(TEntity) == typeof(ArtifactDefect))
    {
        var defectKeys = keysToAdd.Cast<string>().ToList();
        var defects = await context.Set<ArtifactDefect>()
            .Where(d => defectKeys.Contains(d.Description))
            .ToListAsync();
        IndexRows(defects, d => d.Description);
    }
    // TODO: Add support for other entity types like BlogPost or ArtifactEntry if needed

    // 4. Add the items, using the tracked entity from the DB if it exists.
    foreach (var updatedItem in updatedItems.Where(item => keysToAdd.Contains(keySelector(item))))
    {
        var key = keySelector(updatedItem);

        // Prefer the instance loaded from the DB; otherwise this is a brand new item and the
        // untracked one from the input is added.
        existingItems.Add(existingDbItemsMap.TryGetValue(key, out var dbItem) ? dbItem : updatedItem);
    }
}
/// <summary>
/// Deletes the grouping with the given id directly in the database, without loading it.
/// </summary>
/// <param name="id">Primary key of the grouping to delete.</param>
public async Task DeleteGroupingAsync(int id)
{
    await using var context = await _context.CreateDbContextAsync();

    // ExecuteDeleteAsync issues the DELETE immediately and bypasses the change tracker,
    // so there is nothing left for SaveChangesAsync to persist afterwards.
    await context.ArtifactGroupings
        .Where(p => p.Id == id)
        .ExecuteDeleteAsync();
}
/// <summary>
/// Deletes the supplied grouping by marking it for removal on a fresh context and saving.
/// </summary>
/// <param name="grouping">The grouping instance to delete.</param>
public async Task DeleteGroupingAsync(ArtifactGrouping grouping)
{
    await using var db = await _context.CreateDbContextAsync();

    var groupings = db.ArtifactGroupings;
    groupings.Remove(grouping);

    _ = await db.SaveChangesAsync();
}
/// <summary>
/// Returns one page of groupings ordered by id, with their child entries and category loaded.
/// </summary>
/// <param name="pageNumber">1-based page index.</param>
/// <param name="resultsCount">Number of groupings per page; must be at least 1.</param>
/// <returns>The groupings on the requested page (possibly empty).</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="pageNumber"/> or <paramref name="resultsCount"/> is less than 1.
/// </exception>
public async Task<List<ArtifactGrouping>> GetGroupingsPaged(int pageNumber, int resultsCount)
{
    // Validate before opening a context so bad input costs nothing.
    if (pageNumber < 1 || resultsCount < 1)
    {
        // Use the (paramName, message) overload: the single-string constructor treats its
        // argument as the parameter name, which garbles the resulting exception message.
        throw new ArgumentOutOfRangeException(
            pageNumber < 1 ? nameof(pageNumber) : nameof(resultsCount),
            $"Either page number or number of results was less than or equal to 0. {nameof(pageNumber)}={pageNumber} {nameof(resultsCount)}={resultsCount}");
    }

    await using var context = await _context.CreateDbContextAsync();

    return await context.ArtifactGroupings
        .Include(g => g.ChildArtifactEntries)
        .Include(g => g.Category)
        .OrderBy(g => g.Id)
        .Skip((pageNumber - 1) * resultsCount)
        .Take(resultsCount)
        .ToListAsync();
}
/// <summary>
/// Counts all groupings in the database.
/// </summary>
/// <returns>The total number of groupings.</returns>
public async Task<int> GetTotalCount()
{
    await using var context = await _context.CreateDbContextAsync();

    // Use the asynchronous count so the calling thread is not blocked on the database round trip.
    return await context.ArtifactGroupings.CountAsync();
}
}