using Ryujinx.Graphics.GAL;
using Ryujinx.Memory.Range;
using System;
using System.Collections.Generic;
using System.Linq;
namespace Ryujinx.Graphics.Gpu.Memory
{
    /// 
    /// Buffer cache.
    /// 
    class BufferCache : IDisposable
    {
        private const int OverlapsBufferInitialCapacity = 10;
        private const int OverlapsBufferMaxCapacity     = 10000;
        private const ulong BufferAlignmentSize = 0x1000;
        private const ulong BufferAlignmentMask = BufferAlignmentSize - 1;
        private const ulong MaxDynamicGrowthSize = 0x100000;
        private readonly GpuContext _context;
        private readonly PhysicalMemory _physicalMemory;
        /// 
        /// Only modified from the GPU thread. Must lock for add/remove.
        /// Must lock for any access from other threads.
        /// 
        private readonly RangeList _buffers;
        private Buffer[] _bufferOverlaps;
        private readonly Dictionary _dirtyCache;
        private readonly Dictionary _modifiedCache;
        private bool _pruneCaches;
        public event Action NotifyBuffersModified;
        /// 
        /// Creates a new instance of the buffer manager.
        /// 
        /// The GPU context that the buffer manager belongs to
        /// Physical memory where the cached buffers are mapped
        public BufferCache(GpuContext context, PhysicalMemory physicalMemory)
        {
            _context = context;
            _physicalMemory = physicalMemory;
            _buffers = new RangeList();
            _bufferOverlaps = new Buffer[OverlapsBufferInitialCapacity];
            _dirtyCache = new Dictionary();
            // There are a lot more entries on the modified cache, so it is separate from the one for ForceDirty.
            _modifiedCache = new Dictionary();
        }
        /// 
        /// Handles removal of buffers written to a memory region being unmapped.
        /// 
        /// Sender object
        /// Event arguments
        public void MemoryUnmappedHandler(object sender, UnmapEventArgs e)
        {
            Buffer[] overlaps = new Buffer[10];
            int overlapCount;
            ulong address = ((MemoryManager)sender).Translate(e.Address);
            ulong size = e.Size;
            lock (_buffers)
            {
                overlapCount = _buffers.FindOverlaps(address, size, ref overlaps);
            }
            for (int i = 0; i < overlapCount; i++)
            {
                overlaps[i].Unmapped(address, size);
            }
        }
        /// 
        /// Performs address translation of the GPU virtual address, and creates a
        /// new buffer, if needed, for the specified range.
        /// 
        /// GPU memory manager where the buffer is mapped
        /// Start GPU virtual address of the buffer
        /// Size in bytes of the buffer
        /// CPU virtual address of the buffer, after address translation
        public ulong TranslateAndCreateBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size)
        {
            if (gpuVa == 0)
            {
                return 0;
            }
            ulong address = memoryManager.Translate(gpuVa);
            if (address == MemoryManager.PteUnmapped)
            {
                return 0;
            }
            CreateBuffer(address, size);
            return address;
        }
        /// 
        /// Creates a new buffer for the specified range, if it does not yet exist.
        /// This can be used to ensure the existance of a buffer.
        /// 
        /// Address of the buffer in memory
        /// Size of the buffer in bytes
        public void CreateBuffer(ulong address, ulong size)
        {
            ulong endAddress = address + size;
            ulong alignedAddress = address & ~BufferAlignmentMask;
            ulong alignedEndAddress = (endAddress + BufferAlignmentMask) & ~BufferAlignmentMask;
            // The buffer must have the size of at least one page.
            if (alignedEndAddress == alignedAddress)
            {
                alignedEndAddress += BufferAlignmentSize;
            }
            CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress);
        }
        /// 
        /// Performs address translation of the GPU virtual address, and attempts to force
        /// the buffer in the region as dirty.
        /// The buffer lookup for this function is cached in a dictionary for quick access, which
        /// accelerates common UBO updates.
        /// 
        /// GPU memory manager where the buffer is mapped
        /// Start GPU virtual address of the buffer
        /// Size in bytes of the buffer
        public void ForceDirty(MemoryManager memoryManager, ulong gpuVa, ulong size)
        {
            if (_pruneCaches)
            {
                Prune();
            }
            if (!_dirtyCache.TryGetValue(gpuVa, out BufferCacheEntry result) ||
                result.EndGpuAddress < gpuVa + size ||
                result.UnmappedSequence != result.Buffer.UnmappedSequence)
            {
                ulong address = TranslateAndCreateBuffer(memoryManager, gpuVa, size);
                result = new BufferCacheEntry(address, gpuVa, GetBuffer(address, size));
                _dirtyCache[gpuVa] = result;
            }
            result.Buffer.ForceDirty(result.Address, size);
        }
        /// 
        /// Checks if the given buffer range has been GPU modifed.
        /// 
        /// GPU memory manager where the buffer is mapped
        /// Start GPU virtual address of the buffer
        /// Size in bytes of the buffer
        /// True if modified, false otherwise
        public bool CheckModified(MemoryManager memoryManager, ulong gpuVa, ulong size, out ulong outAddr)
        {
            if (_pruneCaches)
            {
                Prune();
            }
            // Align the address to avoid creating too many entries on the quick lookup dictionary.
            ulong mask = BufferAlignmentMask;
            ulong alignedGpuVa = gpuVa & (~mask);
            ulong alignedEndGpuVa = (gpuVa + size + mask) & (~mask);
            size = alignedEndGpuVa - alignedGpuVa;
            if (!_modifiedCache.TryGetValue(alignedGpuVa, out BufferCacheEntry result) ||
                result.EndGpuAddress < alignedEndGpuVa ||
                result.UnmappedSequence != result.Buffer.UnmappedSequence)
            {
                ulong address = TranslateAndCreateBuffer(memoryManager, alignedGpuVa, size);
                result = new BufferCacheEntry(address, alignedGpuVa, GetBuffer(address, size));
                _modifiedCache[alignedGpuVa] = result;
            }
            outAddr = result.Address | (gpuVa & mask);
            return result.Buffer.IsModified(result.Address, size);
        }
        /// 
        /// Creates a new buffer for the specified range, if needed.
        /// If a buffer where this range can be fully contained already exists,
        /// then the creation of a new buffer is not necessary.
        /// 
        /// Address of the buffer in guest memory
        /// Size in bytes of the buffer
        private void CreateBufferAligned(ulong address, ulong size)
        {
            int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref _bufferOverlaps);
            if (overlapsCount != 0)
            {
                // The buffer already exists. We can just return the existing buffer
                // if the buffer we need is fully contained inside the overlapping buffer.
                // Otherwise, we must delete the overlapping buffers and create a bigger buffer
                // that fits all the data we need. We also need to copy the contents from the
                // old buffer(s) to the new buffer.
                ulong endAddress = address + size;
                if (_bufferOverlaps[0].Address > address || _bufferOverlaps[0].EndAddress < endAddress)
                {
                    // Check if the following conditions are met:
                    // - We have a single overlap.
                    // - The overlap starts at or before the requested range. That is, the overlap happens at the end.
                    // - The size delta between the new, merged buffer and the old one is of at most 2 pages.
                    // In this case, we attempt to extend the buffer further than the requested range,
                    // this can potentially avoid future resizes if the application keeps using overlapping
                    // sequential memory.
                    // Allowing for 2 pages (rather than just one) is necessary to catch cases where the
                    // range crosses a page, and after alignment, ends having a size of 2 pages.
                    if (overlapsCount == 1 &&
                        address >= _bufferOverlaps[0].Address &&
                        endAddress - _bufferOverlaps[0].EndAddress <= BufferAlignmentSize * 2)
                    {
                        // Try to grow the buffer by 1.5x of its current size.
                        // This improves performance in the cases where the buffer is resized often by small amounts.
                        ulong existingSize = _bufferOverlaps[0].Size;
                        ulong growthSize = (existingSize + Math.Min(existingSize >> 1, MaxDynamicGrowthSize)) & ~BufferAlignmentMask;
                        size = Math.Max(size, growthSize);
                        endAddress = address + size;
                        overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref _bufferOverlaps);
                    }
                    for (int index = 0; index < overlapsCount; index++)
                    {
                        Buffer buffer = _bufferOverlaps[index];
                        address    = Math.Min(address,    buffer.Address);
                        endAddress = Math.Max(endAddress, buffer.EndAddress);
                        lock (_buffers)
                        {
                            _buffers.Remove(buffer);
                        }
                    }
                    ulong newSize = endAddress - address;
                    Buffer newBuffer = new Buffer(_context, _physicalMemory, address, newSize, _bufferOverlaps.Take(overlapsCount));
                    lock (_buffers)
                    {
                        _buffers.Add(newBuffer);
                    }
                    for (int index = 0; index < overlapsCount; index++)
                    {
                        Buffer buffer = _bufferOverlaps[index];
                        int dstOffset = (int)(buffer.Address - newBuffer.Address);
                        buffer.CopyTo(newBuffer, dstOffset);
                        newBuffer.InheritModifiedRanges(buffer);
                        buffer.DecrementReferenceCount();
                    }
                    newBuffer.SynchronizeMemory(address, newSize);
                    // Existing buffers were modified, we need to rebind everything.
                    NotifyBuffersModified?.Invoke();
                }
            }
            else
            {
                // No overlap, just create a new buffer.
                Buffer buffer = new Buffer(_context, _physicalMemory, address, size);
                lock (_buffers)
                {
                    _buffers.Add(buffer);
                }
            }
            ShrinkOverlapsBufferIfNeeded();
        }
        /// 
        /// Resizes the temporary buffer used for range list intersection results, if it has grown too much.
        /// 
        private void ShrinkOverlapsBufferIfNeeded()
        {
            if (_bufferOverlaps.Length > OverlapsBufferMaxCapacity)
            {
                Array.Resize(ref _bufferOverlaps, OverlapsBufferMaxCapacity);
            }
        }
        /// 
        /// Copy a buffer data from a given address to another.
        /// 
        /// 
        /// This does a GPU side copy.
        /// 
        /// GPU memory manager where the buffer is mapped
        /// GPU virtual address of the copy source
        /// GPU virtual address of the copy destination
        /// Size in bytes of the copy
        public void CopyBuffer(MemoryManager memoryManager, ulong srcVa, ulong dstVa, ulong size)
        {
            ulong srcAddress = TranslateAndCreateBuffer(memoryManager, srcVa, size);
            ulong dstAddress = TranslateAndCreateBuffer(memoryManager, dstVa, size);
            Buffer srcBuffer = GetBuffer(srcAddress, size);
            Buffer dstBuffer = GetBuffer(dstAddress, size);
            int srcOffset = (int)(srcAddress - srcBuffer.Address);
            int dstOffset = (int)(dstAddress - dstBuffer.Address);
            _context.Renderer.Pipeline.CopyBuffer(
                srcBuffer.Handle,
                dstBuffer.Handle,
                srcOffset,
                dstOffset,
                (int)size);
            if (srcBuffer.IsModified(srcAddress, size))
            {
                dstBuffer.SignalModified(dstAddress, size);
            }
            else
            {
                // Optimization: If the data being copied is already in memory, then copy it directly instead of flushing from GPU.
                dstBuffer.ClearModified(dstAddress, size);
                memoryManager.Physical.WriteUntracked(dstAddress, memoryManager.Physical.GetSpan(srcAddress, (int)size));
            }
        }
        /// 
        /// Clears a buffer at a given address with the specified value.
        /// 
        /// 
        /// Both the address and size must be aligned to 4 bytes.
        /// 
        /// GPU memory manager where the buffer is mapped
        /// GPU virtual address of the region to clear
        /// Number of bytes to clear
        /// Value to be written into the buffer
        public void ClearBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, uint value)
        {
            ulong address = TranslateAndCreateBuffer(memoryManager, gpuVa, size);
            Buffer buffer = GetBuffer(address, size);
            int offset = (int)(address - buffer.Address);
            _context.Renderer.Pipeline.ClearBuffer(buffer.Handle, offset, (int)size, value);
            buffer.SignalModified(address, size);
        }
        /// 
        /// Gets a buffer sub-range starting at a given memory address.
        /// 
        /// Start address of the memory range
        /// Size in bytes of the memory range
        /// Whether the buffer will be written to by this use
        /// The buffer sub-range starting at the given memory address
        public BufferRange GetBufferRangeTillEnd(ulong address, ulong size, bool write = false)
        {
            return GetBuffer(address, size, write).GetRange(address);
        }
        /// 
        /// Gets a buffer sub-range for a given memory range.
        /// 
        /// Start address of the memory range
        /// Size in bytes of the memory range
        /// Whether the buffer will be written to by this use
        /// The buffer sub-range for the given range
        public BufferRange GetBufferRange(ulong address, ulong size, bool write = false)
        {
            return GetBuffer(address, size, write).GetRange(address, size);
        }
        /// 
        /// Gets a buffer for a given memory range.
        /// A buffer overlapping with the specified range is assumed to already exist on the cache.
        /// 
        /// Start address of the memory range
        /// Size in bytes of the memory range
        /// Whether the buffer will be written to by this use
        /// The buffer where the range is fully contained
        private Buffer GetBuffer(ulong address, ulong size, bool write = false)
        {
            Buffer buffer;
            if (size != 0)
            {
                buffer = _buffers.FindFirstOverlap(address, size);
                buffer.SynchronizeMemory(address, size);
                if (write)
                {
                    buffer.SignalModified(address, size);
                }
            }
            else
            {
                buffer = _buffers.FindFirstOverlap(address, 1);
            }
            return buffer;
        }
        /// 
        /// Performs guest to host memory synchronization of a given memory range.
        /// 
        /// Start address of the memory range
        /// Size in bytes of the memory range
        public void SynchronizeBufferRange(ulong address, ulong size)
        {
            if (size != 0)
            {
                Buffer buffer = _buffers.FindFirstOverlap(address, size);
                buffer.SynchronizeMemory(address, size);
            }
        }
        /// 
        /// Prune any invalid entries from a quick access dictionary.
        /// 
        /// Dictionary to prune
        /// List used to track entries to delete
        private void Prune(Dictionary dictionary, ref List toDelete)
        {
            foreach (var entry in dictionary)
            {
                if (entry.Value.UnmappedSequence != entry.Value.Buffer.UnmappedSequence)
                {
                    (toDelete ??= new()).Add(entry.Key);
                }
            }
            if (toDelete != null)
            {
                foreach (ulong entry in toDelete)
                {
                    dictionary.Remove(entry);
                }
            }
        }
        /// 
        /// Prune any invalid entries from the quick access dictionaries.
        /// 
        private void Prune()
        {
            List toDelete = null;
            Prune(_dirtyCache, ref toDelete);
            toDelete?.Clear();
            Prune(_modifiedCache, ref toDelete);
            _pruneCaches = false;
        }
        /// 
        /// Queues a prune of invalid entries the next time a dictionary cache is accessed.
        /// 
        public void QueuePrune()
        {
            _pruneCaches = true;
        }
        /// 
        /// Disposes all buffers in the cache.
        /// It's an error to use the buffer manager after disposal.
        /// 
        public void Dispose()
        {
            lock (_buffers)
            {
                foreach (Buffer buffer in _buffers)
                {
                    buffer.Dispose();
                }
            }
        }
    }
}