diff --git a/patches/server/0025-Use-coordinate-based-locking-to-increase-chunk-syste.patch b/patches/server/0025-Use-coordinate-based-locking-to-increase-chunk-syste.patch
new file mode 100644
index 0000000..536aa44
--- /dev/null
+++ b/patches/server/0025-Use-coordinate-based-locking-to-increase-chunk-syste.patch
@@ -0,0 +1,4975 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Spottedleaf
+Date: Fri, 12 May 2023 20:37:56 -0700
+Subject: [PATCH] Use coordinate-based locking to increase chunk system
+ parallelism
+
+A significant overhead in Folia comes from the chunk system's
+locks, the ticket lock and the scheduling lock. The public
+test server, which had ~330 players, had significant performance
+problems with these locks: ~80% of the time spent ticking
+was _waiting_ for the locks to free. Given that it used
+around 15 cores total at peak, this is a complete and utter loss
+of potential.
+
+To address this issue, I have replaced the ticket lock and scheduling
+lock with two ReentrantAreaLocks. The ReentrantAreaLock takes a
+shift, which is used internally to group positions into sections.
+This grouping is necessary, as the possible radius of area that
+needs to be acquired for any given lock usage is up to 64. As such,
+the shift is critical to reduce the number of areas required to lock
+for any lock operation. Currently, it is set to a shift of 6, which
+is identical to the ticket level propagation shift (and it must be
+at least the ticket level propagation shift AND the region shift).
+
+The chunk system locking changes required a complete rewrite of the
+chunk system tick, chunk system unload, and chunk system ticket level
+propagation - as all of the previous logic only works with a single
+global lock.
+
+This does introduce two other section shifts: the lock shift, and the
+ticket shift. The lock shift is simply the shift that the area locks use,
+and the ticket shift represents the size of the ticket sections.
+Currently, these values are just set to the region shift for simplicity.
+However, they are not arbitrary: the lock shift must be at least the size
+of the ticket shift and must be at least the size of the region shift.
+The ticket shift must also be >= the ceil(log2(max ticket level source)).
+
+The chunk system's ticket propagator is now global state, instead of
+region state. This cleans up the logic for ticket levels significantly,
+and removes usage of the region lock in this area, but it also means
+that the addition of a ticket no longer creates a region. To alleviate
+the side effects of this change, the global tick thread now processes
+ticket level updates for each world every tick to guarantee eventual
+ticket level processing. The chunk system also provides a hook to
+process ticket level changes in a given _section_, so that the
+region queue can guarantee that, after adding its reference counter,
+the region section is created/exists/won't be destroyed.
+
+The ticket propagator operates by updating the sources in a single ticket
+section, and propagating the updates to its 1-radius neighbours. This
+allows the ticket updates to occur in parallel or selectively (see above).
+Currently, the process ticket level update function operates by
+polling from a concurrent queue of sections to update and simply
+invoking the single section update logic. This allows the function
+to operate completely in parallel, provided the queue is ordered right.
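+
+As an illustration of the area lock usage pattern described above, the sketch
+below shows how a caller might guard work on a chunk area with the new
+ReentrantAreaLock. The helper class/method names, the shift of 6 and the
+radius of 1 are example values only and are not part of this patch:
+
+    import ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock;
+
+    public final class AreaLockUsageSketch {
+        // a shift of 6 groups chunk coordinates into 64x64 chunk sections
+        private static final ReentrantAreaLock AREA_LOCK = new ReentrantAreaLock(6);
+
+        public static void runGuarded(final int chunkX, final int chunkZ, final Runnable work) {
+            // acquires every section overlapping the 1-chunk radius around (chunkX, chunkZ),
+            // blocking until no other thread holds an intersecting area
+            final ReentrantAreaLock.Node node = AREA_LOCK.lock(chunkX, chunkZ, 1);
+            try {
+                work.run();
+            } finally {
+                // releases the sections and unparks any waiters queued on the node
+                AREA_LOCK.unlock(node);
+            }
+        }
+    }
+
+Note that the lock is reentrant only for the thread that already owns the
+sections; acquiring an area that only partially intersects one held by the
+same thread is treated as improper usage.
+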
+Additionally, this limits the area used in the ticket/scheduling lock +when processing updates, which should massively increase parallelism compared +to before. + +The chunk system ticket addition for expirable ticket types has been modified +to no longer track exact tick deadlines, as this relies on what region the +ticket is in. Instead, the chunk system tracks a map of +lock section -> (chunk coordinate -> expire ticket count) and every ticket +has been changed to have a removeDelay count that is decremented each tick. +Each region searches its own sections to find tickets to try to expire. + +Chunk system unloading has been modified to track unloads by lock section. +The ordering is determined by which section a chunk resides in. +The unload process now removes from unload sections and processes +the full unload stages (1, 2, 3) before moving to the next section, if possible. +This allows the unload logic to only hold one lock section at a time for +each lock, which is a massive parallelism increase. + +In stress testing, these changes lowered the locking overhead to only 5% +from ~70%, which completely fix the original problem as described. + +diff --git a/src/main/java/ca/spottedleaf/concurrentutil/lock/AreaLock.java b/src/main/java/ca/spottedleaf/concurrentutil/lock/AreaLock.java +deleted file mode 100644 +index 6a155b779914828a0d4199bdfcb0d6fca25e1581..0000000000000000000000000000000000000000 +--- a/src/main/java/ca/spottedleaf/concurrentutil/lock/AreaLock.java ++++ /dev/null +@@ -1,146 +0,0 @@ +-package ca.spottedleaf.concurrentutil.lock; +- +-import it.unimi.dsi.fastutil.longs.Long2ReferenceOpenHashMap; +-import it.unimi.dsi.fastutil.objects.ReferenceOpenHashSet; +-import java.util.ArrayList; +-import java.util.List; +-import java.util.concurrent.locks.LockSupport; +- +-public final class AreaLock { +- +- private final int coordinateShift; +- +- private final Long2ReferenceOpenHashMap nodesByPosition = new Long2ReferenceOpenHashMap<>(1024, 0.10f); +- +- public AreaLock(final int coordinateShift) { +- this.coordinateShift = coordinateShift; +- } +- +- private static long key(final int x, final int z) { +- return ((long)z << 32) | (x & 0xFFFFFFFFL); +- } +- +- public Node lock(final int x, final int z, final int radius) { +- final Thread thread = Thread.currentThread(); +- final int minX = (x - radius) >> this.coordinateShift; +- final int minZ = (z - radius) >> this.coordinateShift; +- final int maxX = (x + radius) >> this.coordinateShift; +- final int maxZ = (z + radius) >> this.coordinateShift; +- +- final Node node = new Node(x, z, radius, thread); +- +- synchronized (this) { +- ReferenceOpenHashSet parents = null; +- for (int currZ = minZ; currZ <= maxZ; ++currZ) { +- for (int currX = minX; currX <= maxX; ++currX) { +- final Node dependency = this.nodesByPosition.put(key(currX, currZ), node); +- if (dependency == null) { +- continue; +- } +- +- if (parents == null) { +- parents = new ReferenceOpenHashSet<>(); +- } +- +- if (parents.add(dependency)) { +- // added a dependency, so we need to add as a child to the dependency +- if (dependency.children == null) { +- dependency.children = new ArrayList<>(); +- } +- dependency.children.add(node); +- } +- } +- } +- +- if (parents == null) { +- // no dependencies, so we can just return immediately +- return node; +- } // else: we need to lock +- +- node.parents = parents; +- } +- +- while (!node.unlocked) { +- LockSupport.park(node); +- } +- +- return node; +- } +- +- public void unlock(final Node node) { +- List toUnpark = null; +- 
+- final int x = node.x; +- final int z = node.z; +- final int radius = node.radius; +- +- final int minX = (x - radius) >> this.coordinateShift; +- final int minZ = (z - radius) >> this.coordinateShift; +- final int maxX = (x + radius) >> this.coordinateShift; +- final int maxZ = (z + radius) >> this.coordinateShift; +- +- synchronized (this) { +- final List children = node.children; +- if (children != null) { +- // try to unlock children +- for (int i = 0, len = children.size(); i < len; ++i) { +- final Node child = children.get(i); +- if (!child.parents.remove(node)) { +- throw new IllegalStateException(); +- } +- if (child.parents.isEmpty()) { +- // we can unlock, as it now has no dependencies in front +- child.parents = null; +- if (toUnpark == null) { +- toUnpark = new ArrayList<>(); +- toUnpark.add(child); +- } else { +- toUnpark.add(child); +- } +- } +- } +- } +- +- // remove node from dependency map +- for (int currZ = minZ; currZ <= maxZ; ++currZ) { +- for (int currX = minX; currX <= maxX; ++currX) { +- // node: we only remove if we match, as a mismatch indicates a child node which of course has not +- // yet been unlocked +- this.nodesByPosition.remove(key(currX, currZ), node); +- } +- } +- } +- +- if (toUnpark == null) { +- return; +- } +- +- // we move the unpark / unlock logic here because we want to avoid performing work while holding the lock +- +- for (int i = 0, len = toUnpark.size(); i < len; ++i) { +- final Node toUnlock = toUnpark.get(i); +- toUnlock.unlocked = true; // must be volatile and before unpark() +- LockSupport.unpark(toUnlock.thread); +- } +- } +- +- public static final class Node { +- +- public final int x; +- public final int z; +- public final int radius; +- public final Thread thread; +- +- private List children; +- private ReferenceOpenHashSet parents; +- +- private volatile boolean unlocked; +- +- public Node(final int x, final int z, final int radius, final Thread thread) { +- this.x = x; +- this.z = z; +- this.radius = radius; +- this.thread = thread; +- } +- } +-} +diff --git a/src/main/java/ca/spottedleaf/concurrentutil/lock/ReentrantAreaLock.java b/src/main/java/ca/spottedleaf/concurrentutil/lock/ReentrantAreaLock.java +new file mode 100644 +index 0000000000000000000000000000000000000000..4fd9a0cd8f1e6ae1a97e963dc7731a80bc6fac5b +--- /dev/null ++++ b/src/main/java/ca/spottedleaf/concurrentutil/lock/ReentrantAreaLock.java +@@ -0,0 +1,395 @@ ++package ca.spottedleaf.concurrentutil.lock; ++ ++import ca.spottedleaf.concurrentutil.collection.MultiThreadedQueue; ++import it.unimi.dsi.fastutil.HashCommon; ++import java.util.ArrayList; ++import java.util.List; ++import java.util.concurrent.ConcurrentHashMap; ++import java.util.concurrent.locks.LockSupport; ++ ++public final class ReentrantAreaLock { ++ ++ public final int coordinateShift; ++ ++ // aggressive load factor to reduce contention ++ private final ConcurrentHashMap nodes = new ConcurrentHashMap<>(128, 0.2f); ++ ++ public ReentrantAreaLock(final int coordinateShift) { ++ this.coordinateShift = coordinateShift; ++ } ++ ++ public boolean isHeldByCurrentThread(final int x, final int z) { ++ final Thread currThread = Thread.currentThread(); ++ final int shift = this.coordinateShift; ++ final int sectionX = x >> shift; ++ final int sectionZ = z >> shift; ++ ++ final Coordinate coordinate = new Coordinate(Coordinate.key(sectionX, sectionZ)); ++ final Node node = this.nodes.get(coordinate); ++ ++ return node != null && node.thread == currThread; ++ } ++ ++ public boolean isHeldByCurrentThread(final int 
centerX, final int centerZ, final int radius) { ++ return this.isHeldByCurrentThread(centerX - radius, centerZ - radius, centerX + radius, centerZ + radius); ++ } ++ ++ public boolean isHeldByCurrentThread(final int fromX, final int fromZ, final int toX, final int toZ) { ++ if (fromX > toX || fromZ > toZ) { ++ throw new IllegalArgumentException(); ++ } ++ ++ final Thread currThread = Thread.currentThread(); ++ final int shift = this.coordinateShift; ++ final int fromSectionX = fromX >> shift; ++ final int fromSectionZ = fromZ >> shift; ++ final int toSectionX = toX >> shift; ++ final int toSectionZ = toZ >> shift; ++ ++ for (int currZ = fromSectionZ; currZ <= toSectionZ; ++currZ) { ++ for (int currX = fromSectionX; currX <= toSectionX; ++currX) { ++ final Coordinate coordinate = new Coordinate(Coordinate.key(currX, currZ)); ++ ++ final Node node = this.nodes.get(coordinate); ++ ++ if (node == null || node.thread != currThread) { ++ return false; ++ } ++ } ++ } ++ ++ return true; ++ } ++ ++ public Node tryLock(final int x, final int z) { ++ return this.tryLock(x, z, x, z); ++ } ++ ++ public Node tryLock(final int centerX, final int centerZ, final int radius) { ++ return this.tryLock(centerX - radius, centerZ - radius, centerX + radius, centerZ + radius); ++ } ++ ++ public Node tryLock(final int fromX, final int fromZ, final int toX, final int toZ) { ++ if (fromX > toX || fromZ > toZ) { ++ throw new IllegalArgumentException(); ++ } ++ ++ final Thread currThread = Thread.currentThread(); ++ final int shift = this.coordinateShift; ++ final int fromSectionX = fromX >> shift; ++ final int fromSectionZ = fromZ >> shift; ++ final int toSectionX = toX >> shift; ++ final int toSectionZ = toZ >> shift; ++ ++ final List areaAffected = new ArrayList<>(); ++ ++ final Node ret = new Node(this, areaAffected, currThread); ++ ++ boolean failed = false; ++ ++ // try to fast acquire area ++ for (int currZ = fromSectionZ; currZ <= toSectionZ; ++currZ) { ++ for (int currX = fromSectionX; currX <= toSectionX; ++currX) { ++ final Coordinate coordinate = new Coordinate(Coordinate.key(currX, currZ)); ++ ++ final Node prev = this.nodes.putIfAbsent(coordinate, ret); ++ ++ if (prev == null) { ++ areaAffected.add(coordinate); ++ continue; ++ } ++ ++ if (prev.thread != currThread) { ++ failed = true; ++ break; ++ } ++ } ++ } ++ ++ if (!failed) { ++ return ret; ++ } ++ ++ // failed, undo logic ++ if (!areaAffected.isEmpty()) { ++ for (int i = 0, len = areaAffected.size(); i < len; ++i) { ++ final Coordinate key = areaAffected.get(i); ++ ++ if (this.nodes.remove(key) != ret) { ++ throw new IllegalStateException(); ++ } ++ } ++ ++ areaAffected.clear(); ++ ++ // since we inserted, we need to drain waiters ++ Thread unpark; ++ while ((unpark = ret.pollOrBlockAdds()) != null) { ++ LockSupport.unpark(unpark); ++ } ++ } ++ ++ return null; ++ } ++ ++ public Node lock(final int x, final int z) { ++ final Thread currThread = Thread.currentThread(); ++ final int shift = this.coordinateShift; ++ final int sectionX = x >> shift; ++ final int sectionZ = z >> shift; ++ ++ final List areaAffected = new ArrayList<>(1); ++ ++ final Node ret = new Node(this, areaAffected, currThread); ++ final Coordinate coordinate = new Coordinate(Coordinate.key(sectionX, sectionZ)); ++ ++ for (long failures = 0L;;) { ++ final Node park; ++ ++ // try to fast acquire area ++ { ++ final Node prev = this.nodes.putIfAbsent(coordinate, ret); ++ ++ if (prev == null) { ++ areaAffected.add(coordinate); ++ return ret; ++ } else if (prev.thread != currThread) { ++ 
park = prev; ++ } else { ++ // only one node we would want to acquire, and it's owned by this thread already ++ return ret; ++ } ++ } ++ ++ ++failures; ++ ++ if (failures > 128L && park.add(currThread)) { ++ LockSupport.park(); ++ } else { ++ // high contention, spin wait ++ if (failures < 128L) { ++ for (long i = 0; i < failures; ++i) { ++ Thread.onSpinWait(); ++ } ++ failures = failures << 1; ++ } else if (failures < 1_200L) { ++ LockSupport.parkNanos(1_000L); ++ failures = failures + 1L; ++ } else { // scale 0.1ms (100us) per failure ++ Thread.yield(); ++ LockSupport.parkNanos(100_000L * failures); ++ failures = failures + 1L; ++ } ++ } ++ } ++ } ++ ++ public Node lock(final int centerX, final int centerZ, final int radius) { ++ return this.lock(centerX - radius, centerZ - radius, centerX + radius, centerZ + radius); ++ } ++ ++ public Node lock(final int fromX, final int fromZ, final int toX, final int toZ) { ++ if (fromX > toX || fromZ > toZ) { ++ throw new IllegalArgumentException(); ++ } ++ ++ final Thread currThread = Thread.currentThread(); ++ final int shift = this.coordinateShift; ++ final int fromSectionX = fromX >> shift; ++ final int fromSectionZ = fromZ >> shift; ++ final int toSectionX = toX >> shift; ++ final int toSectionZ = toZ >> shift; ++ ++ if (((fromSectionX ^ toSectionX) | (fromSectionZ ^ toSectionZ)) == 0) { ++ return this.lock(fromX, fromZ); ++ } ++ ++ final List areaAffected = new ArrayList<>(); ++ ++ final Node ret = new Node(this, areaAffected, currThread); ++ ++ for (long failures = 0L;;) { ++ Node park = null; ++ boolean addedToArea = false; ++ boolean alreadyOwned = false; ++ boolean allOwned = true; ++ ++ // try to fast acquire area ++ for (int currZ = fromSectionZ; currZ <= toSectionZ; ++currZ) { ++ for (int currX = fromSectionX; currX <= toSectionX; ++currX) { ++ final Coordinate coordinate = new Coordinate(Coordinate.key(currX, currZ)); ++ ++ final Node prev = this.nodes.putIfAbsent(coordinate, ret); ++ ++ if (prev == null) { ++ addedToArea = true; ++ allOwned = false; ++ areaAffected.add(coordinate); ++ continue; ++ } ++ ++ if (prev.thread != currThread) { ++ park = prev; ++ alreadyOwned = true; ++ break; ++ } ++ } ++ } ++ ++ if (park == null) { ++ if (alreadyOwned && !allOwned) { ++ throw new IllegalStateException("Improper lock usage: Should never acquire intersecting areas"); ++ } ++ return ret; ++ } ++ ++ // failed, undo logic ++ if (addedToArea) { ++ for (int i = 0, len = areaAffected.size(); i < len; ++i) { ++ final Coordinate key = areaAffected.get(i); ++ ++ if (this.nodes.remove(key) != ret) { ++ throw new IllegalStateException(); ++ } ++ } ++ ++ areaAffected.clear(); ++ ++ // since we inserted, we need to drain waiters ++ Thread unpark; ++ while ((unpark = ret.pollOrBlockAdds()) != null) { ++ LockSupport.unpark(unpark); ++ } ++ } ++ ++ ++failures; ++ ++ if (failures > 128L && park.add(currThread)) { ++ LockSupport.park(park); ++ } else { ++ // high contention, spin wait ++ if (failures < 128L) { ++ for (long i = 0; i < failures; ++i) { ++ Thread.onSpinWait(); ++ } ++ failures = failures << 1; ++ } else if (failures < 1_200L) { ++ LockSupport.parkNanos(1_000L); ++ failures = failures + 1L; ++ } else { // scale 0.1ms (100us) per failure ++ Thread.yield(); ++ LockSupport.parkNanos(100_000L * failures); ++ failures = failures + 1L; ++ } ++ } ++ ++ if (addedToArea) { ++ // try again, so we need to allow adds so that other threads can properly block on us ++ ret.allowAdds(); ++ } ++ } ++ } ++ ++ public void unlock(final Node node) { ++ if (node.lock 
!= this) { ++ throw new IllegalStateException("Unlock target lock mismatch"); ++ } ++ ++ final List areaAffected = node.areaAffected; ++ ++ if (areaAffected.isEmpty()) { ++ // here we are not in the node map, and so do not need to remove from the node map or unblock any waiters ++ return; ++ } ++ ++ // remove from node map; allowing other threads to lock ++ for (int i = 0, len = areaAffected.size(); i < len; ++i) { ++ final Coordinate coordinate = areaAffected.get(i); ++ if (this.nodes.remove(coordinate) != node) { ++ throw new IllegalStateException(); ++ } ++ } ++ ++ Thread unpark; ++ while ((unpark = node.pollOrBlockAdds()) != null) { ++ LockSupport.unpark(unpark); ++ } ++ } ++ ++ public static final class Node extends MultiThreadedQueue { ++ ++ private final ReentrantAreaLock lock; ++ private final List areaAffected; ++ private final Thread thread; ++ //private final Throwable WHO_CREATED_MY_ASS = new Throwable(); ++ ++ private Node(final ReentrantAreaLock lock, final List areaAffected, final Thread thread) { ++ this.lock = lock; ++ this.areaAffected = areaAffected; ++ this.thread = thread; ++ } ++ ++ @Override ++ public String toString() { ++ return "Node{" + ++ "areaAffected=" + this.areaAffected + ++ ", thread=" + this.thread + ++ '}'; ++ } ++ } ++ ++ private static final class Coordinate implements Comparable { ++ ++ public final long key; ++ ++ public Coordinate(final long key) { ++ this.key = key; ++ } ++ ++ public Coordinate(final int x, final int z) { ++ this.key = key(x, z); ++ } ++ ++ public static long key(final int x, final int z) { ++ return ((long)z << 32) | (x & 0xFFFFFFFFL); ++ } ++ ++ public static int x(final long key) { ++ return (int)key; ++ } ++ ++ public static int z(final long key) { ++ return (int)(key >>> 32); ++ } ++ ++ @Override ++ public int hashCode() { ++ return (int)HashCommon.mix(this.key); ++ } ++ ++ @Override ++ public boolean equals(final Object obj) { ++ if (this == obj) { ++ return true; ++ } ++ ++ if (!(obj instanceof Coordinate other)) { ++ return false; ++ } ++ ++ return this.key == other.key; ++ } ++ ++ // This class is intended for HashMap/ConcurrentHashMap usage, which do treeify bin nodes if the chain ++ // is too large. So we should implement compareTo to help. 
++ @Override ++ public int compareTo(final Coordinate other) { ++ return Long.compare(this.key, other.key); ++ } ++ ++ @Override ++ public String toString() { ++ return "[" + x(this.key) + "," + z(this.key) + "]"; ++ } ++ } ++} +diff --git a/src/main/java/ca/spottedleaf/concurrentutil/lock/SyncReentrantAreaLock.java b/src/main/java/ca/spottedleaf/concurrentutil/lock/SyncReentrantAreaLock.java +new file mode 100644 +index 0000000000000000000000000000000000000000..64b5803d002b2968841a5ddee987f98b72964e87 +--- /dev/null ++++ b/src/main/java/ca/spottedleaf/concurrentutil/lock/SyncReentrantAreaLock.java +@@ -0,0 +1,217 @@ ++package ca.spottedleaf.concurrentutil.lock; ++ ++import ca.spottedleaf.concurrentutil.collection.MultiThreadedQueue; ++import it.unimi.dsi.fastutil.longs.Long2ReferenceOpenHashMap; ++import it.unimi.dsi.fastutil.longs.LongArrayList; ++import java.util.concurrent.locks.LockSupport; ++ ++// not concurrent, unlike ReentrantAreaLock ++// no incorrect lock usage detection (acquiring intersecting areas) ++// this class is nothing more than a performance reference for ReentrantAreaLock ++public final class SyncReentrantAreaLock { ++ ++ private final int coordinateShift; ++ ++ // aggressive load factor to reduce contention ++ private final Long2ReferenceOpenHashMap nodes = new Long2ReferenceOpenHashMap<>(128, 0.2f); ++ ++ public SyncReentrantAreaLock(final int coordinateShift) { ++ this.coordinateShift = coordinateShift; ++ } ++ ++ private static long key(final int x, final int z) { ++ return ((long)z << 32) | (x & 0xFFFFFFFFL); ++ } ++ ++ public Node lock(final int x, final int z) { ++ final Thread currThread = Thread.currentThread(); ++ final int shift = this.coordinateShift; ++ final int sectionX = x >> shift; ++ final int sectionZ = z >> shift; ++ ++ final LongArrayList areaAffected = new LongArrayList(); ++ ++ final Node ret = new Node(this, areaAffected, currThread); ++ ++ final long coordinate = key(sectionX, sectionZ); ++ ++ for (long failures = 0L;;) { ++ final Node park; ++ ++ synchronized (this) { ++ // try to fast acquire area ++ final Node prev = this.nodes.putIfAbsent(coordinate, ret); ++ ++ if (prev == null) { ++ areaAffected.add(coordinate); ++ return ret; ++ } else if (prev.thread != currThread) { ++ park = prev; ++ } else { ++ // only one node we would want to acquire, and it's owned by this thread already ++ return ret; ++ } ++ } ++ ++ ++failures; ++ ++ if (failures > 128L && park.add(currThread)) { ++ LockSupport.park(); ++ } else { ++ // high contention, spin wait ++ if (failures < 128L) { ++ for (long i = 0; i < failures; ++i) { ++ Thread.onSpinWait(); ++ } ++ failures = failures << 1; ++ } else if (failures < 1_200L) { ++ LockSupport.parkNanos(1_000L); ++ failures = failures + 1L; ++ } else { // scale 0.1ms (100us) per failure ++ Thread.yield(); ++ LockSupport.parkNanos(100_000L * failures); ++ failures = failures + 1L; ++ } ++ } ++ } ++ } ++ ++ public Node lock(final int centerX, final int centerZ, final int radius) { ++ return this.lock(centerX - radius, centerZ - radius, centerX + radius, centerZ + radius); ++ } ++ ++ public Node lock(final int fromX, final int fromZ, final int toX, final int toZ) { ++ if (fromX > toX || fromZ > toZ) { ++ throw new IllegalArgumentException(); ++ } ++ ++ final Thread currThread = Thread.currentThread(); ++ final int shift = this.coordinateShift; ++ final int fromSectionX = fromX >> shift; ++ final int fromSectionZ = fromZ >> shift; ++ final int toSectionX = toX >> shift; ++ final int toSectionZ = toZ >> shift; ++ ++ final 
LongArrayList areaAffected = new LongArrayList(); ++ ++ final Node ret = new Node(this, areaAffected, currThread); ++ ++ for (long failures = 0L;;) { ++ Node park = null; ++ boolean addedToArea = false; ++ ++ synchronized (this) { ++ // try to fast acquire area ++ for (int currZ = fromSectionZ; currZ <= toSectionZ; ++currZ) { ++ for (int currX = fromSectionX; currX <= toSectionX; ++currX) { ++ final long coordinate = key(currX, currZ); ++ ++ final Node prev = this.nodes.putIfAbsent(coordinate, ret); ++ ++ if (prev == null) { ++ addedToArea = true; ++ areaAffected.add(coordinate); ++ continue; ++ } ++ ++ if (prev.thread != currThread) { ++ park = prev; ++ break; ++ } ++ } ++ } ++ ++ if (park == null) { ++ return ret; ++ } ++ ++ // failed, undo logic ++ if (!areaAffected.isEmpty()) { ++ for (int i = 0, len = areaAffected.size(); i < len; ++i) { ++ final long key = areaAffected.getLong(i); ++ ++ if (!this.nodes.remove(key, ret)) { ++ throw new IllegalStateException(); ++ } ++ } ++ } ++ } ++ ++ if (addedToArea) { ++ areaAffected.clear(); ++ // since we inserted, we need to drain waiters ++ Thread unpark; ++ while ((unpark = ret.pollOrBlockAdds()) != null) { ++ LockSupport.unpark(unpark); ++ } ++ } ++ ++ ++failures; ++ ++ if (failures > 128L && park.add(currThread)) { ++ LockSupport.park(); ++ } else { ++ // high contention, spin wait ++ if (failures < 128L) { ++ for (long i = 0; i < failures; ++i) { ++ Thread.onSpinWait(); ++ } ++ failures = failures << 1; ++ } else if (failures < 1_200L) { ++ LockSupport.parkNanos(1_000L); ++ failures = failures + 1L; ++ } else { // scale 0.1ms (100us) per failure ++ Thread.yield(); ++ LockSupport.parkNanos(100_000L * failures); ++ failures = failures + 1L; ++ } ++ } ++ ++ if (addedToArea) { ++ // try again, so we need to allow adds so that other threads can properly block on us ++ ret.allowAdds(); ++ } ++ } ++ } ++ ++ public void unlock(final Node node) { ++ if (node.lock != this) { ++ throw new IllegalStateException("Unlock target lock mismatch"); ++ } ++ ++ final LongArrayList areaAffected = node.areaAffected; ++ ++ if (areaAffected.isEmpty()) { ++ // here we are not in the node map, and so do not need to remove from the node map or unblock any waiters ++ return; ++ } ++ ++ // remove from node map; allowing other threads to lock ++ synchronized (this) { ++ for (int i = 0, len = areaAffected.size(); i < len; ++i) { ++ final long coordinate = areaAffected.getLong(i); ++ if (!this.nodes.remove(coordinate, node)) { ++ throw new IllegalStateException(); ++ } ++ } ++ } ++ ++ Thread unpark; ++ while ((unpark = node.pollOrBlockAdds()) != null) { ++ LockSupport.unpark(unpark); ++ } ++ } ++ ++ public static final class Node extends MultiThreadedQueue { ++ ++ private final SyncReentrantAreaLock lock; ++ private final LongArrayList areaAffected; ++ private final Thread thread; ++ ++ private Node(final SyncReentrantAreaLock lock, final LongArrayList areaAffected, final Thread thread) { ++ this.lock = lock; ++ this.areaAffected = areaAffected; ++ this.thread = thread; ++ } ++ } ++} +diff --git a/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkHolderManager.java b/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkHolderManager.java +index aa6dad3a41077b187ef0702cb27ca03f6d9596fb..eba309c8614eb099d55db9c9ebfe6b09f3403a04 100644 +--- a/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkHolderManager.java ++++ b/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkHolderManager.java +@@ -76,10 +76,50 @@ public final class 
ChunkHolderManager { + // this field contains chunk holders that were created in addTicketAtLevel + // because the chunk holders were created without a reliable unload hook (i.e creation for entity/poi loading, + // which always check for unload after their tasks finish) we need to do that ourselves later +- private final ReferenceOpenHashSet specialCaseUnload = new ReferenceOpenHashSet<>(); ++ // Folia - use area based lock to reduce contention - no longer needed + // Folia end - region threading + +- public final ReentrantLock ticketLock = new ReentrantLock(); // Folia - region threading ++ // Folia - use area based lock to reduce contention ++ // Folia start - use area based lock to reduce contention ++ public final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock ticketLockArea = new ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock(ChunkTaskScheduler.getChunkSystemLockShift()); ++ ++ private final java.util.concurrent.ConcurrentHashMap>> tickets = new java.util.concurrent.ConcurrentHashMap<>(); ++ private final java.util.concurrent.ConcurrentHashMap sectionToChunkToExpireCount = new java.util.concurrent.ConcurrentHashMap<>(); ++ ++ public boolean processTicketUpdates(final int posX, final int posZ) { ++ final int ticketShift = io.papermc.paper.threadedregions.ThreadedTicketLevelPropagator.SECTION_SHIFT; ++ final int ticketMask = (1 << ticketShift) - 1; ++ final List scheduledTasks = new ArrayList<>(); ++ final List changedFullStatus = new ArrayList<>(); ++ final boolean ret; ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock( ++ ((posX >> ticketShift) - 1) << ticketShift, ++ ((posZ >> ticketShift) - 1) << ticketShift, ++ (((posX >> ticketShift) + 1) << ticketShift) | ticketMask, ++ (((posZ >> ticketShift) + 1) << ticketShift) | ticketMask ++ ); ++ try { ++ ret = this.processTicketUpdatesNoLock(posX >> ticketShift, posZ >> ticketShift, scheduledTasks, changedFullStatus); ++ } finally { ++ this.ticketLockArea.unlock(ticketLock); ++ } ++ ++ this.addChangedStatuses(changedFullStatus); ++ ++ for (int i = 0, len = scheduledTasks.size(); i < len; ++i) { ++ scheduledTasks.get(i).schedule(); ++ } ++ ++ return ret; ++ } ++ ++ private boolean processTicketUpdatesNoLock(final int sectionX, final int sectionZ, final List scheduledTasks, ++ final List changedFullStatus) { ++ return this.ticketLevelPropagator.performUpdate( ++ sectionX, sectionZ, this.taskScheduler.schedulingLockArea, scheduledTasks, changedFullStatus ++ ); ++ } ++ // Folia end - use area based lock to reduce contention + + private final SWMRLong2ObjectHashTable chunkHolders = new SWMRLong2ObjectHashTable<>(16384, 0.25f); + // Folia - region threading +@@ -119,10 +159,7 @@ public final class ChunkHolderManager { + return Long.compare(coord1, coord2); + }); + private long currentTick; +- private final Long2ObjectOpenHashMap>> tickets = new Long2ObjectOpenHashMap<>(8192, 0.25f); +- // what a disaster of a name +- // this is a map of removal tick to a map of chunks and the number of tickets a chunk has that are to expire that tick +- private final Long2ObjectOpenHashMap removeTickToChunkExpireTicketCount = new Long2ObjectOpenHashMap<>(); ++ // Folia - use area based lock to reduce contention - moved to global state + + public void merge(final HolderManagerRegionData into, final long tickOffset) { + // Order doesn't really matter for the pending full update... 
+@@ -136,34 +173,7 @@ public final class ChunkHolderManager { + into.autoSaveQueue.add(holder); + } + +- final long chunkManagerTickOffset = into.currentTick - this.currentTick; +- for (final Iterator>>> iterator = this.tickets.long2ObjectEntrySet().fastIterator(); +- iterator.hasNext();) { +- final Long2ObjectMap.Entry>> entry = iterator.next(); +- final SortedArraySet> oldTickets = entry.getValue(); +- final SortedArraySet> newTickets = SortedArraySet.create(Math.max(4, oldTickets.size() + 1)); +- for (final Ticket ticket : oldTickets) { +- newTickets.add( +- new Ticket(ticket.getType(), ticket.getTicketLevel(), ticket.key, +- ticket.removalTick == NO_TIMEOUT_MARKER ? NO_TIMEOUT_MARKER : ticket.removalTick + chunkManagerTickOffset) +- ); +- } +- into.tickets.put(entry.getLongKey(), newTickets); +- } +- for (final Iterator> iterator = this.removeTickToChunkExpireTicketCount.long2ObjectEntrySet().fastIterator(); +- iterator.hasNext();) { +- final Long2ObjectMap.Entry entry = iterator.next(); +- into.removeTickToChunkExpireTicketCount.merge( +- (long)(entry.getLongKey() + chunkManagerTickOffset), entry.getValue(), +- (final Long2IntOpenHashMap t, final Long2IntOpenHashMap f) -> { +- for (final Iterator itr = f.long2IntEntrySet().fastIterator(); itr.hasNext();) { +- final Long2IntMap.Entry e = itr.next(); +- t.addTo(e.getLongKey(), e.getIntValue()); +- } +- return t; +- } +- ); +- } ++ // Folia - use area based lock to reduce contention - moved to global state + } + + public void split(final int chunkToRegionShift, final Long2ReferenceOpenHashMap regionToData, +@@ -190,37 +200,7 @@ public final class ChunkHolderManager { + for (final HolderManagerRegionData data : dataSet) { + data.currentTick = this.currentTick; + } +- for (final Iterator>>> iterator = this.tickets.long2ObjectEntrySet().fastIterator(); +- iterator.hasNext();) { +- final Long2ObjectMap.Entry>> entry = iterator.next(); +- final long chunkKey = entry.getLongKey(); +- final int regionCoordinateX = CoordinateUtils.getChunkX(chunkKey) >> chunkToRegionShift; +- final int regionCoordinateZ = CoordinateUtils.getChunkZ(chunkKey) >> chunkToRegionShift; +- +- // can never be null, since a chunk holder exists if the ticket set is not empty +- regionToData.get(CoordinateUtils.getChunkKey(regionCoordinateX, regionCoordinateZ)).tickets.put(chunkKey, entry.getValue()); +- } +- for (final Iterator> iterator = this.removeTickToChunkExpireTicketCount.long2ObjectEntrySet().fastIterator(); +- iterator.hasNext();) { +- final Long2ObjectMap.Entry entry = iterator.next(); +- final long tick = entry.getLongKey(); +- final Long2IntOpenHashMap chunkToCount = entry.getValue(); +- +- for (final Iterator itr = chunkToCount.long2IntEntrySet().fastIterator(); itr.hasNext();) { +- final Long2IntMap.Entry e = itr.next(); +- final long chunkKey = e.getLongKey(); +- final int regionCoordinateX = CoordinateUtils.getChunkX(chunkKey) >> chunkToRegionShift; +- final int regionCoordinateZ = CoordinateUtils.getChunkZ(chunkKey) >> chunkToRegionShift; +- final int count = e.getIntValue(); +- +- // can never be null, since a chunk holder exists if the ticket set is not empty +- final HolderManagerRegionData data = regionToData.get(CoordinateUtils.getChunkKey(regionCoordinateX, regionCoordinateZ)); +- +- data.removeTickToChunkExpireTicketCount.computeIfAbsent(tick, (final long keyInMap) -> { +- return new Long2IntOpenHashMap(); +- }).put(chunkKey, count); +- } +- } ++ // Folia - use area based lock to reduce contention - moved to global state + } + } + +@@ -239,38 
+219,21 @@ public final class ChunkHolderManager { + return region.getData().getHolderManagerRegionData(); + } + +- // MUST hold ticket lock +- private ChunkHolderManager.HolderManagerRegionData getDataFor(final long key) { +- return this.getDataFor(CoordinateUtils.getChunkX(key), CoordinateUtils.getChunkZ(key)); +- } +- +- // MUST hold ticket lock +- private ChunkHolderManager.HolderManagerRegionData getDataFor(final int chunkX, final int chunkZ) { +- if (!this.ticketLock.isHeldByCurrentThread()) { +- throw new IllegalStateException("Must hold ticket level lock"); +- } +- +- final ThreadedRegionizer.ThreadedRegion region +- = this.world.regioniser.getRegionAtUnsynchronised(chunkX, chunkZ); +- +- if (region == null) { +- return null; +- } +- +- return region.getData().getHolderManagerRegionData(); +- } +- // Folia end - region threading ++ // Folia - use area based lock to reduce contention + + + public ChunkHolderManager(final ServerLevel world, final ChunkTaskScheduler taskScheduler) { + this.world = world; + this.taskScheduler = taskScheduler; ++ // Folia start - use area based lock to reduce contention ++ this.unloadQueue = new io.papermc.paper.threadedregions.ChunkQueue(world.regioniser.sectionChunkShift); ++ // Folia end - use area based lock to reduce contention + } + +- private long statusUpgradeId; ++ private final java.util.concurrent.atomic.AtomicLong statusUpgradeId = new java.util.concurrent.atomic.AtomicLong(); // Folia - use area based lock to reduce contention + + long getNextStatusUpgradeId() { +- return ++this.statusUpgradeId; ++ return this.statusUpgradeId.incrementAndGet(); // Folia - use area based lock to reduce contention + } + + public List getOldChunkHolders() { +@@ -452,22 +415,65 @@ public final class ChunkHolderManager { + } + } + +- protected final Long2IntLinkedOpenHashMap ticketLevelUpdates = new Long2IntLinkedOpenHashMap() { ++ // Folia start - use area based lock to reduce contention ++ protected final io.papermc.paper.threadedregions.ThreadedTicketLevelPropagator ticketLevelPropagator = new io.papermc.paper.threadedregions.ThreadedTicketLevelPropagator() { + @Override +- protected void rehash(final int newN) { +- // no downsizing allowed +- if (newN < this.n) { +- return; ++ protected void processLevelUpdates(final it.unimi.dsi.fastutil.longs.Long2ByteLinkedOpenHashMap updates) { ++ // first the necessary chunkholders must be created, so just update the ticket levels ++ for (final Iterator iterator = updates.long2ByteEntrySet().fastIterator(); iterator.hasNext();) { ++ final it.unimi.dsi.fastutil.longs.Long2ByteMap.Entry entry = iterator.next(); ++ final long key = entry.getLongKey(); ++ final int newLevel = convertBetweenTicketLevels((int)entry.getByteValue()); ++ ++ NewChunkHolder current = ChunkHolderManager.this.chunkHolders.get(key); ++ if (current == null && newLevel > MAX_TICKET_LEVEL) { ++ // not loaded and it shouldn't be loaded! ++ iterator.remove(); ++ continue; ++ } ++ ++ final int currentLevel = current == null ? 
MAX_TICKET_LEVEL + 1 : current.getCurrentTicketLevel(); ++ if (currentLevel == newLevel) { ++ // nothing to do ++ iterator.remove(); ++ continue; ++ } ++ ++ if (current == null) { ++ // must create ++ current = ChunkHolderManager.this.createChunkHolder(key); ++ synchronized (ChunkHolderManager.this.chunkHolders) { ++ ChunkHolderManager.this.chunkHolders.put(key, current); ++ } ++ current.updateTicketLevel(newLevel); ++ } else { ++ current.updateTicketLevel(newLevel); ++ } + } +- super.rehash(newN); + } +- }; + +- protected final Delayed8WayDistancePropagator2D ticketLevelPropagator = new Delayed8WayDistancePropagator2D( +- (final long coordinate, final byte oldLevel, final byte newLevel) -> { +- ChunkHolderManager.this.ticketLevelUpdates.putAndMoveToLast(coordinate, convertBetweenTicketLevels(newLevel)); ++ @Override ++ protected void processSchedulingUpdates(final it.unimi.dsi.fastutil.longs.Long2ByteLinkedOpenHashMap updates, final List scheduledTasks, ++ final List changedFullStatus) { ++ final List prev = CURRENT_TICKET_UPDATE_SCHEDULING.get(); ++ CURRENT_TICKET_UPDATE_SCHEDULING.set(scheduledTasks); ++ try { ++ for (final LongIterator iterator = updates.keySet().iterator(); iterator.hasNext();) { ++ final long key = iterator.nextLong(); ++ final NewChunkHolder current = ChunkHolderManager.this.chunkHolders.get(key); ++ ++ if (current == null) { ++ throw new IllegalStateException("Expected chunk holder to be created"); ++ } ++ ++ current.processTicketLevelUpdate(scheduledTasks, changedFullStatus); ++ } ++ } finally { ++ CURRENT_TICKET_UPDATE_SCHEDULING.set(prev); + } +- ); ++ } ++ }; ++ // Folia end - use area based lock to reduce contention + // function for converting between ticket levels and propagator levels and vice versa + // the problem is the ticket level propagator will propagate from a set source down to zero, whereas mojang expects + // levels to propagate from a set value up to a maximum value. so we need to convert the levels we put into the propagator +@@ -482,46 +488,72 @@ public final class ChunkHolderManager { + } + + public String getTicketDebugString(final long coordinate) { +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock(CoordinateUtils.getChunkX(coordinate), CoordinateUtils.getChunkZ(coordinate)); // Folia - use area based lock to reduce contention + try { +- // Folia start - region threading +- final ChunkHolderManager.HolderManagerRegionData holderManagerRegionData = this.getDataFor(coordinate); +- final SortedArraySet> tickets = holderManagerRegionData == null ? null : holderManagerRegionData.tickets.get(coordinate); +- // Folia end - region threading ++ final SortedArraySet> tickets = this.tickets.get(new RegionFileIOThread.ChunkCoordinate(coordinate)); // Folia - use area based lock to reduce contention + + return tickets != null ? 
tickets.first().toString() : "no_ticket"; + } finally { +- this.ticketLock.unlock(); ++ // Folia start - use area based lock to reduce contention ++ if (ticketLock != null) { ++ this.ticketLockArea.unlock(ticketLock); ++ } ++ // Folia end - use area based lock to reduce contention + } + } + + public Long2ObjectOpenHashMap>> getTicketsCopy() { +- this.ticketLock.lock(); +- try { +- // Folia start - region threading +- Long2ObjectOpenHashMap>> ret = new Long2ObjectOpenHashMap<>(); +- this.world.regioniser.computeForAllRegions((region) -> { +- for (final LongIterator iterator = region.getData().getHolderManagerRegionData().tickets.keySet().longIterator(); iterator.hasNext();) { +- final long chunk = iterator.nextLong(); ++ // Folia start - use area based lock to reduce contention ++ final Long2ObjectOpenHashMap>> ret = new Long2ObjectOpenHashMap<>(); ++ final it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap> sections = new it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap(); ++ final int sectionShift = ChunkTaskScheduler.getChunkSystemLockShift(); ++ for (final RegionFileIOThread.ChunkCoordinate coord : this.tickets.keySet()) { ++ sections.computeIfAbsent( ++ CoordinateUtils.getChunkKey( ++ CoordinateUtils.getChunkX(coord.key) >> sectionShift, ++ CoordinateUtils.getChunkZ(coord.key) >> sectionShift ++ ), ++ (final long keyInMap) -> { ++ return new ArrayList<>(); ++ } ++ ).add(coord); ++ } + +- ret.put(chunk, region.getData().getHolderManagerRegionData().tickets.get(chunk)); ++ for (final Iterator>> iterator = sections.long2ObjectEntrySet().fastIterator(); ++ iterator.hasNext();) { ++ final it.unimi.dsi.fastutil.longs.Long2ObjectMap.Entry> entry = iterator.next(); ++ final long sectionKey = entry.getLongKey(); ++ final List coordinates = entry.getValue(); ++ ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock( ++ CoordinateUtils.getChunkX(sectionKey) << sectionShift, ++ CoordinateUtils.getChunkZ(sectionKey) << sectionShift ++ ); ++ try { ++ for (final RegionFileIOThread.ChunkCoordinate coord : coordinates) { ++ final SortedArraySet> tickets = this.tickets.get(coord); ++ if (tickets == null) { ++ // removed before we acquired lock ++ continue; ++ } ++ ret.put(coord.key, new SortedArraySet<>(tickets)); + } +- }); +- return ret; +- // Folia end - region threading +- } finally { +- this.ticketLock.unlock(); ++ } finally { ++ this.ticketLockArea.unlock(ticketLock); ++ } + } ++ ++ return ret; ++ // Folia end - use area based lock to reduce contention + } + + public Collection getPluginChunkTickets(int x, int z) { + ImmutableList.Builder ret; +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock(x, z); // Folia - use area based lock to reduce contention + try { + // Folia start - region threading + final long coordinate = CoordinateUtils.getChunkKey(x, z); +- final ChunkHolderManager.HolderManagerRegionData holderManagerRegionData = this.getDataFor(coordinate); +- final SortedArraySet> tickets = holderManagerRegionData == null ? 
null : holderManagerRegionData.tickets.get(coordinate); ++ final SortedArraySet> tickets = this.tickets.get(new RegionFileIOThread.ChunkCoordinate(coordinate)); // Folia - use area based lock to reduce contention + // Folia end - region threading + + if (tickets == null) { +@@ -535,21 +567,19 @@ public final class ChunkHolderManager { + } + } + } finally { +- this.ticketLock.unlock(); ++ this.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + + return ret.build(); + } + +- protected final int getPropagatedTicketLevel(final long coordinate) { +- return convertBetweenTicketLevels(this.ticketLevelPropagator.getLevel(coordinate)); +- } ++ // Folia - use area based lock to reduce contention - method not needed, TODO rebase removal + + protected final void updateTicketLevel(final long coordinate, final int ticketLevel) { + if (ticketLevel > ChunkMap.MAX_CHUNK_DISTANCE) { +- this.ticketLevelPropagator.removeSource(coordinate); ++ this.ticketLevelPropagator.removeSource(CoordinateUtils.getChunkX(coordinate), CoordinateUtils.getChunkZ(coordinate)); // Folia - use area based lock to reduce contention + } else { +- this.ticketLevelPropagator.setSource(coordinate, convertBetweenTicketLevels(ticketLevel)); ++ this.ticketLevelPropagator.setSource(CoordinateUtils.getChunkX(coordinate), CoordinateUtils.getChunkZ(coordinate), convertBetweenTicketLevels(ticketLevel)); // Folia - use area based lock to reduce contention + } + } + +@@ -567,45 +597,66 @@ public final class ChunkHolderManager { + return this.addTicketAtLevel(type, CoordinateUtils.getChunkKey(chunkX, chunkZ), level, identifier); + } + ++ // Folia start - use area based lock to reduce contention ++ private void addExpireCount(final int chunkX, final int chunkZ) { ++ final long chunkKey = CoordinateUtils.getChunkKey(chunkX, chunkZ); ++ ++ final int sectionShift = TickRegions.getRegionChunkShift(); ++ final RegionFileIOThread.ChunkCoordinate sectionKey = new RegionFileIOThread.ChunkCoordinate(CoordinateUtils.getChunkKey( ++ chunkX >> sectionShift, ++ chunkZ >> sectionShift ++ )); ++ ++ this.sectionToChunkToExpireCount.computeIfAbsent(sectionKey, (final RegionFileIOThread.ChunkCoordinate keyInMap) -> { ++ return new Long2IntOpenHashMap(); ++ }).addTo(chunkKey, 1); ++ } ++ ++ private void removeExpireCount(final int chunkX, final int chunkZ) { ++ final long chunkKey = CoordinateUtils.getChunkKey(chunkX, chunkZ); ++ ++ final int sectionShift = TickRegions.getRegionChunkShift(); ++ final RegionFileIOThread.ChunkCoordinate sectionKey = new RegionFileIOThread.ChunkCoordinate(CoordinateUtils.getChunkKey( ++ chunkX >> sectionShift, ++ chunkZ >> sectionShift ++ )); ++ ++ final Long2IntOpenHashMap removeCounts = this.sectionToChunkToExpireCount.get(sectionKey); ++ final int prevCount = removeCounts.addTo(chunkKey, -1); ++ ++ if (prevCount == 1) { ++ removeCounts.remove(chunkKey); ++ if (removeCounts.isEmpty()) { ++ this.sectionToChunkToExpireCount.remove(sectionKey); ++ } ++ } ++ } ++ // Folia end - use area based lock to reduce contention ++ + // supposed to return true if the ticket was added and did not replace another + // but, we always return false if the ticket cannot be added + public boolean addTicketAtLevel(final TicketType type, final long chunk, final int level, final T identifier) { +- final long removeDelay = Math.max(0, type.timeout); ++ // Folia start - use area based lock to reduce contention ++ return this.addTicketAtLevel(type, chunk, level, identifier, true); ++ } ++ boolean addTicketAtLevel(final 
TicketType type, final long chunk, final int level, final T identifier, final boolean lock) { ++ final long removeDelay = type.timeout <= 0 ? NO_TIMEOUT_MARKER : type.timeout; ++ // Folia end - use area based lock to reduce contention + if (level > MAX_TICKET_LEVEL) { + return false; + } + +- // Folia start - region threading +- final ThreadedRegionizer.ThreadedRegion currRegion = TickRegionScheduler.getCurrentRegion(); +- final boolean lock = currRegion == null || this.world.regioniser.getRegionAtUnsynchronised( +- CoordinateUtils.getChunkX(chunk), CoordinateUtils.getChunkZ(chunk) +- ) != currRegion; +- // Folia end - region threading ++ // Folia start - use area based lock to reduce contention ++ final int chunkX = CoordinateUtils.getChunkX(chunk); ++ final int chunkZ = CoordinateUtils.getChunkZ(chunk); ++ final RegionFileIOThread.ChunkCoordinate chunkCoord = new RegionFileIOThread.ChunkCoordinate(chunk); ++ final Ticket ticket = new Ticket<>(type, level, identifier, removeDelay); + +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = lock ? this.ticketLockArea.lock(chunkX, chunkZ) : null; + try { +- // Folia start - region threading +- NewChunkHolder holder = this.chunkHolders.get(chunk); +- if (holder == null) { +- // we need to guarantee that a chunk holder exists for each ticket +- // this must be executed before retrieving the holder manager data for a target chunk, to ensure the +- // region will exist +- this.chunkHolders.put(chunk, holder = this.createChunkHolder(chunk)); +- this.specialCaseUnload.add(holder); +- } ++ // Folia end - use area based lock to reduce contention + +- if (lock) { +- // we just need to prevent merging, so we only need the read lock +- // additionally, this will prevent deadlock in the remove all tickets function by using the read lock +- this.world.regioniser.acquireReadLock(); +- } +- try { +- final ChunkHolderManager.HolderManagerRegionData targetData = lock ? this.getDataFor(chunk) : currRegion.getData().getHolderManagerRegionData(); +- // Folia end - region threading +- final long removeTick = removeDelay == 0 ? 
NO_TIMEOUT_MARKER : targetData.currentTick + removeDelay; // Folia - region threading +- final Ticket ticket = new Ticket<>(type, level, identifier, removeTick); +- +- final SortedArraySet> ticketsAtChunk = targetData.tickets.computeIfAbsent(chunk, (final long keyInMap) -> { // Folia - region threading ++ final SortedArraySet> ticketsAtChunk = this.tickets.computeIfAbsent(chunkCoord, (final RegionFileIOThread.ChunkCoordinate keyInMap) -> { // Folia - region threading // Folia - use area based lock to reduce contention + return SortedArraySet.create(4); + }); + +@@ -614,30 +665,20 @@ public final class ChunkHolderManager { + final int levelAfter = getTicketLevelAt(ticketsAtChunk); + + if (current != ticket) { +- final long oldRemovalTick = current.removalTick; +- if (removeTick != oldRemovalTick) { +- if (oldRemovalTick != NO_TIMEOUT_MARKER) { +- final Long2IntOpenHashMap removeCounts = targetData.removeTickToChunkExpireTicketCount.get(oldRemovalTick); // Folia - region threading +- final int prevCount = removeCounts.addTo(chunk, -1); +- +- if (prevCount == 1) { +- removeCounts.remove(chunk); +- if (removeCounts.isEmpty()) { +- targetData.removeTickToChunkExpireTicketCount.remove(oldRemovalTick); // Folia - region threading +- } +- } +- } +- if (removeTick != NO_TIMEOUT_MARKER) { +- targetData.removeTickToChunkExpireTicketCount.computeIfAbsent(removeTick, (final long keyInMap) -> { // Folia - region threading +- return new Long2IntOpenHashMap(); +- }).addTo(chunk, 1); ++ final long oldRemoveDelay = current.removeDelay; // Folia - use area based lock to reduce contention ++ // Folia start - use area based lock to reduce contention ++ if (removeDelay != oldRemoveDelay) { ++ if (oldRemoveDelay != NO_TIMEOUT_MARKER && removeDelay == NO_TIMEOUT_MARKER) { ++ this.removeExpireCount(chunkX, chunkZ); ++ } else if (oldRemoveDelay == NO_TIMEOUT_MARKER) { ++ // since old != new, we have that NO_TIMEOUT_MARKER != new ++ this.addExpireCount(chunkX, chunkZ); ++ // Folia end - use area based lock to reduce contention + } + } + } else { +- if (removeTick != NO_TIMEOUT_MARKER) { +- targetData.removeTickToChunkExpireTicketCount.computeIfAbsent(removeTick, (final long keyInMap) -> { // Folia - region threading +- return new Long2IntOpenHashMap(); +- }).addTo(chunk, 1); ++ if (removeDelay != NO_TIMEOUT_MARKER) { ++ this.addExpireCount(chunkX, chunkZ); // Folia - use area based lock to reduce contention + } + } + +@@ -646,13 +687,13 @@ public final class ChunkHolderManager { + } + + return current == ticket; +- } finally { // Folia start - region threading +- if (lock) { +- this.world.regioniser.releaseReadLock(); +- } +- } // Folia end - region threading ++ // Folia - use area based lock to reduce contention + } finally { +- this.ticketLock.unlock(); ++ // Folia start - use area based lock to reduce contention ++ if (ticketLock != null) { ++ this.ticketLockArea.unlock(ticketLock); ++ } ++ // Folia end - use area based lock to reduce contention + } + } + +@@ -665,117 +706,104 @@ public final class ChunkHolderManager { + } + + public boolean removeTicketAtLevel(final TicketType type, final long chunk, final int level, final T identifier) { ++ // Folia start - use area based lock to reduce contention ++ return this.removeTicketAtLevel(type, chunk, level, identifier, true); ++ } ++ boolean removeTicketAtLevel(final TicketType type, final long chunk, final int level, final T identifier, final boolean lock) { ++ // Folia end - use area based lock to reduce contention + if (level > MAX_TICKET_LEVEL) { + return false; 
+ } + +- // Folia start - region threading +- final ThreadedRegionizer.ThreadedRegion currRegion = TickRegionScheduler.getCurrentRegion(); +- final boolean lock = currRegion == null || this.world.regioniser.getRegionAtUnsynchronised( +- CoordinateUtils.getChunkX(chunk), CoordinateUtils.getChunkZ(chunk) +- ) != currRegion; +- // Folia end - region threading ++ // Folia start - use area based lock to reduce contention ++ final int chunkX = CoordinateUtils.getChunkX(chunk); ++ final int chunkZ = CoordinateUtils.getChunkZ(chunk); ++ final RegionFileIOThread.ChunkCoordinate chunkCoord = new RegionFileIOThread.ChunkCoordinate(chunk); ++ final Ticket probe = new Ticket<>(type, level, identifier, PROBE_MARKER); + +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = lock ? this.ticketLockArea.lock(chunkX, chunkZ) : null; + try { +- // Folia start - region threading +- if (lock) { +- // we just need to prevent merging, so we only need the read lock +- // additionally, this will prevent deadlock in the remove all tickets function by using the read lock +- this.world.regioniser.acquireReadLock(); +- } +- try { +- final ChunkHolderManager.HolderManagerRegionData targetData = lock ? this.getDataFor(chunk) : currRegion.getData().getHolderManagerRegionData(); +- // Folia end - region threading +- +- final SortedArraySet> ticketsAtChunk = targetData == null ? null : targetData.tickets.get(chunk); +- // Folia end - region threading ++ final SortedArraySet> ticketsAtChunk = this.tickets.get(chunkCoord); ++ // Folia end - use area based lock to reduce contention + if (ticketsAtChunk == null) { + return false; + } + + final int oldLevel = getTicketLevelAt(ticketsAtChunk); +- final Ticket ticket = (Ticket)ticketsAtChunk.removeAndGet(new Ticket<>(type, level, identifier, PROBE_MARKER)); // Folia - region threading ++ final Ticket ticket = (Ticket)ticketsAtChunk.removeAndGet(probe); // Folia - region threading // Folia - use area based lock to reduce contention + + if (ticket == null) { + return false; + } + +- int newLevel = getTicketLevelAt(ticketsAtChunk); // Folia - region threading - moved up from below +- // Folia start - region threading ++ final int newLevel = getTicketLevelAt(ticketsAtChunk); // Folia - region threading - moved up from below // Folia start - use area based lock to reduce contention ++ // Folia start - use area based lock to reduce contention + // we should not change the ticket levels while the target region may be ticking +- if (newLevel > level) { +- final long unknownRemoveTick = targetData.currentTick + Math.max(0, TicketType.UNKNOWN.timeout); +- final Ticket unknownTicket = new Ticket<>(TicketType.UNKNOWN, level, new ChunkPos(chunk), unknownRemoveTick); ++ if (oldLevel != newLevel) { ++ // we always expect UNKNOWN timeout to be 1, but just in case use max... 
++ final Ticket unknownTicket = new Ticket<>(TicketType.UNKNOWN, level, new ChunkPos(chunk), Math.max(1, TicketType.UNKNOWN.timeout)); + if (ticketsAtChunk.add(unknownTicket)) { +- targetData.removeTickToChunkExpireTicketCount.computeIfAbsent(unknownRemoveTick, (final long keyInMap) -> { +- return new Long2IntOpenHashMap(); +- }).addTo(chunk, 1); ++ this.addExpireCount(chunkX, chunkZ); ++ // Folia end - use area based lock to reduce contention + } else { + throw new IllegalStateException("Should have been able to add " + unknownTicket + " to " + ticketsAtChunk); + } +- newLevel = level; + } ++ // Folia end - use area based lock to reduce contention + // Folia end - region threading + +- if (ticketsAtChunk.isEmpty()) { +- targetData.tickets.remove(chunk); // Folia - region threading +- } ++ // Folia - use area based lock to reduce contention - not possible anymore + + // Folia - region threading - move up + +- final long removeTick = ticket.removalTick; +- if (removeTick != NO_TIMEOUT_MARKER) { +- final Long2IntOpenHashMap removeCounts = targetData.removeTickToChunkExpireTicketCount.get(removeTick); // Folia - region threading +- final int currCount = removeCounts.addTo(chunk, -1); +- +- if (currCount == 1) { +- removeCounts.remove(chunk); +- if (removeCounts.isEmpty()) { +- targetData.removeTickToChunkExpireTicketCount.remove(removeTick); // Folia - region threading +- } +- } ++ // Folia start - use area based lock to reduce contention ++ final long removeDelay = ticket.removeDelay; ++ if (removeDelay != NO_TIMEOUT_MARKER) { ++ this.removeExpireCount(chunkX, chunkZ); ++ // Folia end - use area based lock to reduce contention + } + +- if (oldLevel != newLevel) { +- this.updateTicketLevel(chunk, newLevel); +- } ++ // Folia - use area based lock to reduce contention - not possible anymore + + return true; +- } finally { // Folia start - region threading +- if (lock) { +- this.world.regioniser.releaseReadLock(); +- } +- } // Folia end - region threading ++ // Folia - use area based lock to reduce contention + } finally { +- this.ticketLock.unlock(); ++ // Folia start - use area based lock to reduce contention ++ if (ticketLock != null) { ++ this.ticketLockArea.unlock(ticketLock); ++ } ++ // Folia end - use area based lock to reduce contention + } + } + + // atomic with respect to all add/remove/addandremove ticket calls for the given chunk + public void addAndRemoveTickets(final long chunk, final TicketType addType, final int addLevel, final T addIdentifier, + final TicketType removeType, final int removeLevel, final V removeIdentifier) { +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock(CoordinateUtils.getChunkX(chunk), CoordinateUtils.getChunkZ(chunk)); // Folia - use area based lock to reduce contention + try { +- this.addTicketAtLevel(addType, chunk, addLevel, addIdentifier); +- this.removeTicketAtLevel(removeType, chunk, removeLevel, removeIdentifier); ++ // Folia start - use area based lock to reduce contention ++ this.addTicketAtLevel(addType, chunk, addLevel, addIdentifier, false); ++ this.removeTicketAtLevel(removeType, chunk, removeLevel, removeIdentifier, false); ++ // Folia end - use area based lock to reduce contention + } finally { +- this.ticketLock.unlock(); ++ this.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + } + + // atomic with respect to all add/remove/addandremove ticket calls for the given chunk + public boolean addIfRemovedTicket(final long 
chunk, final TicketType addType, final int addLevel, final T addIdentifier, + final TicketType removeType, final int removeLevel, final V removeIdentifier) { +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock(CoordinateUtils.getChunkX(chunk), CoordinateUtils.getChunkZ(chunk)); // Folia - use area based lock to reduce contention + try { +- if (this.removeTicketAtLevel(removeType, chunk, removeLevel, removeIdentifier)) { +- this.addTicketAtLevel(addType, chunk, addLevel, addIdentifier); ++ // Folia start - use area based lock to reduce contention ++ if (this.removeTicketAtLevel(removeType, chunk, removeLevel, removeIdentifier, false)) { ++ this.addTicketAtLevel(addType, chunk, addLevel, addIdentifier, false); ++ // Folia end - use area based lock to reduce contention + return true; + } + return false; + } finally { +- this.ticketLock.unlock(); ++ this.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + } + +@@ -784,58 +812,122 @@ public final class ChunkHolderManager { + return; + } + +- this.ticketLock.lock(); +- try { +- // Folia start - region threading +- this.world.regioniser.computeForAllRegions((region) -> { +- for (final LongIterator iterator = new LongArrayList(region.getData().getHolderManagerRegionData().tickets.keySet()).longIterator(); iterator.hasNext();) { +- final long chunk = iterator.nextLong(); ++ // Folia start - use area based lock to reduce contention ++ final it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap> sections = new it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap(); ++ final int sectionShift = ChunkTaskScheduler.getChunkSystemLockShift(); ++ for (final RegionFileIOThread.ChunkCoordinate coord : this.tickets.keySet()) { ++ sections.computeIfAbsent( ++ CoordinateUtils.getChunkKey( ++ CoordinateUtils.getChunkX(coord.key) >> sectionShift, ++ CoordinateUtils.getChunkZ(coord.key) >> sectionShift ++ ), ++ (final long keyInMap) -> { ++ return new ArrayList<>(); ++ } ++ ).add(coord); ++ } ++ ++ for (final Iterator>> iterator = sections.long2ObjectEntrySet().fastIterator(); ++ iterator.hasNext();) { ++ final it.unimi.dsi.fastutil.longs.Long2ObjectMap.Entry> entry = iterator.next(); ++ final long sectionKey = entry.getLongKey(); ++ final List coordinates = entry.getValue(); + +- this.removeTicketAtLevel(ticketType, chunk, ticketLevel, ticketIdentifier); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock( ++ CoordinateUtils.getChunkX(sectionKey) << sectionShift, ++ CoordinateUtils.getChunkZ(sectionKey) << sectionShift ++ ); ++ try { ++ for (final RegionFileIOThread.ChunkCoordinate coord : coordinates) { ++ this.removeTicketAtLevel(ticketType, coord.key, ticketLevel, ticketIdentifier, false); + } +- }); +- // Folia end - region threading +- } finally { +- this.ticketLock.unlock(); ++ } finally { ++ this.ticketLockArea.unlock(ticketLock); ++ } + } ++ // Folia end - use area based lock to reduce contention + } + + public void tick() { +- // Folia start - region threading +- final ChunkHolderManager.HolderManagerRegionData data = this.getCurrentRegionData(); +- if (data == null) { ++ // Folia start - use area based lock to reduce contention ++ final ThreadedRegionizer.ThreadedRegion region = ++ TickRegionScheduler.getCurrentRegion(); ++ if (region == null) { + throw new IllegalStateException("Not running tick() while on a region"); + } +- // Folia end - region threading + +- this.ticketLock.lock(); 
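// Illustrative sketch (simplified, not Folia's actual helpers): removeAllTicketsFor above
// buckets every ticketed chunk coordinate by its lock section - chunk x/z shifted by the
// chunk system lock shift - so that each section's area lock is acquired exactly once per
// batch. packKey/unpackX/unpackZ below are assumed stand-ins for the CoordinateUtils calls.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class SectionGroupingSketch {

    // pack two ints into one long key (low 32 bits = x, high 32 bits = z)
    static long packKey(final int x, final int z) {
        return ((long)z << 32) | (x & 0xFFFFFFFFL);
    }

    static int unpackX(final long key) {
        return (int)key;
    }

    static int unpackZ(final long key) {
        return (int)(key >>> 32);
    }

    // group chunk keys by their lock section so each section is locked once per batch
    static Map<Long, List<Long>> groupBySection(final List<Long> chunkKeys, final int sectionShift) {
        final Map<Long, List<Long>> sections = new HashMap<>();
        for (final long chunkKey : chunkKeys) {
            final int sectionX = unpackX(chunkKey) >> sectionShift;
            final int sectionZ = unpackZ(chunkKey) >> sectionShift;
            sections.computeIfAbsent(packKey(sectionX, sectionZ), k -> new ArrayList<>()).add(chunkKey);
        }
        return sections;
    }
}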
+- try { +- final long tick = ++data.currentTick; // Folia - region threading ++ final int sectionShift = TickRegions.getRegionChunkShift(); ++ ++ final Predicate> expireNow = (final Ticket ticket) -> { ++ if (ticket.removeDelay == NO_TIMEOUT_MARKER) { ++ return false; ++ } ++ return --ticket.removeDelay <= 0L; ++ }; ++ ++ for (final LongIterator iterator = region.getOwnedSectionsUnsynchronised(); iterator.hasNext();) { ++ final long sectionKey = iterator.nextLong(); + +- final Long2IntOpenHashMap toRemove = data.removeTickToChunkExpireTicketCount.remove(tick); // Folia - region threading ++ final RegionFileIOThread.ChunkCoordinate section = new RegionFileIOThread.ChunkCoordinate(sectionKey); + +- if (toRemove == null) { +- return; ++ if (!this.sectionToChunkToExpireCount.containsKey(section)) { ++ continue; + } + +- final Predicate> expireNow = (final Ticket ticket) -> { +- return ticket.removalTick == tick; +- }; ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock( ++ CoordinateUtils.getChunkX(sectionKey) << sectionShift, ++ CoordinateUtils.getChunkZ(sectionKey) << sectionShift ++ ); + +- for (final LongIterator iterator = toRemove.keySet().longIterator(); iterator.hasNext();) { +- final long chunk = iterator.nextLong(); ++ try { ++ final Long2IntOpenHashMap chunkToExpireCount = this.sectionToChunkToExpireCount.get(section); ++ if (chunkToExpireCount == null) { ++ // lost to some race ++ continue; ++ } + +- final SortedArraySet> tickets = data.tickets.get(chunk); // Folia - region threading +- tickets.removeIf(expireNow); +- if (tickets.isEmpty()) { +- data.tickets.remove(chunk); // Folia - region threading +- this.ticketLevelPropagator.removeSource(chunk); +- } else { +- this.ticketLevelPropagator.setSource(chunk, convertBetweenTicketLevels(tickets.first().getTicketLevel())); ++ for (final Iterator iterator1 = chunkToExpireCount.long2IntEntrySet().fastIterator(); iterator1.hasNext();) { ++ final Long2IntMap.Entry entry = iterator1.next(); ++ ++ final long chunkKey = entry.getLongKey(); ++ final int expireCount = entry.getIntValue(); ++ ++ final RegionFileIOThread.ChunkCoordinate chunk = new RegionFileIOThread.ChunkCoordinate(chunkKey); ++ ++ final SortedArraySet> tickets = this.tickets.get(chunk); ++ final int levelBefore = getTicketLevelAt(tickets); ++ ++ final int sizeBefore = tickets.size(); ++ tickets.removeIf(expireNow); ++ final int sizeAfter = tickets.size(); ++ final int levelAfter = getTicketLevelAt(tickets); ++ ++ if (tickets.isEmpty()) { ++ this.tickets.remove(chunk); ++ } ++ if (levelBefore != levelAfter) { ++ this.updateTicketLevel(chunkKey, levelAfter); ++ } ++ ++ final int newExpireCount = expireCount - (sizeBefore - sizeAfter); ++ ++ if (newExpireCount == expireCount) { ++ continue; ++ } ++ ++ if (newExpireCount != 0) { ++ entry.setValue(newExpireCount); ++ } else { ++ iterator1.remove(); ++ } ++ } ++ ++ if (chunkToExpireCount.isEmpty()) { ++ this.sectionToChunkToExpireCount.remove(section); + } ++ } finally { ++ this.ticketLockArea.unlock(ticketLock); + } +- } finally { +- this.ticketLock.unlock(); + } + + this.processTicketUpdates(); +@@ -887,10 +979,11 @@ public final class ChunkHolderManager { + } + + private NewChunkHolder getOrCreateChunkHolder(final long position) { +- if (!this.ticketLock.isHeldByCurrentThread()) { ++ final int chunkX = CoordinateUtils.getChunkX(position); final int chunkZ = CoordinateUtils.getChunkZ(position); // Folia - use area based lock to reduce contention ++ if 
(!this.ticketLockArea.isHeldByCurrentThread(chunkX, chunkZ)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Must hold ticket level update lock!"); + } +- if (!this.taskScheduler.schedulingLock.isHeldByCurrentThread()) { ++ if (!this.taskScheduler.schedulingLockArea.isHeldByCurrentThread(chunkX, chunkZ)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Must hold scheduler lock!!"); + } + +@@ -903,12 +996,14 @@ public final class ChunkHolderManager { + } + + current = this.createChunkHolder(position); ++ synchronized (this.chunkHolders) { // Folia - use area based lock to reduce contention + this.chunkHolders.put(position, current); ++ } // Folia - use area based lock to reduce contention + + return current; + } + +- private long entityLoadCounter; ++ private final java.util.concurrent.atomic.AtomicLong entityLoadCounter = new java.util.concurrent.atomic.AtomicLong(); // Folia - use area based lock to reduce contention + + public ChunkEntitySlices getOrCreateEntityChunk(final int chunkX, final int chunkZ, final boolean transientChunk) { + TickThread.ensureTickThread(this.world, chunkX, chunkZ, "Cannot create entity chunk off-main"); +@@ -921,13 +1016,13 @@ public final class ChunkHolderManager { + + final AtomicBoolean isCompleted = new AtomicBoolean(); + final Thread waiter = Thread.currentThread(); +- final Long entityLoadId; ++ final Long entityLoadId = Long.valueOf(this.entityLoadCounter.getAndIncrement()); // Folia - use area based lock to reduce contention + NewChunkHolder.GenericDataLoadTaskCallback loadTask = null; +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock(chunkX, chunkZ); // Folia - use area based lock to reduce contention + try { +- entityLoadId = Long.valueOf(this.entityLoadCounter++); ++ // Folia - use area based lock to reduce contention + this.addTicketAtLevel(TicketType.ENTITY_LOAD, chunkX, chunkZ, MAX_TICKET_LEVEL, entityLoadId); +- this.taskScheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.taskScheduler.schedulingLockArea.lock(chunkX, chunkZ); // Folia - use area based lock to reduce contention + try { + current = this.getOrCreateChunkHolder(chunkX, chunkZ); + if ((ret = current.getEntityChunk()) != null && (transientChunk || !ret.isTransient())) { +@@ -951,10 +1046,10 @@ public final class ChunkHolderManager { + } + } + } finally { +- this.taskScheduler.schedulingLock.unlock(); ++ this.taskScheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } finally { +- this.ticketLock.unlock(); ++ this.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + + if (loadTask != null) { +@@ -996,7 +1091,7 @@ public final class ChunkHolderManager { + return null; + } + +- private long poiLoadCounter; ++ private final java.util.concurrent.atomic.AtomicLong poiLoadCounter = new java.util.concurrent.atomic.AtomicLong(); // Folia - use area based lock to reduce contention + + public PoiChunk loadPoiChunk(final int chunkX, final int chunkZ) { + TickThread.ensureTickThread(this.world, chunkX, chunkZ, "Cannot create poi chunk off-main"); +@@ -1013,13 +1108,13 @@ public final class ChunkHolderManager { + final AtomicReference completed = new AtomicReference<>(); + final AtomicBoolean isCompleted = new AtomicBoolean(); + final Thread waiter = Thread.currentThread(); 
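// Illustrative sketch (simplified stand-ins, not Folia's actual lock API): the entity/poi
// chunk load paths above always take the ticket area lock first and the scheduling area
// lock second for the same coordinate, releasing in reverse order, and the load id
// counters become AtomicLongs because no global mutex guards them any more.
import java.util.concurrent.atomic.AtomicLong;

interface CoordAreaLockSketch {
    Object lock(int chunkX, int chunkZ); // returns a token to hand back to unlock()
    void unlock(Object token);
}

final class NestedAreaLockUsageSketch {

    private final CoordAreaLockSketch ticketLockArea;
    private final CoordAreaLockSketch schedulingLockArea;
    // plain long counters are unsafe without a global lock, hence AtomicLong
    private final AtomicLong loadIdCounter = new AtomicLong();

    NestedAreaLockUsageSketch(final CoordAreaLockSketch ticketLockArea, final CoordAreaLockSketch schedulingLockArea) {
        this.ticketLockArea = ticketLockArea;
        this.schedulingLockArea = schedulingLockArea;
    }

    // ticket area first, scheduling area second, released innermost-first
    long withBothLocks(final int chunkX, final int chunkZ, final Runnable criticalSection) {
        final long loadId = this.loadIdCounter.getAndIncrement();
        final Object ticketToken = this.ticketLockArea.lock(chunkX, chunkZ);
        try {
            final Object schedulingToken = this.schedulingLockArea.lock(chunkX, chunkZ);
            try {
                criticalSection.run();
            } finally {
                this.schedulingLockArea.unlock(schedulingToken);
            }
        } finally {
            this.ticketLockArea.unlock(ticketToken);
        }
        return loadId;
    }
}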
+- final Long poiLoadId; ++ final Long poiLoadId = Long.valueOf(this.poiLoadCounter.getAndIncrement()); + NewChunkHolder.GenericDataLoadTaskCallback loadTask = null; +- this.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock(chunkX, chunkZ); // Folia - use area based lock to reduce contention + try { +- poiLoadId = Long.valueOf(this.poiLoadCounter++); ++ // Folia - use area based lock to reduce contention + this.addTicketAtLevel(TicketType.POI_LOAD, chunkX, chunkZ, MAX_TICKET_LEVEL, poiLoadId); +- this.taskScheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.taskScheduler.schedulingLockArea.lock(chunkX, chunkZ); // Folia - use area based lock to reduce contention + try { + current = this.getOrCreateChunkHolder(chunkX, chunkZ); + if (current.isPoiChunkLoaded()) { +@@ -1038,10 +1133,10 @@ public final class ChunkHolderManager { + poiLoad.raisePriority(PrioritisedExecutor.Priority.BLOCKING); + } + } finally { +- this.taskScheduler.schedulingLock.unlock(); ++ this.taskScheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } finally { +- this.ticketLock.unlock(); ++ this.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + + if (loadTask != null) { +@@ -1122,7 +1217,9 @@ public final class ChunkHolderManager { + } + } + +- final ReferenceLinkedOpenHashSet unloadQueue = new ReferenceLinkedOpenHashSet<>(); ++ // Folia start - use area based lock to reduce contention ++ final io.papermc.paper.threadedregions.ChunkQueue unloadQueue; ++ // Folia end - use area based lock to reduce contention + + /* + * Note: Only called on chunk holders that the current ticking region owns +@@ -1133,7 +1230,9 @@ public final class ChunkHolderManager { + // Folia - region threading + ChunkSystem.onChunkHolderDelete(this.world, holder.vanillaChunkHolder); + this.getCurrentRegionData().autoSaveQueue.remove(holder); // Folia - region threading ++ synchronized (this.chunkHolders) { // Folia - use area based lock to reduce contention + this.chunkHolders.remove(CoordinateUtils.getChunkKey(holder.chunkX, holder.chunkZ)); ++ } // Folia - use area based lock to reduce contention + } + + // note: never call while inside the chunk system, this will absolutely break everything +@@ -1143,100 +1242,150 @@ public final class ChunkHolderManager { + if (BLOCK_TICKET_UPDATES.get() == Boolean.TRUE) { + throw new IllegalStateException("Cannot unload chunks recursively"); + } +- if (this.ticketLock.isHeldByCurrentThread()) { +- throw new IllegalStateException("Cannot hold ticket update lock while calling processUnloads"); ++ // Folia start - use area based lock to reduce contention ++ final int sectionShift = this.unloadQueue.coordinateShift; // sectionShift <= lock shift ++ final List unloadSectionsForRegion = this.unloadQueue.retrieveForCurrentRegion(); ++ int unloadCountTentative = 0; ++ for (final io.papermc.paper.threadedregions.ChunkQueue.SectionToUnload sectionRef : unloadSectionsForRegion) { ++ final io.papermc.paper.threadedregions.ChunkQueue.UnloadSection section ++ = this.unloadQueue.getSectionUnsynchronized(sectionRef.sectionX(), sectionRef.sectionZ()); ++ ++ if (section == null) { ++ // removed concurrently ++ continue; ++ } ++ ++ // technically reading the size field is unsafe, and it may be incorrect. ++ // We assume that the error here cumulatively goes away over many ticks. 
If it did not, then it is possible ++ // for chunks to never unload or not unload fast enough. ++ unloadCountTentative += section.chunks.size(); + } +- if (this.taskScheduler.schedulingLock.isHeldByCurrentThread()) { +- throw new IllegalStateException("Cannot hold scheduling lock while calling processUnloads"); ++ ++ if (unloadCountTentative <= 0) { ++ // no work to do ++ return; + } + +- final ChunkHolderManager.HolderManagerRegionData currentData = this.getCurrentRegionData(); // Folia - region threading ++ // Note: The behaviour that we process ticket updates while holding the lock has been dropped here, as it is racey behavior. ++ // But, we do need to process updates here so that any add ticket that is synchronised before this call does not go missed. ++ this.processTicketUpdates(); + +- final List unloadQueue; +- final List scheduleList = new ArrayList<>(); +- this.ticketLock.lock(); +- try { +- this.taskScheduler.schedulingLock.lock(); ++ final int toUnloadCount = Math.max(50, (int)(unloadCountTentative * 0.05)); ++ int processedCount = 0; ++ ++ for (final io.papermc.paper.threadedregions.ChunkQueue.SectionToUnload sectionRef : unloadSectionsForRegion) { ++ final List stage1 = new ArrayList<>(); ++ final List stage2 = new ArrayList<>(); ++ ++ final int sectionLowerX = sectionRef.sectionX() << sectionShift; ++ final int sectionLowerZ = sectionRef.sectionZ() << sectionShift; ++ ++ // stage 1: set up for stage 2 while holding critical locks ++ ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.ticketLockArea.lock(sectionLowerX, sectionLowerZ); + try { +- if (this.unloadQueue.isEmpty()) { +- return; +- } +- // in order to ensure all chunks in the unload queue do not have a pending ticket level update, +- // process them now +- this.processTicketUpdates(false, false, scheduleList); +- +- // Folia start - region threading +- final ArrayDeque toUnload = new ArrayDeque<>(); +- // The unload queue is globally maintained, but we can only unload chunks in our region +- for (final NewChunkHolder holder : this.unloadQueue) { +- if (TickThread.isTickThreadFor(this.world, holder.chunkX, holder.chunkZ)) { +- toUnload.add(holder); +- } +- } +- // Folia end - region threading +- +- final int unloadCount = Math.max(50, (int)(toUnload.size() * 0.05)); // Folia - region threading +- unloadQueue = new ArrayList<>(unloadCount + 1); // Folia - region threading +- for (int i = 0; i < unloadCount && !toUnload.isEmpty(); ++i) { // Folia - region threading +- final NewChunkHolder chunkHolder = toUnload.removeFirst(); // Folia - region threading +- this.unloadQueue.remove(chunkHolder); // Folia - region threading +- if (chunkHolder.isSafeToUnload() != null) { +- LOGGER.error("Chunkholder " + chunkHolder + " is not safe to unload but is inside the unload queue?"); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node scheduleLock = this.taskScheduler.schedulingLockArea.lock(sectionLowerX, sectionLowerZ); ++ try { ++ final io.papermc.paper.threadedregions.ChunkQueue.UnloadSection section ++ = this.unloadQueue.getSectionUnsynchronized(sectionRef.sectionX(), sectionRef.sectionZ()); ++ ++ if (section == null) { ++ // removed concurrently + continue; + } +- final NewChunkHolder.UnloadState state = chunkHolder.unloadStage1(); +- if (state == null) { +- // can unload immediately +- this.removeChunkHolder(chunkHolder); +- continue; ++ ++ // collect the holders to run stage 1 on ++ final int sectionCount = section.chunks.size(); ++ ++ if ((sectionCount + processedCount) <= 
toUnloadCount) { ++ // we can just drain the entire section ++ ++ for (final LongIterator iterator = section.chunks.iterator(); iterator.hasNext();) { ++ final NewChunkHolder holder = this.chunkHolders.get(iterator.nextLong()); ++ if (holder == null) { ++ throw new IllegalStateException(); ++ } ++ stage1.add(holder); ++ } ++ ++ // remove section ++ this.unloadQueue.removeSection(sectionRef.sectionX(), sectionRef.sectionZ()); ++ } else { ++ // processedCount + len = toUnloadCount ++ // we cannot drain the entire section ++ for (int i = 0, len = toUnloadCount - processedCount; i < len; ++i) { ++ final NewChunkHolder holder = this.chunkHolders.get(section.chunks.removeFirstLong()); ++ if (holder == null) { ++ throw new IllegalStateException(); ++ } ++ stage1.add(holder); ++ } + } +- unloadQueue.add(state); ++ ++ // run stage 1 ++ for (int i = 0, len = stage1.size(); i < len; ++i) { ++ final NewChunkHolder chunkHolder = stage1.get(i); ++ if (chunkHolder.isSafeToUnload() != null) { ++ LOGGER.error("Chunkholder " + chunkHolder + " is not safe to unload but is inside the unload queue?"); ++ continue; ++ } ++ final NewChunkHolder.UnloadState state = chunkHolder.unloadStage1(); ++ if (state == null) { ++ // can unload immediately ++ this.removeChunkHolder(chunkHolder); ++ continue; ++ } ++ stage2.add(state); ++ } ++ } finally { ++ this.taskScheduler.schedulingLockArea.unlock(scheduleLock); + } + } finally { +- this.taskScheduler.schedulingLock.unlock(); ++ this.ticketLockArea.unlock(ticketLock); + } +- } finally { +- this.ticketLock.unlock(); +- } +- // schedule tasks, we can't let processTicketUpdates do this because we call it holding the schedule lock +- for (int i = 0, len = scheduleList.size(); i < len; ++i) { +- scheduleList.get(i).schedule(); +- } + +- final List toRemove = new ArrayList<>(unloadQueue.size()); ++ // stage 2: invoke expensive unload logic, designed to run without locks thanks to stage 1 ++ final List stage3 = new ArrayList<>(stage2.size()); + +- final Boolean before = this.blockTicketUpdates(); +- try { +- for (int i = 0, len = unloadQueue.size(); i < len; ++i) { +- final NewChunkHolder.UnloadState state = unloadQueue.get(i); +- final NewChunkHolder holder = state.holder(); ++ final Boolean before = this.blockTicketUpdates(); ++ try { ++ for (int i = 0, len = stage2.size(); i < len; ++i) { ++ final NewChunkHolder.UnloadState state = stage2.get(i); ++ final NewChunkHolder holder = state.holder(); + +- holder.unloadStage2(state); +- toRemove.add(holder); ++ holder.unloadStage2(state); ++ stage3.add(holder); ++ } ++ } finally { ++ this.unblockTicketUpdates(before); + } +- } finally { +- this.unblockTicketUpdates(before); +- } + +- this.ticketLock.lock(); +- try { +- this.taskScheduler.schedulingLock.lock(); ++ // stage 3: actually attempt to remove the chunk holders ++ ticketLock = this.ticketLockArea.lock(sectionLowerX, sectionLowerZ); + try { +- for (int i = 0, len = toRemove.size(); i < len; ++i) { +- final NewChunkHolder holder = toRemove.get(i); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node scheduleLock = this.taskScheduler.schedulingLockArea.lock(sectionLowerX, sectionLowerZ); ++ try { ++ for (int i = 0, len = stage3.size(); i < len; ++i) { ++ final NewChunkHolder holder = stage3.get(i); + +- if (holder.unloadStage3()) { +- this.removeChunkHolder(holder); +- } else { +- // add cooldown so the next unload check is not immediately next tick +- this.addTicketAtLevel(TicketType.UNLOAD_COOLDOWN, holder.chunkX, holder.chunkZ, MAX_TICKET_LEVEL, 
Unit.INSTANCE); ++ if (holder.unloadStage3()) { ++ this.removeChunkHolder(holder); ++ } else { ++ // add cooldown so the next unload check is not immediately next tick ++ this.addTicketAtLevel(TicketType.UNLOAD_COOLDOWN, CoordinateUtils.getChunkKey(holder.chunkX, holder.chunkZ), MAX_TICKET_LEVEL, Unit.INSTANCE, false); ++ } + } ++ } finally { ++ this.taskScheduler.schedulingLockArea.unlock(scheduleLock); + } + } finally { +- this.taskScheduler.schedulingLock.unlock(); ++ this.ticketLockArea.unlock(ticketLock); ++ } ++ ++ processedCount += stage1.size(); ++ ++ if (processedCount >= toUnloadCount) { ++ break; + } +- } finally { +- this.ticketLock.unlock(); + } + } + +@@ -1298,88 +1447,71 @@ public final class ChunkHolderManager { + } + } + +- private final MultiThreadedQueue> delayedTicketUpdates = new MultiThreadedQueue<>(); ++ // Folia - use area based lock to reduce contention + +- // note: MUST hold ticket lock, otherwise operation ordering is lost +- private boolean drainTicketUpdates() { +- boolean ret = false; + +- TicketOperation operation; +- while ((operation = this.delayedTicketUpdates.poll()) != null) { +- switch (operation.op) { +- case ADD: { +- ret |= this.addTicketAtLevel(operation.ticketType, operation.chunkCoord, operation.ticketLevel, operation.identifier); +- break; +- } +- case REMOVE: { +- ret |= this.removeTicketAtLevel(operation.ticketType, operation.chunkCoord, operation.ticketLevel, operation.identifier); +- break; +- } +- case ADD_IF_REMOVED: { +- ret |= this.addIfRemovedTicket( +- operation.chunkCoord, +- operation.ticketType, operation.ticketLevel, operation.identifier, +- operation.ticketType2, operation.ticketLevel2, operation.identifier2 +- ); +- break; +- } +- case ADD_AND_REMOVE: { +- ret = true; +- this.addAndRemoveTickets( +- operation.chunkCoord, +- operation.ticketType, operation.ticketLevel, operation.identifier, +- operation.ticketType2, operation.ticketLevel2, operation.identifier2 +- ); +- break; +- } ++ // Folia start - use area based lock to reduce contention ++ private boolean processTicketOp(TicketOperation operation) { ++ boolean ret = false; ++ switch (operation.op) { ++ case ADD: { ++ ret |= this.addTicketAtLevel(operation.ticketType, operation.chunkCoord, operation.ticketLevel, operation.identifier); ++ break; ++ } ++ case REMOVE: { ++ ret |= this.removeTicketAtLevel(operation.ticketType, operation.chunkCoord, operation.ticketLevel, operation.identifier); ++ break; ++ } ++ case ADD_IF_REMOVED: { ++ ret |= this.addIfRemovedTicket( ++ operation.chunkCoord, ++ operation.ticketType, operation.ticketLevel, operation.identifier, ++ operation.ticketType2, operation.ticketLevel2, operation.identifier2 ++ ); ++ break; ++ } ++ case ADD_AND_REMOVE: { ++ ret = true; ++ this.addAndRemoveTickets( ++ operation.chunkCoord, ++ operation.ticketType, operation.ticketLevel, operation.identifier, ++ operation.ticketType2, operation.ticketLevel2, operation.identifier2 ++ ); ++ break; + } + } + + return ret; + } ++ // Folia end - use area based lock to reduce contention + +- public Boolean tryDrainTicketUpdates() { ++ // note: MUST hold ticket lock, otherwise operation ordering is lost ++ private boolean drainTicketUpdates() { + boolean ret = false; +- for (;;) { +- final boolean acquired = this.ticketLock.tryLock(); +- try { +- if (!acquired) { +- return ret ? 
Boolean.TRUE : null; +- } + +- ret |= this.drainTicketUpdates(); +- } finally { +- if (acquired) { +- this.ticketLock.unlock(); +- } +- } +- if (this.delayedTicketUpdates.isEmpty()) { +- return Boolean.valueOf(ret); +- } // else: try to re-acquire +- } ++ // Folia - use area based lock to reduce contention ++ ++ return ret; ++ } ++ ++ public Boolean tryDrainTicketUpdates() { ++ return Boolean.FALSE; // Folia start - use area based lock to reduce contention + } + + public void pushDelayedTicketUpdate(final TicketOperation operation) { +- this.delayedTicketUpdates.add(operation); ++ this.processTicketOp(operation); // Folia - use area based lock to reduce contention + } + + public void pushDelayedTicketUpdates(final Collection> operations) { +- this.delayedTicketUpdates.addAll(operations); ++ // Folia start - use area based lock to reduce contention ++ for (final TicketOperation operation : operations) { ++ this.processTicketOp(operation); ++ } ++ // Folia end - use area based lock to reduce contention + } + + public Boolean tryProcessTicketUpdates() { +- final boolean acquired = this.ticketLock.tryLock(); +- try { +- if (!acquired) { +- return null; +- } +- +- return Boolean.valueOf(this.processTicketUpdates(false, true, null)); +- } finally { +- if (acquired) { +- this.ticketLock.unlock(); +- } +- } ++ return Boolean.valueOf(this.processTicketUpdates()); // Folia - use area based lock to reduce contention + } + + private final ThreadLocal BLOCK_TICKET_UPDATES = ThreadLocal.withInitial(() -> { +@@ -1413,12 +1545,7 @@ public final class ChunkHolderManager { + if (BLOCK_TICKET_UPDATES.get() == Boolean.TRUE) { + throw new IllegalStateException("Cannot update ticket level while unloading chunks or updating entity manager"); + } +- if (checkLocks && this.ticketLock.isHeldByCurrentThread()) { +- throw new IllegalStateException("Illegal recursive processTicketUpdates!"); +- } +- if (checkLocks && this.taskScheduler.schedulingLock.isHeldByCurrentThread()) { +- throw new IllegalStateException("Cannot update ticket levels from a scheduler context!"); +- } ++ // Folia - use area based lock to reduce contention + + List changedFullStatus = null; + +@@ -1428,94 +1555,19 @@ public final class ChunkHolderManager { + final boolean canProcessFullUpdates = processFullUpdates & isTickThread; + final boolean canProcessScheduling = scheduledTasks == null; + +- this.ticketLock.lock(); +- try { +- this.drainTicketUpdates(); +- +- final boolean levelsUpdated = this.ticketLevelPropagator.propagateUpdates(); +- if (levelsUpdated) { +- // Unlike CB, ticket level updates cannot happen recursively. Thank god. +- if (!this.ticketLevelUpdates.isEmpty()) { +- ret = true; +- +- // first the necessary chunkholders must be created, so just update the ticket levels +- for (final Iterator iterator = this.ticketLevelUpdates.long2IntEntrySet().fastIterator(); iterator.hasNext();) { +- final Long2IntMap.Entry entry = iterator.next(); +- final long key = entry.getLongKey(); +- final int newLevel = entry.getIntValue(); +- +- NewChunkHolder current = this.chunkHolders.get(key); +- if (current == null && newLevel > MAX_TICKET_LEVEL) { +- // not loaded and it shouldn't be loaded! +- iterator.remove(); +- continue; +- } +- +- final int currentLevel = current == null ? 
MAX_TICKET_LEVEL + 1 : current.getCurrentTicketLevel(); +- if (currentLevel == newLevel) { +- // nothing to do +- iterator.remove(); +- continue; +- } +- +- if (current == null) { +- // must create +- current = this.createChunkHolder(key); +- this.chunkHolders.put(key, current); +- current.updateTicketLevel(newLevel); +- } else { +- current.updateTicketLevel(newLevel); +- } +- } +- +- if (scheduledTasks == null) { +- scheduledTasks = new ArrayList<>(); +- } +- changedFullStatus = new ArrayList<>(); +- +- // allow the chunkholders to process ticket level updates without needing to acquire the schedule lock every time +- final List prev = CURRENT_TICKET_UPDATE_SCHEDULING.get(); +- CURRENT_TICKET_UPDATE_SCHEDULING.set(scheduledTasks); +- try { +- this.taskScheduler.schedulingLock.lock(); +- try { +- for (final Iterator iterator = this.ticketLevelUpdates.long2IntEntrySet().fastIterator(); iterator.hasNext();) { +- final Long2IntMap.Entry entry = iterator.next(); +- final long key = entry.getLongKey(); +- final NewChunkHolder current = this.chunkHolders.get(key); +- +- if (current == null) { +- throw new IllegalStateException("Expected chunk holder to be created"); +- } +- +- current.processTicketLevelUpdate(scheduledTasks, changedFullStatus); +- } +- } finally { +- this.taskScheduler.schedulingLock.unlock(); +- } +- } finally { +- CURRENT_TICKET_UPDATE_SCHEDULING.set(prev); +- } +- +- this.ticketLevelUpdates.clear(); +- } ++ // Folia start - use area based lock to reduce contention ++ if (this.ticketLevelPropagator.hasPendingUpdates()) { ++ if (scheduledTasks == null) { ++ scheduledTasks = new ArrayList<>(); + } ++ changedFullStatus = new ArrayList<>(); + +- // Folia start - region threading +- // it is possible that a special case new chunk holder had its ticket removed before it was propagated, +- // which means checkUnload was never invoked. By checking unload here, we ensure that either the +- // ticket level was propagated (in which case, a later depropagation would check again) or that +- // we called checkUnload for it. +- if (!this.specialCaseUnload.isEmpty()) { +- for (final NewChunkHolder special : this.specialCaseUnload) { +- special.checkUnload(); +- } +- this.specialCaseUnload.clear(); +- } +- // Folia end - region threading +- } finally { +- this.ticketLock.unlock(); ++ ret |= this.ticketLevelPropagator.performUpdates( ++ this.ticketLockArea, this.taskScheduler.schedulingLockArea, ++ scheduledTasks, changedFullStatus ++ ); + } ++ // Folia end - use area based lock to reduce contention + + if (changedFullStatus != null) { + this.addChangedStatuses(changedFullStatus); +@@ -1561,43 +1613,7 @@ public final class ChunkHolderManager { + } + + public JsonObject getDebugJsonForWatchdog() { +- // try and detect any potential deadlock that would require us to read unlocked +- try { +- if (this.ticketLock.tryLock(10, TimeUnit.SECONDS)) { +- try { +- if (this.taskScheduler.schedulingLock.tryLock(10, TimeUnit.SECONDS)) { +- try { +- return this.getDebugJsonNoLock(); +- } finally { +- this.taskScheduler.schedulingLock.unlock(); +- } +- } +- } finally { +- this.ticketLock.unlock(); +- } +- } +- } catch (final InterruptedException ignore) {} +- +- LOGGER.error("Failed to acquire ticket and scheduling lock before timeout for world " + this.world.getWorld().getName()); +- +- // because we read without locks, it may throw exceptions for fastutil maps +- // so just try until it works... 
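// Illustrative sketch (propagator and task types are simplified assumptions): the new
// processTicketUpdates above only collects work while the per-section locks are held
// inside the propagator's performUpdates call, and the collected scheduling/full-status
// work is run after every lock has been released.
import java.util.ArrayList;
import java.util.List;

final class TicketUpdateFlowSketch {

    interface SectionPropagatorSketch {
        boolean hasPendingUpdates();
        // performs level updates under the appropriate per-section locks and appends any
        // work that must not run while locked into the two lists
        boolean performUpdates(List<Runnable> scheduledTasks, List<Runnable> changedFullStatus);
    }

    static boolean processTicketUpdates(final SectionPropagatorSketch propagator) {
        boolean ret = false;
        List<Runnable> scheduledTasks = null;
        List<Runnable> changedFullStatus = null;

        if (propagator.hasPendingUpdates()) {
            scheduledTasks = new ArrayList<>();
            changedFullStatus = new ArrayList<>();
            ret |= propagator.performUpdates(scheduledTasks, changedFullStatus);
        }

        // run the collected work only after all section locks are released
        if (changedFullStatus != null) {
            changedFullStatus.forEach(Runnable::run);
        }
        if (scheduledTasks != null) {
            scheduledTasks.forEach(Runnable::run);
        }
        return ret;
    }
}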
+- Throwable lastException = null; +- for (int count = 0;count < 1000;++count) { +- try { +- return this.getDebugJsonNoLock(); +- } catch (final ThreadDeath death) { +- throw death; +- } catch (final Throwable thr) { +- lastException = thr; +- Thread.yield(); +- LockSupport.parkNanos(10_000L); +- } +- } +- +- // failed, return +- LOGGER.error("Failed to retrieve debug json for watchdog thread without locking", lastException); +- return null; ++ return this.getDebugJsonNoLock(); // Folia - use area based lock to reduce contention + } + + private JsonObject getDebugJsonNoLock() { +@@ -1606,12 +1622,31 @@ public final class ChunkHolderManager { + + final JsonArray unloadQueue = new JsonArray(); + ret.add("unload_queue", unloadQueue); +- for (final NewChunkHolder holder : this.unloadQueue) { +- final JsonObject coordinate = new JsonObject(); +- unloadQueue.add(coordinate); ++ // Folia start - use area based lock to reduce contention ++ ret.addProperty("lock_shift", Integer.valueOf(ChunkTaskScheduler.getChunkSystemLockShift())); ++ ret.addProperty("ticket_shift", Integer.valueOf(io.papermc.paper.threadedregions.ThreadedTicketLevelPropagator.SECTION_SHIFT)); ++ ret.addProperty("region_shift", Integer.valueOf(this.world.regioniser.sectionChunkShift)); ++ for (final io.papermc.paper.threadedregions.ChunkQueue.SectionToUnload section : this.unloadQueue.retrieveForAllRegions()) { ++ final JsonObject sectionJson = new JsonObject(); ++ unloadQueue.add(sectionJson); ++ sectionJson.addProperty("sectionX", section.sectionX()); ++ sectionJson.addProperty("sectionZ", section.sectionX()); ++ sectionJson.addProperty("order", section.order()); ++ ++ final JsonArray coordinates = new JsonArray(); ++ sectionJson.add("coordinates", coordinates); ++ ++ final io.papermc.paper.threadedregions.ChunkQueue.UnloadSection actualSection = this.unloadQueue.getSectionUnsynchronized(section.sectionX(), section.sectionZ()); ++ for (final LongIterator iterator = actualSection.chunks.iterator(); iterator.hasNext();) { ++ final long coordinate = iterator.nextLong(); ++ ++ final JsonObject coordinateJson = new JsonObject(); ++ coordinates.add(coordinateJson); + +- coordinate.addProperty("chunkX", Integer.valueOf(holder.chunkX)); +- coordinate.addProperty("chunkZ", Integer.valueOf(holder.chunkZ)); ++ coordinateJson.addProperty("chunkX", Integer.valueOf(CoordinateUtils.getChunkX(coordinate))); ++ coordinateJson.addProperty("chunkZ", Integer.valueOf(CoordinateUtils.getChunkZ(coordinate))); ++ } ++ // Folia end - use area based lock to reduce contention + } + + final JsonArray holders = new JsonArray(); +@@ -1621,71 +1656,77 @@ public final class ChunkHolderManager { + holders.add(holder.getDebugJson()); + } + +- // Folia start - region threading +- final JsonArray regions = new JsonArray(); +- ret.add("regions", regions); +- this.world.regioniser.computeForAllRegionsUnsynchronised((region) -> { +- final JsonObject regionJson = new JsonObject(); +- regions.add(regionJson); ++ // Folia start - use area based lock to reduce contention ++ // TODO ++ /* ++ final JsonArray removeTickToChunkExpireTicketCount = new JsonArray(); ++ regionJson.add("remove_tick_to_chunk_expire_ticket_count", removeTickToChunkExpireTicketCount); + +- final TickRegions.TickRegionData regionData = region.getData(); ++ for (final Long2ObjectMap.Entry tickEntry : regionData.getHolderManagerRegionData().removeTickToChunkExpireTicketCount.long2ObjectEntrySet()) { ++ final long tick = tickEntry.getLongKey(); ++ final Long2IntOpenHashMap coordinateToCount = 
tickEntry.getValue(); + +- regionJson.addProperty("current_tick", Long.valueOf(regionData.getCurrentTick())); ++ final JsonObject tickJson = new JsonObject(); ++ removeTickToChunkExpireTicketCount.add(tickJson); + +- final JsonArray removeTickToChunkExpireTicketCount = new JsonArray(); +- regionJson.add("remove_tick_to_chunk_expire_ticket_count", removeTickToChunkExpireTicketCount); ++ tickJson.addProperty("tick", Long.valueOf(tick)); + +- for (final Long2ObjectMap.Entry tickEntry : regionData.getHolderManagerRegionData().removeTickToChunkExpireTicketCount.long2ObjectEntrySet()) { +- final long tick = tickEntry.getLongKey(); +- final Long2IntOpenHashMap coordinateToCount = tickEntry.getValue(); ++ final JsonArray tickEntries = new JsonArray(); ++ tickJson.add("entries", tickEntries); + +- final JsonObject tickJson = new JsonObject(); +- removeTickToChunkExpireTicketCount.add(tickJson); ++ for (final Long2IntMap.Entry entry : coordinateToCount.long2IntEntrySet()) { ++ final long coordinate = entry.getLongKey(); ++ final int count = entry.getIntValue(); + +- tickJson.addProperty("tick", Long.valueOf(tick)); ++ final JsonObject entryJson = new JsonObject(); ++ tickEntries.add(entryJson); + +- final JsonArray tickEntries = new JsonArray(); +- tickJson.add("entries", tickEntries); ++ entryJson.addProperty("chunkX", Long.valueOf(CoordinateUtils.getChunkX(coordinate))); ++ entryJson.addProperty("chunkZ", Long.valueOf(CoordinateUtils.getChunkZ(coordinate))); ++ entryJson.addProperty("count", Integer.valueOf(count)); ++ } ++ } + +- for (final Long2IntMap.Entry entry : coordinateToCount.long2IntEntrySet()) { +- final long coordinate = entry.getLongKey(); +- final int count = entry.getIntValue(); ++ final JsonArray allTicketsJson = new JsonArray(); ++ regionJson.add("tickets", allTicketsJson); + +- final JsonObject entryJson = new JsonObject(); +- tickEntries.add(entryJson); ++ for (final Long2ObjectMap.Entry>> coordinateTickets : regionData.getHolderManagerRegionData().tickets.long2ObjectEntrySet()) { ++ final long coordinate = coordinateTickets.getLongKey(); ++ final SortedArraySet> tickets = coordinateTickets.getValue(); + +- entryJson.addProperty("chunkX", Long.valueOf(CoordinateUtils.getChunkX(coordinate))); +- entryJson.addProperty("chunkZ", Long.valueOf(CoordinateUtils.getChunkZ(coordinate))); +- entryJson.addProperty("count", Integer.valueOf(count)); +- } +- } ++ final JsonObject coordinateJson = new JsonObject(); ++ allTicketsJson.add(coordinateJson); + +- final JsonArray allTicketsJson = new JsonArray(); +- regionJson.add("tickets", allTicketsJson); ++ coordinateJson.addProperty("chunkX", Long.valueOf(CoordinateUtils.getChunkX(coordinate))); ++ coordinateJson.addProperty("chunkZ", Long.valueOf(CoordinateUtils.getChunkZ(coordinate))); + +- for (final Long2ObjectMap.Entry>> coordinateTickets : regionData.getHolderManagerRegionData().tickets.long2ObjectEntrySet()) { +- final long coordinate = coordinateTickets.getLongKey(); +- final SortedArraySet> tickets = coordinateTickets.getValue(); ++ final JsonArray ticketsSerialized = new JsonArray(); ++ coordinateJson.add("tickets", ticketsSerialized); + +- final JsonObject coordinateJson = new JsonObject(); +- allTicketsJson.add(coordinateJson); ++ for (final Ticket ticket : tickets) { ++ final JsonObject ticketSerialized = new JsonObject(); ++ ticketsSerialized.add(ticketSerialized); + +- coordinateJson.addProperty("chunkX", Long.valueOf(CoordinateUtils.getChunkX(coordinate))); +- coordinateJson.addProperty("chunkZ", 
Long.valueOf(CoordinateUtils.getChunkZ(coordinate))); ++ ticketSerialized.addProperty("type", ticket.getType().toString()); ++ ticketSerialized.addProperty("level", Integer.valueOf(ticket.getTicketLevel())); ++ ticketSerialized.addProperty("identifier", Objects.toString(ticket.key)); ++ ticketSerialized.addProperty("remove_tick", Long.valueOf(ticket.removalTick)); ++ } ++ } ++ */ ++ // Folia end - use area based lock to reduce contention + +- final JsonArray ticketsSerialized = new JsonArray(); +- coordinateJson.add("tickets", ticketsSerialized); ++ // Folia start - region threading ++ final JsonArray regions = new JsonArray(); ++ ret.add("regions", regions); ++ this.world.regioniser.computeForAllRegionsUnsynchronised((region) -> { ++ final JsonObject regionJson = new JsonObject(); ++ regions.add(regionJson); + +- for (final Ticket ticket : tickets) { +- final JsonObject ticketSerialized = new JsonObject(); +- ticketsSerialized.add(ticketSerialized); ++ final TickRegions.TickRegionData regionData = region.getData(); + +- ticketSerialized.addProperty("type", ticket.getType().toString()); +- ticketSerialized.addProperty("level", Integer.valueOf(ticket.getTicketLevel())); +- ticketSerialized.addProperty("identifier", Objects.toString(ticket.key)); +- ticketSerialized.addProperty("remove_tick", Long.valueOf(ticket.removalTick)); +- } +- } ++ regionJson.addProperty("current_tick", Long.valueOf(regionData.getCurrentTick())); ++ // Folia - use area based lock to reduce contention + }); + // Folia end - region threading + +@@ -1693,27 +1734,6 @@ public final class ChunkHolderManager { + } + + public JsonObject getDebugJson() { +- final List scheduleList = new ArrayList<>(); +- try { +- final JsonObject ret; +- this.ticketLock.lock(); +- try { +- this.taskScheduler.schedulingLock.lock(); +- try { +- this.processTicketUpdates(false, false, scheduleList); +- ret = this.getDebugJsonNoLock(); +- } finally { +- this.taskScheduler.schedulingLock.unlock(); +- } +- } finally { +- this.ticketLock.unlock(); +- } +- return ret; +- } finally { +- // schedule tasks, we can't let processTicketUpdates do this because we call it holding the schedule lock +- for (int i = 0, len = scheduleList.size(); i < len; ++i) { +- scheduleList.get(i).schedule(); +- } +- } ++ return this.getDebugJsonNoLock(); // Folia - use area based lock to reduce contention + } + } +diff --git a/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkLoadTask.java b/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkLoadTask.java +index 1f7c146ff0b2a835c818f49da6c1f1411f26aa39..7b8362625b48f1829ed4fd3c7fde6a4bec8e4099 100644 +--- a/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkLoadTask.java ++++ b/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkLoadTask.java +@@ -82,7 +82,7 @@ public final class ChunkLoadTask extends ChunkProgressionTask { + + // NOTE: it is IMPOSSIBLE for getOrLoadEntityData/getOrLoadPoiData to complete synchronously, because + // they must schedule a task to off main or to on main to complete +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + if (this.scheduled) { + throw new IllegalStateException("schedule() called twice"); +@@ -108,7 +108,7 @@ public final class ChunkLoadTask extends ChunkProgressionTask { + this.entityLoadTask = entityLoadTask; + this.poiLoadTask = poiLoadTask; + } 
finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + + if (entityLoadTask != null) { +@@ -125,11 +125,11 @@ public final class ChunkLoadTask extends ChunkProgressionTask { + @Override + public void cancel() { + // must be before load task access, so we can synchronise with the writes to the fields +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + this.cancelled = true; + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + + /* +diff --git a/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkTaskScheduler.java b/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkTaskScheduler.java +index 5ca4e9c85c957c669d54fd9e5e52f13502b592da..4ea4b5a78fe629d5aa565d8d9a47f58d96084c0c 100644 +--- a/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkTaskScheduler.java ++++ b/src/main/java/io/papermc/paper/chunk/system/scheduling/ChunkTaskScheduler.java +@@ -117,7 +117,7 @@ public final class ChunkTaskScheduler { + + // Folia - regionised ticking + +- final ReentrantLock schedulingLock = new ReentrantLock(); ++ // Folia - use area based lock to reduce contention - replaced by schedulingLockArea + public final ChunkHolderManager chunkHolderManager; + + static { +@@ -193,6 +193,73 @@ public final class ChunkTaskScheduler { + } + } + ++ // Folia start - use area based lock to reduce contention ++ // must be >= region shift and must be >= ticket propagator section shift ++ // it must be >= region shift since the regioniser assumes ticket updates do not occur in parallel for the region sections ++ // it must be >= ticket propagator section shift so that the ticket propagator can assume that owning a position implies owning ++ // the entire section ++ // we just take the max, as we want the smallest shift that satifies these properties ++ private static final int LOCK_SHIFT = Math.max(io.papermc.paper.threadedregions.ThreadedTicketLevelPropagator.SECTION_SHIFT, io.papermc.paper.threadedregions.TickRegions.getRegionChunkShift()); ++ public static int getChunkSystemLockShift() { ++ return LOCK_SHIFT; ++ } ++ ++ private static final int[] ACCESS_RADIUS_TABLE = new int[ChunkStatus.getStatusList().size()]; ++ private static final int[] MAX_ACCESS_RADIUS_TABLE = new int[ACCESS_RADIUS_TABLE.length]; ++ static { ++ Arrays.fill(ACCESS_RADIUS_TABLE, -1); ++ } ++ ++ private static int getAccessRadius0(final ChunkStatus genStatus) { ++ if (genStatus == ChunkStatus.EMPTY) { ++ return 0; ++ } ++ ++ final int radius = Math.max(genStatus.loadRange, genStatus.getRange()); ++ int maxRange = radius; ++ ++ for (int dist = 1; dist <= radius; ++dist) { ++ final ChunkStatus requiredNeighbourStatus = ChunkMap.getDependencyStatus(genStatus, radius); ++ final int rad = ACCESS_RADIUS_TABLE[requiredNeighbourStatus.getIndex()]; ++ if (rad == -1) { ++ throw new IllegalStateException(); ++ } ++ ++ maxRange = Math.max(maxRange, dist + rad); ++ } ++ ++ return maxRange; ++ } ++ ++ private static int maxAccessRadius; ++ ++ static { ++ final List statuses = ChunkStatus.getStatusList(); ++ for (int i = 0, len = statuses.size(); i < len; ++i) { ++ ACCESS_RADIUS_TABLE[i] = 
getAccessRadius0(statuses.get(i)); ++ } ++ int max = 0; ++ for (int i = 0, len = statuses.size(); i < len; ++i) { ++ MAX_ACCESS_RADIUS_TABLE[i] = max = Math.max(ACCESS_RADIUS_TABLE[i], max); ++ } ++ maxAccessRadius = max; ++ } ++ ++ public static int getMaxAccessRadius() { ++ return maxAccessRadius; ++ } ++ ++ public static int getAccessRadius(final ChunkStatus genStatus) { ++ return ACCESS_RADIUS_TABLE[genStatus.getIndex()]; ++ } ++ ++ public static int getAccessRadius(final ChunkHolder.FullChunkStatus status) { ++ return (status.ordinal() - 1) + getAccessRadius(ChunkStatus.FULL); ++ } ++ ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock schedulingLockArea = new ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock(getChunkSystemLockShift()); ++ // Folia end - use area based lock to reduce contention ++ + public ChunkTaskScheduler(final ServerLevel world, final PrioritisedThreadPool workers) { + this.world = world; + this.workers = workers; +@@ -273,10 +340,11 @@ public final class ChunkTaskScheduler { + }, priority); + return; + } +- if (this.chunkHolderManager.ticketLock.isHeldByCurrentThread()) { ++ final int accessRadius = getAccessRadius(toStatus); // Folia - use area based lock to reduce contention ++ if (this.chunkHolderManager.ticketLockArea.isHeldByCurrentThread(chunkX, chunkZ, accessRadius)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Cannot schedule chunk load during ticket level update"); + } +- if (this.schedulingLock.isHeldByCurrentThread()) { ++ if (this.schedulingLockArea.isHeldByCurrentThread(chunkX, chunkZ, accessRadius)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Cannot schedule chunk loading recursively"); + } + +@@ -310,9 +378,9 @@ public final class ChunkTaskScheduler { + + final boolean scheduled; + final LevelChunk chunk; +- this.chunkHolderManager.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.chunkHolderManager.ticketLockArea.lock(chunkX, chunkZ, accessRadius); // Folia - use area based lock to reduce contention + try { +- this.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.schedulingLockArea.lock(chunkX, chunkZ, accessRadius); // Folia - use area based lock to reduce contention + try { + final NewChunkHolder chunkHolder = this.chunkHolderManager.getChunkHolder(chunkKey); + if (chunkHolder == null || chunkHolder.getTicketLevel() > minLevel) { +@@ -343,10 +411,10 @@ public final class ChunkTaskScheduler { + } + } + } finally { +- this.schedulingLock.unlock(); ++ this.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } finally { +- this.chunkHolderManager.ticketLock.unlock(); ++ this.chunkHolderManager.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + + if (!scheduled) { +@@ -384,12 +452,13 @@ public final class ChunkTaskScheduler { + // only appropriate to use with ServerLevel#syncLoadNonFull + public boolean beginChunkLoadForNonFullSync(final int chunkX, final int chunkZ, final ChunkStatus toStatus, + final PrioritisedExecutor.Priority priority) { ++ final int accessRadius = getAccessRadius(toStatus); // Folia - use area based lock to reduce contention + final long chunkKey = CoordinateUtils.getChunkKey(chunkX, chunkZ); + final int minLevel = 33 + ChunkStatus.getDistance(toStatus); + final List tasks = new ArrayList<>(); +- 
this.chunkHolderManager.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.chunkHolderManager.ticketLockArea.lock(chunkX, chunkZ, accessRadius); // Folia - use area based lock to reduce contention + try { +- this.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.schedulingLockArea.lock(chunkX, chunkZ, accessRadius); // Folia - use area based lock to reduce contention + try { + final NewChunkHolder chunkHolder = this.chunkHolderManager.getChunkHolder(chunkKey); + if (chunkHolder == null || chunkHolder.getTicketLevel() > minLevel) { +@@ -407,10 +476,10 @@ public final class ChunkTaskScheduler { + } + } + } finally { +- this.schedulingLock.unlock(); ++ this.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } finally { +- this.chunkHolderManager.ticketLock.unlock(); ++ this.chunkHolderManager.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + + for (int i = 0, len = tasks.size(); i < len; ++i) { +@@ -429,10 +498,11 @@ public final class ChunkTaskScheduler { + }, priority); + return; + } +- if (this.chunkHolderManager.ticketLock.isHeldByCurrentThread()) { ++ final int accessRadius = getAccessRadius(toStatus); // Folia - use area based lock to reduce contention ++ if (this.chunkHolderManager.ticketLockArea.isHeldByCurrentThread(chunkX, chunkZ, accessRadius)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Cannot schedule chunk load during ticket level update"); + } +- if (this.schedulingLock.isHeldByCurrentThread()) { ++ if (this.schedulingLockArea.isHeldByCurrentThread(chunkX, chunkZ, accessRadius)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Cannot schedule chunk loading recursively"); + } + +@@ -469,9 +539,9 @@ public final class ChunkTaskScheduler { + + final boolean scheduled; + final ChunkAccess chunk; +- this.chunkHolderManager.ticketLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.chunkHolderManager.ticketLockArea.lock(chunkX, chunkZ, accessRadius); // Folia - use area based lock to reduce contention + try { +- this.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.schedulingLockArea.lock(chunkX, chunkZ, accessRadius); // Folia - use area based lock to reduce contention + try { + final NewChunkHolder chunkHolder = this.chunkHolderManager.getChunkHolder(chunkKey); + if (chunkHolder == null || chunkHolder.getTicketLevel() > minLevel) { +@@ -496,10 +566,10 @@ public final class ChunkTaskScheduler { + } + } + } finally { +- this.schedulingLock.unlock(); ++ this.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } finally { +- this.chunkHolderManager.ticketLock.unlock(); ++ this.chunkHolderManager.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + + for (int i = 0, len = tasks.size(); i < len; ++i) { +@@ -546,7 +616,7 @@ public final class ChunkTaskScheduler { + private ChunkProgressionTask schedule(final int chunkX, final int chunkZ, final ChunkStatus targetStatus, + final NewChunkHolder chunkHolder, final List allTasks, + final PrioritisedExecutor.Priority minPriority) { +- if (!this.schedulingLock.isHeldByCurrentThread()) { ++ if (!this.schedulingLockArea.isHeldByCurrentThread(chunkX, chunkZ, 
getAccessRadius(targetStatus))) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Not holding scheduling lock"); + } + +diff --git a/src/main/java/io/papermc/paper/chunk/system/scheduling/NewChunkHolder.java b/src/main/java/io/papermc/paper/chunk/system/scheduling/NewChunkHolder.java +index 12feb739a784a0108256451a37d94d041b7a5cdc..1ff6b138ccf4a1cefa719cd0b2b3af02d18a26fb 100644 +--- a/src/main/java/io/papermc/paper/chunk/system/scheduling/NewChunkHolder.java ++++ b/src/main/java/io/papermc/paper/chunk/system/scheduling/NewChunkHolder.java +@@ -74,7 +74,7 @@ public final class NewChunkHolder { + TickThread.ensureTickThread(this.world, this.chunkX, this.chunkZ, "Cannot sync load entity data off-main"); + final CompoundTag entityChunk; + final ChunkEntitySlices ret; +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + if (this.entityChunk != null && (transientChunk || !this.entityChunk.isTransient())) { + return this.entityChunk; +@@ -106,7 +106,7 @@ public final class NewChunkHolder { + entityChunk = null; + } + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + + if (!transientChunk) { +@@ -145,7 +145,7 @@ public final class NewChunkHolder { + final List completeWaiters; + ChunkLoadTask.EntityDataLoadTask entityDataLoadTask = null; + boolean scheduleEntityTask = false; +- this.scheduler.schedulingLock.lock(); ++ ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + final List waiters = this.entityDataLoadTaskWaiters; + this.entityDataLoadTask = null; +@@ -177,7 +177,7 @@ public final class NewChunkHolder { + } + } + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + + if (scheduleEntityTask) { +@@ -191,11 +191,11 @@ public final class NewChunkHolder { + } + } + +- this.scheduler.schedulingLock.lock(); ++ schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + this.checkUnload(); + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } + +@@ -206,7 +206,7 @@ public final class NewChunkHolder { + throw new IllegalStateException("Cannot load entity data, it is already loaded"); + } + // why not just acquire the lock? because the caller NEEDS to call isEntityChunkNBTLoaded before this! 
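// Illustrative sketch (shift values below are hypothetical): the access-radius checks
// above lock an area around (chunkX, chunkZ) rather than a single position, and the lock
// shift is chosen as the maximum of the ticket propagator section shift and the region
// chunk shift so that holding a section also satisfies both of those ownership assumptions.
final class LockShiftSketch {

    // smallest shift satisfying both constraints, mirroring the LOCK_SHIFT rule above
    static int chooseLockShift(final int ticketPropagatorSectionShift, final int regionChunkShift) {
        return Math.max(ticketPropagatorSectionShift, regionChunkShift);
    }

    // number of lock sections along one axis touched by an access of the given radius
    static int sectionsTouched(final int center, final int radius, final int lockShift) {
        final int minSection = (center - radius) >> lockShift;
        final int maxSection = (center + radius) >> lockShift;
        return maxSection - minSection + 1;
    }

    public static void main(final String[] args) {
        final int lockShift = chooseLockShift(6, 4); // assumed example shifts
        // even a radius-32 access around chunk 100 only spans a handful of sections
        System.out.println(sectionsTouched(100, 32, lockShift));
    }
}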
+- if (!this.scheduler.schedulingLock.isHeldByCurrentThread()) { ++ if (!this.scheduler.schedulingLockArea.isHeldByCurrentThread(this.chunkX, this.chunkZ)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Must hold scheduling lock"); + } + +@@ -262,7 +262,7 @@ public final class NewChunkHolder { + final List completeWaiters; + ChunkLoadTask.PoiDataLoadTask poiDataLoadTask = null; + boolean schedulePoiTask = false; +- this.scheduler.schedulingLock.lock(); ++ ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + final List waiters = this.poiDataLoadTaskWaiters; + this.poiDataLoadTask = null; +@@ -294,7 +294,7 @@ public final class NewChunkHolder { + } + } + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + + if (schedulePoiTask) { +@@ -307,11 +307,11 @@ public final class NewChunkHolder { + callback.accept(result); + } + } +- this.scheduler.schedulingLock.lock(); ++ schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + this.checkUnload(); + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } + +@@ -322,7 +322,7 @@ public final class NewChunkHolder { + throw new IllegalStateException("Cannot load poi data, it is already loaded"); + } + // why not just acquire the lock? because the caller NEEDS to call isPoiChunkLoaded before this! +- if (!this.scheduler.schedulingLock.isHeldByCurrentThread()) { ++ if (!this.scheduler.schedulingLockArea.isHeldByCurrentThread(this.chunkX, this.chunkZ)) { // Folia - use area based lock to reduce contention + throw new IllegalStateException("Must hold scheduling lock"); + } + +@@ -411,7 +411,8 @@ public final class NewChunkHolder { + + @Override + public boolean cancel() { +- this.chunkHolder.scheduler.schedulingLock.lock(); ++ final NewChunkHolder holder = this.chunkHolder; // Folia - use area based lock to reduce contention ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = holder.scheduler.schedulingLockArea.lock(holder.chunkX, holder.chunkZ); // Folia - use area based lock to reduce contention + try { + if (!this.completed) { + this.completed = true; +@@ -420,7 +421,7 @@ public final class NewChunkHolder { + } + return false; + } finally { +- this.chunkHolder.scheduler.schedulingLock.unlock(); ++ holder.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } + } +@@ -714,10 +715,10 @@ public final class NewChunkHolder { + } + if (this.isSafeToUnload() == null) { + // ensure in unload queue +- this.scheduler.chunkHolderManager.unloadQueue.add(this); ++ this.scheduler.chunkHolderManager.unloadQueue.addChunk(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + } else { + // ensure not in unload queue +- this.scheduler.chunkHolderManager.unloadQueue.remove(this); ++ this.scheduler.chunkHolderManager.unloadQueue.removeChunk(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + } + } + +@@ -787,13 +788,13 @@ public final class NewChunkHolder { + RegionFileIOThread.scheduleSave(this.world, this.chunkX, 
this.chunkZ, data, RegionFileIOThread.RegionFileType.CHUNK_DATA); + } + this.chunkDataUnload.completable().complete(data); +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + // can only write to these fields while holding the schedule lock + this.chunkDataUnload = null; + this.checkUnload(); + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } + +@@ -830,12 +831,12 @@ public final class NewChunkHolder { + this.lastEntityUnload = null; + + if (entityChunk.unload()) { +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + entityChunk.setTransient(true); + this.entityChunk = entityChunk; + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } else { + this.world.getEntityLookup().entitySectionUnload(this.chunkX, this.chunkZ); +@@ -1206,7 +1207,7 @@ public final class NewChunkHolder { + // only call on main thread // Folia - update comment + private void onFullChunkLoadChange(final boolean loaded, final List changedFullStatus) { + // Folia start - chunk system fix - acquire scheduling lock +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ, NEIGHBOUR_RADIUS); // Folia - use area based lock to reduce contention + try { + // Folia end - chunk system fix - acquire scheduling lock + for (int dz = -NEIGHBOUR_RADIUS; dz <= NEIGHBOUR_RADIUS; ++dz) { +@@ -1225,7 +1226,7 @@ public final class NewChunkHolder { + } + // Folia start - chunk system fix - acquire scheduling lock + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + // Folia end - chunk system fix - acquire scheduling lock + } +@@ -1265,7 +1266,7 @@ public final class NewChunkHolder { + // note: use opaque reads for chunk status read since we need it to be atomic + + // test if anything changed +- final long statusCheck = (long)CHUNK_STATUS_HANDLE.getOpaque((NewChunkHolder)this); ++ long statusCheck = (long)CHUNK_STATUS_HANDLE.getOpaque((NewChunkHolder)this); // Folia - use area based lock to reduce contention + if ((int)statusCheck == (int)(statusCheck >>> 32)) { + // nothing changed + return ret; +@@ -1274,14 +1275,23 @@ public final class NewChunkHolder { + final ChunkTaskScheduler scheduler = this.scheduler; + final ChunkHolderManager holderManager = scheduler.chunkHolderManager; + final int ticketKeep; +- final Long ticketId; +- holderManager.ticketLock.lock(); ++ // Folia start - use area based lock to reduce contention ++ final Long ticketId = Long.valueOf(holderManager.getNextStatusUpgradeId()); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = holderManager.ticketLockArea.lock(this.chunkX, this.chunkZ); ++ // Folia end - use area based lock to reduce contention + try { + ticketKeep = this.currentTicketLevel; +- 
ticketId = Long.valueOf(holderManager.getNextStatusUpgradeId()); +- holderManager.addTicketAtLevel(TicketType.STATUS_UPGRADE, this.chunkX, this.chunkZ, ticketKeep, ticketId); ++ // Folia start - use area based lock to reduce contention ++ statusCheck = (long)CHUNK_STATUS_HANDLE.getOpaque((NewChunkHolder)this); ++ // handle race condition where ticket level and target status is updated concurrently ++ if ((int)statusCheck == (int)(statusCheck >>> 32)) { ++ // nothing changed ++ return ret; ++ } ++ holderManager.addTicketAtLevel(TicketType.STATUS_UPGRADE, CoordinateUtils.getChunkKey(this.chunkX, this.chunkZ), ticketKeep, ticketId, false); ++ // Folia end - use area based lock to reduce contention + } finally { +- holderManager.ticketLock.unlock(); ++ holderManager.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + + this.processingFullStatus = true; +@@ -1292,11 +1302,11 @@ public final class NewChunkHolder { + ChunkHolder.FullChunkStatus nextState = getPendingChunkStatus(currStateEncoded); + if (currState == nextState) { + if (nextState == ChunkHolder.FullChunkStatus.INACCESSIBLE) { +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ); // Folia - use area based lock to reduce contention + try { + this.checkUnload(); + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + } + break; +@@ -1696,14 +1706,14 @@ public final class NewChunkHolder { + // this means we have to leave the ticket level update to handle the scheduling + } + final List changedLoadStatus = new ArrayList<>(); +- this.scheduler.schedulingLock.lock(); ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node schedulingLock = this.scheduler.schedulingLockArea.lock(this.chunkX, this.chunkZ, 2 * ChunkTaskScheduler.getMaxAccessRadius()); // Folia - use area based lock to reduce contention - theoretically, we could schedule a chunk at the max radius which performs another max radius access. So we need to double. 
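++            // (illustrative) why the doubled radius: let R = ChunkTaskScheduler.getMaxAccessRadius(). Completing this
++            // chunk can schedule a neighbour up to R chunks away, and scheduling that neighbour can in turn acquire
++            // chunks up to R away from it, so positions up to 2R from (chunkX, chunkZ) may be touched under this lock.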
+ try { + for (int i = 0, len = neighbours.size(); i < len; ++i) { + neighbours.get(i).removeNeighbourUsingChunk(); + } + this.onChunkGenComplete(access, taskStatus, tasks, changedLoadStatus); + } finally { +- this.scheduler.schedulingLock.unlock(); ++ this.scheduler.schedulingLockArea.unlock(schedulingLock); // Folia - use area based lock to reduce contention + } + this.scheduler.chunkHolderManager.addChangedStatuses(changedLoadStatus); + +diff --git a/src/main/java/io/papermc/paper/threadedregions/ChunkQueue.java b/src/main/java/io/papermc/paper/threadedregions/ChunkQueue.java +new file mode 100644 +index 0000000000000000000000000000000000000000..50417ac73d7695e8e4eb463a906034bbd31b384b +--- /dev/null ++++ b/src/main/java/io/papermc/paper/threadedregions/ChunkQueue.java +@@ -0,0 +1,191 @@ ++package io.papermc.paper.threadedregions; ++ ++import it.unimi.dsi.fastutil.HashCommon; ++import it.unimi.dsi.fastutil.longs.LongLinkedOpenHashSet; ++import java.util.ArrayList; ++import java.util.List; ++import java.util.Map; ++import java.util.concurrent.ConcurrentHashMap; ++import java.util.concurrent.atomic.AtomicLong; ++ ++public final class ChunkQueue { ++ ++ public final int coordinateShift; ++ private final AtomicLong orderGenerator = new AtomicLong(); ++ private final ConcurrentHashMap unloadSections = new ConcurrentHashMap<>(); ++ ++ /* ++ * Note: write operations do not occur in parallel for any given section. ++ * Note: coordinateShift <= region shift in order for retrieveForCurrentRegion() to function correctly ++ */ ++ ++ public ChunkQueue(final int coordinateShift) { ++ this.coordinateShift = coordinateShift; ++ } ++ ++ public static record SectionToUnload(int sectionX, int sectionZ, Coordinate coord, long order, int count) {} ++ ++ public List retrieveForAllRegions() { ++ final List ret = new ArrayList<>(); ++ ++ for (final Map.Entry entry : this.unloadSections.entrySet()) { ++ final Coordinate coord = entry.getKey(); ++ final long key = coord.key; ++ final UnloadSection section = entry.getValue(); ++ final int sectionX = Coordinate.x(key); ++ final int sectionZ = Coordinate.z(key); ++ ++ ret.add(new SectionToUnload(sectionX, sectionZ, coord, section.order, section.chunks.size())); ++ } ++ ++ ret.sort((final SectionToUnload s1, final SectionToUnload s2) -> { ++ return Long.compare(s1.order, s2.order); ++ }); ++ ++ return ret; ++ } ++ ++ public List retrieveForCurrentRegion() { ++ final ThreadedRegionizer.ThreadedRegion region = ++ TickRegionScheduler.getCurrentRegion(); ++ final ThreadedRegionizer regionizer = region.regioniser; ++ final int shift = this.coordinateShift; ++ ++ final List ret = new ArrayList<>(); ++ ++ for (final Map.Entry entry : this.unloadSections.entrySet()) { ++ final Coordinate coord = entry.getKey(); ++ final long key = coord.key; ++ final UnloadSection section = entry.getValue(); ++ final int sectionX = Coordinate.x(key); ++ final int sectionZ = Coordinate.z(key); ++ final int chunkX = sectionX << shift; ++ final int chunkZ = sectionZ << shift; ++ ++ if (regionizer.getRegionAtUnsynchronised(chunkX, chunkZ) != region) { ++ continue; ++ } ++ ++ ret.add(new SectionToUnload(sectionX, sectionZ, coord, section.order, section.chunks.size())); ++ } ++ ++ ret.sort((final SectionToUnload s1, final SectionToUnload s2) -> { ++ return Long.compare(s1.order, s2.order); ++ }); ++ ++ return ret; ++ } ++ ++ public UnloadSection getSectionUnsynchronized(final int sectionX, final int sectionZ) { ++ final Coordinate coordinate = new Coordinate(Coordinate.key(sectionX, sectionZ)); 
++ return this.unloadSections.get(coordinate); ++ } ++ ++ public UnloadSection removeSection(final int sectionX, final int sectionZ) { ++ final Coordinate coordinate = new Coordinate(Coordinate.key(sectionX, sectionZ)); ++ return this.unloadSections.remove(coordinate); ++ } ++ ++ // write operation ++ public boolean addChunk(final int chunkX, final int chunkZ) { ++ final int shift = this.coordinateShift; ++ final int sectionX = chunkX >> shift; ++ final int sectionZ = chunkZ >> shift; ++ final Coordinate coordinate = new Coordinate(Coordinate.key(sectionX, sectionZ)); ++ final long chunkKey = Coordinate.key(chunkX, chunkZ); ++ ++ UnloadSection section = this.unloadSections.get(coordinate); ++ if (section == null) { ++ section = new UnloadSection(this.orderGenerator.getAndIncrement()); ++ // write operations do not occur in parallel for a given section ++ this.unloadSections.put(coordinate, section); ++ } ++ ++ return section.chunks.add(chunkKey); ++ } ++ ++ // write operation ++ public boolean removeChunk(final int chunkX, final int chunkZ) { ++ final int shift = this.coordinateShift; ++ final int sectionX = chunkX >> shift; ++ final int sectionZ = chunkZ >> shift; ++ final Coordinate coordinate = new Coordinate(Coordinate.key(sectionX, sectionZ)); ++ final long chunkKey = Coordinate.key(chunkX, chunkZ); ++ ++ final UnloadSection section = this.unloadSections.get(coordinate); ++ ++ if (section == null) { ++ return false; ++ } ++ ++ if (!section.chunks.remove(chunkKey)) { ++ return false; ++ } ++ ++ if (section.chunks.isEmpty()) { ++ this.unloadSections.remove(coordinate); ++ } ++ ++ return true; ++ } ++ ++ public static final class UnloadSection { ++ ++ public final long order; ++ public final LongLinkedOpenHashSet chunks = new LongLinkedOpenHashSet(); ++ ++ public UnloadSection(final long order) { ++ this.order = order; ++ } ++ } ++ ++ private static final class Coordinate implements Comparable { ++ ++ public final long key; ++ ++ public Coordinate(final long key) { ++ this.key = key; ++ } ++ ++ public Coordinate(final int x, final int z) { ++ this.key = key(x, z); ++ } ++ ++ public static long key(final int x, final int z) { ++ return ((long)z << 32) | (x & 0xFFFFFFFFL); ++ } ++ ++ public static int x(final long key) { ++ return (int)key; ++ } ++ ++ public static int z(final long key) { ++ return (int)(key >>> 32); ++ } ++ ++ @Override ++ public int hashCode() { ++ return (int)HashCommon.mix(this.key); ++ } ++ ++ @Override ++ public boolean equals(final Object obj) { ++ if (this == obj) { ++ return true; ++ } ++ ++ if (!(obj instanceof Coordinate other)) { ++ return false; ++ } ++ ++ return this.key == other.key; ++ } ++ ++ // This class is intended for HashMap/ConcurrentHashMap usage, which do treeify bin nodes if the chain ++ // is too large. So we should implement compareTo to help. 
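++        // (Note: HashMap/ConcurrentHashMap consult compareTo only when ordering keys inside an already-treeified bin,
++        // so this is a collision-resilience measure; equals/hashCode above remain the primary lookup path.)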
++ @Override ++ public int compareTo(final Coordinate other) { ++ return Long.compare(this.key, other.key); ++ } ++ } ++} +diff --git a/src/main/java/io/papermc/paper/threadedregions/RegionizedServer.java b/src/main/java/io/papermc/paper/threadedregions/RegionizedServer.java +index 6c1d55144f044f39926ddf998104950b9efe3ee1..8e31c6ee9ee16aff699e124a9b0554eaafa5c1ac 100644 +--- a/src/main/java/io/papermc/paper/threadedregions/RegionizedServer.java ++++ b/src/main/java/io/papermc/paper/threadedregions/RegionizedServer.java +@@ -185,7 +185,96 @@ public final class RegionizedServer { + private long lastServerStatus; + private long tickCount; + ++ /* ++ private final java.util.Random random = new java.util.Random(4L); ++ private final List> walkers = ++ new java.util.ArrayList<>(); ++ static final int PLAYERS = 100; ++ static final int RAD_BLOCKS = 10000; ++ static final int RAD = RAD_BLOCKS >> 4; ++ static final int RAD_BIG_BLOCKS = 100_000; ++ static final int RAD_BIG = RAD_BIG_BLOCKS >> 4; ++ static final int VD = 4; ++ static final int BIG_PLAYERS = 50; ++ static final double WALK_CHANCE = 0.10; ++ static final double TP_CHANCE = 0.01; ++ ++ private ServerLevel getWorld() { ++ return this.worlds.get(0); ++ } ++ ++ private void init2() { ++ for (int i = 0; i < PLAYERS; ++i) { ++ int rad = i < BIG_PLAYERS ? RAD_BIG : RAD; ++ int posX = this.random.nextInt(-rad, rad + 1); ++ int posZ = this.random.nextInt(-rad, rad + 1); ++ ++ io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap map = new io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap<>(null) { ++ @Override ++ protected void addCallback(Void parameter, int chunkX, int chunkZ) { ++ ServerLevel world = RegionizedServer.this.getWorld(); ++ world.chunkTaskScheduler.chunkHolderManager.addTicketAtLevel( ++ net.minecraft.server.level.TicketType.PLAYER, chunkX, chunkZ, io.papermc.paper.chunk.system.scheduling.ChunkHolderManager.ENTITY_TICKING_TICKET_LEVEL, new net.minecraft.world.level.ChunkPos(posX, posZ) ++ ); ++ } ++ ++ @Override ++ protected void removeCallback(Void parameter, int chunkX, int chunkZ) { ++ ServerLevel world = RegionizedServer.this.getWorld(); ++ world.chunkTaskScheduler.chunkHolderManager.removeTicketAtLevel( ++ net.minecraft.server.level.TicketType.PLAYER, chunkX, chunkZ, io.papermc.paper.chunk.system.scheduling.ChunkHolderManager.ENTITY_TICKING_TICKET_LEVEL, new net.minecraft.world.level.ChunkPos(posX, posZ) ++ ); ++ } ++ }; ++ ++ map.add(posX, posZ, VD); ++ ++ walkers.add(map); ++ } ++ } ++ ++ private void randomWalk() { ++ if (this.walkers.isEmpty()) { ++ this.init2(); ++ return; ++ } ++ ++ for (int i = 0; i < PLAYERS; ++i) { ++ if (this.random.nextDouble() > WALK_CHANCE) { ++ continue; ++ } ++ ++ io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap map = this.walkers.get(i); ++ ++ int updateX = this.random.nextInt(-1, 2); ++ int updateZ = this.random.nextInt(-1, 2); ++ ++ map.update(map.lastChunkX + updateX, map.lastChunkZ + updateZ, VD); ++ } ++ ++ for (int i = 0; i < PLAYERS; ++i) { ++ if (random.nextDouble() >= TP_CHANCE) { ++ continue; ++ } ++ ++ int rad = i < BIG_PLAYERS ? 
RAD_BIG : RAD; ++ int posX = random.nextInt(-rad, rad + 1); ++ int posZ = random.nextInt(-rad, rad + 1); ++ ++ io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap map = walkers.get(i); ++ ++ map.update(posX, posZ, VD); ++ } ++ } ++ */ ++ + private void globalTick(final int tickCount) { ++ /* ++ if (false) { ++ io.papermc.paper.threadedregions.ThreadedTicketLevelPropagator.main(null); ++ } ++ this.randomWalk(); ++ */ + ++this.tickCount; + // expire invalid click command callbacks + io.papermc.paper.adventure.providers.ClickCallbackProviderImpl.CALLBACK_MANAGER.handleQueue((int)this.tickCount); +@@ -311,6 +400,8 @@ public final class RegionizedServer { + this.tickTime(world, tickCount); + + world.updateTickData(); ++ ++ world.chunkTaskScheduler.chunkHolderManager.processTicketUpdates(); // Folia - use area based lock to reduce contention - required now to eventually process ticket updates + } + + private void updateRaids(final ServerLevel world) { +diff --git a/src/main/java/io/papermc/paper/threadedregions/RegionizedTaskQueue.java b/src/main/java/io/papermc/paper/threadedregions/RegionizedTaskQueue.java +index 18cbf9f1bcf39d607809627cb47332c27dabfe59..282d8814a4610648e790c1142cdaf86d84468c03 100644 +--- a/src/main/java/io/papermc/paper/threadedregions/RegionizedTaskQueue.java ++++ b/src/main/java/io/papermc/paper/threadedregions/RegionizedTaskQueue.java +@@ -69,7 +69,7 @@ public final class RegionizedTaskQueue { + public static final class WorldRegionTaskData { + private final ServerLevel world; + private final MultiThreadedQueue globalChunkTask = new MultiThreadedQueue<>(); +- private final SWMRLong2ObjectHashTable referenceCounters = new SWMRLong2ObjectHashTable<>(); ++ private final java.util.concurrent.ConcurrentHashMap referenceCounters = new java.util.concurrent.ConcurrentHashMap<>(); // Folia - use area based lock to reduce contention + + public WorldRegionTaskData(final ServerLevel world) { + this.world = world; +@@ -115,17 +115,25 @@ public final class RegionizedTaskQueue { + ); + } + ++ // Folia start - use area based lock to reduce contention ++ private void processTicketUpdates(final long coord) { ++ this.world.chunkTaskScheduler.chunkHolderManager.processTicketUpdates(CoordinateUtils.getChunkX(coord), CoordinateUtils.getChunkZ(coord)); ++ } ++ // Folia end - use area based lock to reduce contention ++ + private void decrementReference(final AtomicLong reference, final long coord) { + final long val = reference.decrementAndGet(); + if (val == 0L) { +- final ReentrantLock ticketLock = this.world.chunkTaskScheduler.chunkHolderManager.ticketLock; +- ticketLock.lock(); ++ final int chunkX = CoordinateUtils.getChunkX(coord); // Folia - use area based lock to reduce contention ++ final int chunkZ = CoordinateUtils.getChunkZ(coord); // Folia - use area based lock to reduce contention ++ final io.papermc.paper.chunk.system.io.RegionFileIOThread.ChunkCoordinate key = new io.papermc.paper.chunk.system.io.RegionFileIOThread.ChunkCoordinate(coord); // Folia - use area based lock to reduce contention ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.world.chunkTaskScheduler.chunkHolderManager.ticketLockArea.lock(chunkX, chunkZ); // Folia - use area based lock to reduce contention + try { +- if (this.referenceCounters.remove(coord, reference)) { ++ if (this.referenceCounters.remove(key, reference)) { // Folia - use area based lock to reduce contention + WorldRegionTaskData.this.removeTicket(coord); + } // else: race condition, 
something replaced our reference - not our issue anymore + } finally { +- ticketLock.unlock(); ++ this.world.chunkTaskScheduler.chunkHolderManager.ticketLockArea.unlock(ticketLock); // Folia - use area based lock to reduce contention + } + } else if (val < 0L) { + throw new IllegalStateException("Reference count < 0: " + val); +@@ -133,7 +141,8 @@ public final class RegionizedTaskQueue { + } + + private AtomicLong incrementReference(final long coord) { +- final AtomicLong ret = this.referenceCounters.get(coord); ++ final io.papermc.paper.chunk.system.io.RegionFileIOThread.ChunkCoordinate key = new io.papermc.paper.chunk.system.io.RegionFileIOThread.ChunkCoordinate(coord); // Folia - use area based lock to reduce contention ++ final AtomicLong ret = this.referenceCounters.get(key); // Folia - use area based lock to reduce contention + if (ret != null) { + // try to fast acquire counter + int failures = 0; +@@ -156,41 +165,54 @@ public final class RegionizedTaskQueue { + } + + // slow acquire +- final ReentrantLock ticketLock = this.world.chunkTaskScheduler.chunkHolderManager.ticketLock; +- ticketLock.lock(); ++ final int chunkX = CoordinateUtils.getChunkX(coord); // Folia - use area based lock to reduce contention ++ final int chunkZ = CoordinateUtils.getChunkZ(coord); // Folia - use area based lock to reduce contention ++ final ca.spottedleaf.concurrentutil.lock.ReentrantAreaLock.Node ticketLock = this.world.chunkTaskScheduler.chunkHolderManager.ticketLockArea.lock(chunkX, chunkZ); // Folia - use area based lock to reduce contention ++ final AtomicLong ret2; ++ final boolean processTicketUpdates; + try { + final AtomicLong replace = new AtomicLong(1L); +- final AtomicLong valueInMap = this.referenceCounters.putIfAbsent(coord, replace); ++ final AtomicLong valueInMap = this.referenceCounters.putIfAbsent(key, replace); // Folia - use area based lock to reduce contention + if (valueInMap == null) { + // replaced, we should usually be here + this.addTicket(coord); +- return replace; +- } // else: need to attempt to acquire the reference ++ ret2 = replace; ++ processTicketUpdates = true; ++ } else { ++ processTicketUpdates = false; ++ int failures = 0; ++ for (long curr = valueInMap.get();;) { ++ if (curr == 0L) { ++ // don't need to add ticket here, since ticket is only removed during the lock ++ // we just need to replace the value in the map so that the thread removing fails and doesn't ++ // remove the ticket (see decrementReference) ++ this.referenceCounters.put(key, replace); // Folia - use area based lock to reduce contention ++ ret2 = replace; ++ break; ++ } + +- int failures = 0; +- for (long curr = valueInMap.get();;) { +- if (curr == 0L) { +- // don't need to add ticket here, since ticket is only removed during the lock +- // we just need to replace the value in the map so that the thread removing fails and doesn't +- // remove the ticket (see decrementReference) +- this.referenceCounters.put(coord, replace); +- return replace; +- } ++ for (int i = 0; i < failures; ++i) { ++ ConcurrentUtil.backoff(); ++ } + +- for (int i = 0; i < failures; ++i) { +- ConcurrentUtil.backoff(); +- } ++ if (curr == (curr = valueInMap.compareAndExchange(curr, curr + 1L))) { ++ // acquired ++ ret2 = valueInMap; ++ break; ++ } + +- if (curr == (curr = valueInMap.compareAndExchange(curr, curr + 1L))) { +- // acquired +- return valueInMap; ++ ++failures; + } +- +- ++failures; + } + } finally { +- ticketLock.unlock(); ++ this.world.chunkTaskScheduler.chunkHolderManager.ticketLockArea.unlock(ticketLock); // 
Folia - use area based lock to reduce contention ++ } ++ ++ if (processTicketUpdates) { ++ this.processTicketUpdates(coord); + } ++ ++ return ret2; + } + } + +diff --git a/src/main/java/io/papermc/paper/threadedregions/ThreadedRegionizer.java b/src/main/java/io/papermc/paper/threadedregions/ThreadedRegionizer.java +index 5170b43743ea27a5c2aaee37d76f4e7e730fd808..1a4d820535f7b04671525c4f0e8691c9e82e075f 100644 +--- a/src/main/java/io/papermc/paper/threadedregions/ThreadedRegionizer.java ++++ b/src/main/java/io/papermc/paper/threadedregions/ThreadedRegionizer.java +@@ -674,6 +674,14 @@ public final class ThreadedRegionizer sections = new ConcurrentHashMap<>(); ++ ++ public ThreadedTicketLevelPropagator() { ++ this.updateQueue = new UpdateQueue(); ++ } ++ ++ // must hold ticket lock for: ++ // (posX & ~(SECTION_SIZE - 1), posZ & ~(SECTION_SIZE - 1)) to (posX | (SECTION_SIZE - 1), posZ | (SECTION_SIZE - 1)) ++ public void setSource(final int posX, final int posZ, final int to) { ++ if (to < 1 || to > MAX_SOURCE_LEVEL) { ++ throw new IllegalArgumentException("Source: " + to); ++ } ++ ++ final int sectionX = posX >> SECTION_SHIFT; ++ final int sectionZ = posZ >> SECTION_SHIFT; ++ ++ final Coordinate coordinate = new Coordinate(sectionX, sectionZ); ++ Section section = this.sections.get(coordinate); ++ if (section == null) { ++ if (null != this.sections.putIfAbsent(coordinate, section = new Section(sectionX, sectionZ))) { ++ throw new IllegalStateException("Race condition while creating new section"); ++ } ++ } ++ ++ final int localIdx = (posX & (SECTION_SIZE - 1)) | ((posZ & (SECTION_SIZE - 1)) << SECTION_SHIFT); ++ final short sLocalIdx = (short)localIdx; ++ ++ final short sourceAndLevel = section.levels[localIdx]; ++ final int currentSource = (sourceAndLevel >>> 8) & 0xFF; ++ ++ if (currentSource == to) { ++ // nothing to do ++ // make sure to kill the current update, if any ++ section.queuedSources.replace(sLocalIdx, (byte)to); ++ return; ++ } ++ ++ if (section.queuedSources.put(sLocalIdx, (byte)to) == Section.NO_QUEUED_UPDATE && section.queuedSources.size() == 1) { ++ this.queueSectionUpdate(section); ++ } ++ } ++ ++ // must hold ticket lock for: ++ // (posX & ~(SECTION_SIZE - 1), posZ & ~(SECTION_SIZE - 1)) to (posX | (SECTION_SIZE - 1), posZ | (SECTION_SIZE - 1)) ++ public void removeSource(final int posX, final int posZ) { ++ final int sectionX = posX >> SECTION_SHIFT; ++ final int sectionZ = posZ >> SECTION_SHIFT; ++ ++ final Coordinate coordinate = new Coordinate(sectionX, sectionZ); ++ final Section section = this.sections.get(coordinate); ++ ++ if (section == null) { ++ return; ++ } ++ ++ final int localIdx = (posX & (SECTION_SIZE - 1)) | ((posZ & (SECTION_SIZE - 1)) << SECTION_SHIFT); ++ final short sLocalIdx = (short)localIdx; ++ ++ final int currentSource = (section.levels[localIdx] >>> 8) & 0xFF; ++ ++ if (currentSource == 0) { ++ // we use replace here so that we do not possibly multi-queue a section for an update ++ section.queuedSources.replace(sLocalIdx, (byte)0); ++ return; ++ } ++ ++ if (section.queuedSources.put(sLocalIdx, (byte)0) == Section.NO_QUEUED_UPDATE && section.queuedSources.size() == 1) { ++ this.queueSectionUpdate(section); ++ } ++ } ++ ++ private void queueSectionUpdate(final Section section) { ++ this.updateQueue.append(new UpdateQueue.UpdateQueueNode(section, null)); ++ } ++ ++ public boolean hasPendingUpdates() { ++ return !this.updateQueue.isEmpty(); ++ } ++ ++ // holds ticket lock for every chunk section represented by any position in the key set ++ // 
updates is modifiable and passed to processSchedulingUpdates after this call ++ protected abstract void processLevelUpdates(final Long2ByteLinkedOpenHashMap updates); ++ ++ // holds ticket lock for every chunk section represented by any position in the key set ++ // holds scheduling lock in max access radius for every position held by the ticket lock ++ // updates is cleared after this call ++ protected abstract void processSchedulingUpdates(final Long2ByteLinkedOpenHashMap updates, final List scheduledTasks, ++ final List changedFullStatus); ++ ++ // must hold ticket lock for every position in the sections in one radius around sectionX,sectionZ ++ public boolean performUpdate(final int sectionX, final int sectionZ, final ReentrantAreaLock schedulingLock, ++ final List scheduledTasks, final List changedFullStatus) { ++ if (!this.hasPendingUpdates()) { ++ return false; ++ } ++ ++ final Coordinate coordinate = new Coordinate(Coordinate.key(sectionX, sectionZ)); ++ final Section section = this.sections.get(coordinate); ++ ++ if (section == null || section.queuedSources.isEmpty()) { ++ // no section or no updates ++ return false; ++ } ++ ++ final Propagator propagator = Propagator.acquirePropagator(); ++ final boolean ret = this.performUpdate(section, null, propagator, ++ null, schedulingLock, scheduledTasks, changedFullStatus ++ ); ++ Propagator.returnPropagator(propagator); ++ return ret; ++ } ++ ++ private boolean performUpdate(final Section section, final UpdateQueue.UpdateQueueNode node, final Propagator propagator, ++ final ReentrantAreaLock ticketLock, final ReentrantAreaLock schedulingLock, ++ final List scheduledTasks, final List changedFullStatus) { ++ final int sectionX = section.sectionX; ++ final int sectionZ = section.sectionZ; ++ ++ final int rad1MinX = (sectionX - 1) << SECTION_SHIFT; ++ final int rad1MinZ = (sectionZ - 1) << SECTION_SHIFT; ++ final int rad1MaxX = ((sectionX + 1) << SECTION_SHIFT) | (SECTION_SIZE - 1); ++ final int rad1MaxZ = ((sectionZ + 1) << SECTION_SHIFT) | (SECTION_SIZE - 1); ++ ++ // set up encode offset first as we need to queue level changes _before_ ++ propagator.setupEncodeOffset(sectionX, sectionZ); ++ ++ final int coordinateOffset = propagator.coordinateOffset; ++ ++ final ReentrantAreaLock.Node ticketNode = ticketLock == null ? 
null : ticketLock.lock(rad1MinX, rad1MinZ, rad1MaxX, rad1MaxZ); ++ final boolean ret; ++ try { ++ // first, check if this update was stolen ++ if (section != this.sections.get(new Coordinate(sectionX, sectionZ))) { ++ // occurs when a stolen update deletes this section ++ // it is possible that another update is scheduled, but that one will have the correct section ++ return false; ++ } ++ ++ final int oldSourceSize = section.sources.size(); ++ ++ // process pending sources ++ for (final Iterator iterator = section.queuedSources.short2ByteEntrySet().fastIterator(); iterator.hasNext();) { ++ final Short2ByteMap.Entry entry = iterator.next(); ++ final int pos = (int)entry.getShortKey(); ++ final int posX = (pos & (SECTION_SIZE - 1)) | (sectionX << SECTION_SHIFT); ++ final int posZ = ((pos >> SECTION_SHIFT) & (SECTION_SIZE - 1)) | (sectionZ << SECTION_SHIFT); ++ final int newSource = (int)entry.getByteValue(); ++ ++ final short currentEncoded = section.levels[pos]; ++ final int currLevel = currentEncoded & 0xFF; ++ final int prevSource = (currentEncoded >>> 8) & 0xFF; ++ ++ if (prevSource == newSource) { ++ // nothing changed ++ continue; ++ } ++ ++ if ((prevSource < currLevel && newSource <= currLevel) || newSource == currLevel) { ++ // just update the source, don't need to propagate change ++ section.levels[pos] = (short)(currLevel | (newSource << 8)); ++ // level is unchanged, don't add to changed positions ++ } else { ++ // set current level and current source to new source ++ section.levels[pos] = (short)(newSource | (newSource << 8)); ++ // must add to updated positions in case this is final ++ propagator.updatedPositions.put(Coordinate.key(posX, posZ), (byte)newSource); ++ if (newSource != 0) { ++ // queue increase with new source level ++ propagator.appendToIncreaseQueue( ++ ((long)(posX + (posZ << Propagator.COORDINATE_BITS) + coordinateOffset) & ((1L << (Propagator.COORDINATE_BITS + Propagator.COORDINATE_BITS)) - 1)) | ++ ((newSource & (LEVEL_COUNT - 1L)) << (Propagator.COORDINATE_BITS + Propagator.COORDINATE_BITS)) | ++ (Propagator.ALL_DIRECTIONS_BITSET << (Propagator.COORDINATE_BITS + Propagator.COORDINATE_BITS + LEVEL_BITS)) ++ ); ++ } ++ // queue decrease with previous level ++ if (newSource < currLevel) { ++ propagator.appendToDecreaseQueue( ++ ((long)(posX + (posZ << Propagator.COORDINATE_BITS) + coordinateOffset) & ((1L << (Propagator.COORDINATE_BITS + Propagator.COORDINATE_BITS)) - 1)) | ++ ((currLevel & (LEVEL_COUNT - 1L)) << (Propagator.COORDINATE_BITS + Propagator.COORDINATE_BITS)) | ++ (Propagator.ALL_DIRECTIONS_BITSET << (Propagator.COORDINATE_BITS + Propagator.COORDINATE_BITS + LEVEL_BITS)) ++ ); ++ } ++ } ++ ++ if (newSource == 0) { ++ // prevSource != newSource, so we are removing this source ++ section.sources.remove((short)pos); ++ } else if (prevSource == 0) { ++ // prevSource != newSource, so we are adding this source ++ section.sources.add((short)pos); ++ } ++ } ++ ++ section.queuedSources.clear(); ++ ++ final int newSourceSize = section.sources.size(); ++ ++ if (oldSourceSize == 0 && newSourceSize != 0) { ++ // need to make sure the sections in 1 radius are initialised ++ for (int dz = -1; dz <= 1; ++dz) { ++ for (int dx = -1; dx <= 1; ++dx) { ++ if ((dx | dz) == 0) { ++ continue; ++ } ++ final int offX = dx + sectionX; ++ final int offZ = dz + sectionZ; ++ final Coordinate coordinate = new Coordinate(offX, offZ); ++ final Section neighbour = this.sections.computeIfAbsent(coordinate, (final Coordinate keyInMap) -> { ++ return new 
Section(Coordinate.x(keyInMap.key), Coordinate.z(keyInMap.key)); ++ }); ++ ++ // increase ref count ++ ++neighbour.oneRadNeighboursWithSources; ++ if (neighbour.oneRadNeighboursWithSources <= 0 || neighbour.oneRadNeighboursWithSources > 8) { ++ throw new IllegalStateException(Integer.toString(neighbour.oneRadNeighboursWithSources)); ++ } ++ } ++ } ++ } ++ ++ if (propagator.hasUpdates()) { ++ propagator.setupCaches(this, sectionX, sectionZ, 1); ++ propagator.performDecrease(); ++ // don't need try-finally, as any exception will cause the propagator to not be returned ++ propagator.destroyCaches(); ++ } ++ ++ if (newSourceSize == 0) { ++ final boolean decrementRef = oldSourceSize != 0; ++ // check for section de-init ++ for (int dz = -1; dz <= 1; ++dz) { ++ for (int dx = -1; dx <= 1; ++dx) { ++ final int offX = dx + sectionX; ++ final int offZ = dz + sectionZ; ++ final Coordinate coordinate = new Coordinate(offX, offZ); ++ final Section neighbour = this.sections.get(coordinate); ++ ++ if (neighbour == null) { ++ if (oldSourceSize == 0 && (dx | dz) != 0) { ++ // since we don't have sources, this section is allowed to null ++ continue; ++ } ++ throw new IllegalStateException("??"); ++ } ++ ++ if (decrementRef && (dx | dz) != 0) { ++ // decrease ref count, but only for neighbours ++ --neighbour.oneRadNeighboursWithSources; ++ } ++ ++ // we need to check the current section for de-init as well ++ if (neighbour.oneRadNeighboursWithSources == 0) { ++ if (neighbour.queuedSources.isEmpty() && neighbour.sources.isEmpty()) { ++ // need to de-init ++ this.sections.remove(coordinate); ++ } // else: neighbour is queued for an update, and it will de-init itself ++ } else if (neighbour.oneRadNeighboursWithSources < 0 || neighbour.oneRadNeighboursWithSources > 8) { ++ throw new IllegalStateException(Integer.toString(neighbour.oneRadNeighboursWithSources)); ++ } ++ } ++ } ++ } ++ ++ ++ ret = !propagator.updatedPositions.isEmpty(); ++ ++ if (ret) { ++ this.processLevelUpdates(propagator.updatedPositions); ++ ++ if (!propagator.updatedPositions.isEmpty()) { ++ // now we can actually update the ticket levels in the chunk holders ++ final int maxScheduleRadius = 2 * ChunkTaskScheduler.getMaxAccessRadius(); ++ ++ // allow the chunkholders to process ticket level updates without needing to acquire the schedule lock every time ++ final ReentrantAreaLock.Node schedulingNode = schedulingLock.lock( ++ rad1MinX - maxScheduleRadius, rad1MinZ - maxScheduleRadius, ++ rad1MaxX + maxScheduleRadius, rad1MaxZ + maxScheduleRadius ++ ); ++ try { ++ this.processSchedulingUpdates(propagator.updatedPositions, scheduledTasks, changedFullStatus); ++ } finally { ++ schedulingLock.unlock(schedulingNode); ++ } ++ } ++ ++ propagator.updatedPositions.clear(); ++ } ++ } finally { ++ if (ticketLock != null) { ++ ticketLock.unlock(ticketNode); ++ } ++ } ++ ++ // finished ++ if (node != null) { ++ this.updateQueue.remove(node); ++ } ++ ++ return ret; ++ } ++ ++ public boolean performUpdates(final ReentrantAreaLock ticketLock, final ReentrantAreaLock schedulingLock, ++ final List scheduledTasks, final List changedFullStatus) { ++ if (this.updateQueue.isEmpty()) { ++ return false; ++ } ++ ++ final long maxOrder = this.updateQueue.getLastOrder(); ++ ++ boolean updated = false; ++ Propagator propagator = null; ++ ++ for (;;) { ++ final UpdateQueue.UpdateQueueNode toUpdate = this.updateQueue.acquireNextToUpdate(maxOrder); ++ if (toUpdate == null) { ++ this.updateQueue.awaitFirst(maxOrder); ++ ++ if 
(!this.updateQueue.hasRemainingUpdates(maxOrder)) { ++ if (propagator != null) { ++ Propagator.returnPropagator(propagator); ++ } ++ return updated; ++ } ++ ++ continue; ++ } ++ ++ if (propagator == null) { ++ propagator = Propagator.acquirePropagator(); ++ } ++ ++ updated |= this.performUpdate(toUpdate.section, toUpdate, propagator, ticketLock, schedulingLock, scheduledTasks, changedFullStatus); ++ } ++ } ++ ++ private static final class UpdateQueue { ++ ++ private volatile UpdateQueueNode head; ++ private volatile UpdateQueueNode tail; ++ private volatile UpdateQueueNode lastUpdating; ++ ++ protected static final VarHandle HEAD_HANDLE = ConcurrentUtil.getVarHandle(UpdateQueue.class, "head", UpdateQueueNode.class); ++ protected static final VarHandle TAIL_HANDLE = ConcurrentUtil.getVarHandle(UpdateQueue.class, "tail", UpdateQueueNode.class); ++ protected static final VarHandle LAST_UPDATING = ConcurrentUtil.getVarHandle(UpdateQueue.class, "lastUpdating", UpdateQueueNode.class); ++ ++ /* head */ ++ ++ protected final void setHeadPlain(final UpdateQueueNode newHead) { ++ HEAD_HANDLE.set(this, newHead); ++ } ++ ++ protected final void setHeadOpaque(final UpdateQueueNode newHead) { ++ HEAD_HANDLE.setOpaque(this, newHead); ++ } ++ ++ protected final UpdateQueueNode getHeadPlain() { ++ return (UpdateQueueNode)HEAD_HANDLE.get(this); ++ } ++ ++ protected final UpdateQueueNode getHeadOpaque() { ++ return (UpdateQueueNode)HEAD_HANDLE.getOpaque(this); ++ } ++ ++ protected final UpdateQueueNode getHeadAcquire() { ++ return (UpdateQueueNode)HEAD_HANDLE.getAcquire(this); ++ } ++ ++ /* tail */ ++ ++ protected final void setTailPlain(final UpdateQueueNode newTail) { ++ TAIL_HANDLE.set(this, newTail); ++ } ++ ++ protected final void setTailOpaque(final UpdateQueueNode newTail) { ++ TAIL_HANDLE.setOpaque(this, newTail); ++ } ++ ++ protected final UpdateQueueNode getTailPlain() { ++ return (UpdateQueueNode)TAIL_HANDLE.get(this); ++ } ++ ++ protected final UpdateQueueNode getTailOpaque() { ++ return (UpdateQueueNode)TAIL_HANDLE.getOpaque(this); ++ } ++ ++ /* lastUpdating */ ++ ++ protected final UpdateQueueNode getLastUpdatingVolatile() { ++ return (UpdateQueueNode)LAST_UPDATING.getVolatile(this); ++ } ++ ++ protected final UpdateQueueNode compareAndExchangeLastUpdatingVolatile(final UpdateQueueNode expect, final UpdateQueueNode update) { ++ return (UpdateQueueNode)LAST_UPDATING.compareAndExchange(this, expect, update); ++ } ++ ++ public UpdateQueue() { ++ final UpdateQueueNode dummy = new UpdateQueueNode(null, null); ++ dummy.order = -1L; ++ dummy.preventAdds(); ++ ++ this.setHeadPlain(dummy); ++ this.setTailPlain(dummy); ++ } ++ ++ public boolean isEmpty() { ++ return this.peek() == null; ++ } ++ ++ public boolean hasRemainingUpdates(final long maxUpdate) { ++ final UpdateQueueNode node = this.peek(); ++ return node != null && node.order <= maxUpdate; ++ } ++ ++ public long getLastOrder() { ++ for (UpdateQueueNode tail = this.getTailOpaque(), curr = tail;;) { ++ final UpdateQueueNode next = curr.getNextVolatile(); ++ if (next == null) { ++ // try to update stale tail ++ if (this.getTailOpaque() == tail && curr != tail) { ++ this.setTailOpaque(curr); ++ } ++ return curr.order; ++ } ++ curr = next; ++ } ++ } ++ ++ public UpdateQueueNode acquireNextToUpdate(final long maxOrder) { ++ int failures = 0; ++ for (UpdateQueueNode prev = this.getLastUpdatingVolatile();;) { ++ UpdateQueueNode next = prev == null ? 
this.peek() : prev.next; ++ ++ if (next == null || next.order > maxOrder) { ++ return null; ++ } ++ ++ for (int i = 0; i < failures; ++i) { ++ ConcurrentUtil.backoff(); ++ } ++ ++ if (prev == (prev = this.compareAndExchangeLastUpdatingVolatile(prev, next))) { ++ return next; ++ } ++ ++ ++failures; ++ } ++ } ++ ++ public void awaitFirst(final long maxOrder) { ++ final UpdateQueueNode earliest = this.peek(); ++ if (earliest == null || earliest.order > maxOrder) { ++ return; ++ } ++ ++ final Thread currThread = Thread.currentThread(); ++ // we do not use add-blocking because we use the nullability of the section to block ++ // remove() does not begin to poll from the wait queue until the section is null'd, ++ // and so provided we check the nullability before parking there is no ordering of these operations ++ // such that remove() finishes polling from the wait queue while section is not null ++ earliest.add(currThread); ++ ++ // wait until completed ++ while (earliest.getSectionVolatile() != null) { ++ LockSupport.park(); ++ } ++ } ++ ++ public UpdateQueueNode peek() { ++ for (UpdateQueueNode head = this.getHeadOpaque(), curr = head;;) { ++ final UpdateQueueNode next = curr.getNextVolatile(); ++ final Section element = curr.getSectionVolatile(); /* Likely in sync */ ++ ++ if (element != null) { ++ if (this.getHeadOpaque() == head && curr != head) { ++ this.setHeadOpaque(curr); ++ } ++ return curr; ++ } ++ ++ if (next == null) { ++ if (this.getHeadOpaque() == head && curr != head) { ++ this.setHeadOpaque(curr); ++ } ++ return null; ++ } ++ curr = next; ++ } ++ } ++ ++ public void remove(final UpdateQueueNode node) { ++ // mark as removed ++ node.setSectionVolatile(null); ++ ++ // use peek to advance head ++ this.peek(); ++ ++ // unpark any waiters / block the wait queue ++ Thread unpark; ++ while ((unpark = node.poll()) != null) { ++ LockSupport.unpark(unpark); ++ } ++ } ++ ++ public void append(final UpdateQueueNode node) { ++ int failures = 0; ++ ++ for (UpdateQueueNode currTail = this.getTailOpaque(), curr = currTail;;) { ++ /* It has been experimentally shown that placing the read before the backoff results in significantly greater performance */ ++ /* It is likely due to a cache miss caused by another write to the next field */ ++ final UpdateQueueNode next = curr.getNextVolatile(); ++ ++ for (int i = 0; i < failures; ++i) { ++ ConcurrentUtil.backoff(); ++ } ++ ++ if (next == null) { ++ node.order = curr.order + 1L; ++ final UpdateQueueNode compared = curr.compareExchangeNextVolatile(null, node); ++ ++ if (compared == null) { ++ /* Added */ ++ /* Avoid CASing on tail more than we need to */ ++ /* CAS to avoid setting an out-of-date tail */ ++ if (this.getTailOpaque() == currTail) { ++ this.setTailOpaque(node); ++ } ++ return; ++ } ++ ++ ++failures; ++ curr = compared; ++ continue; ++ } ++ ++ if (curr == currTail) { ++ /* Tail is likely not up-to-date */ ++ curr = next; ++ } else { ++ /* Try to update to tail */ ++ if (currTail == (currTail = this.getTailOpaque())) { ++ curr = next; ++ } else { ++ curr = currTail; ++ } ++ } ++ } ++ } ++ ++ // each node also represents a set of waiters, represented by the MTQ ++ // if the queue is add-blocked, then the update is complete ++ private static final class UpdateQueueNode extends MultiThreadedQueue { ++ private long order; ++ private Section section; ++ private volatile UpdateQueueNode next; ++ ++ protected static final VarHandle SECTION_HANDLE = ConcurrentUtil.getVarHandle(UpdateQueueNode.class, "section", Section.class); ++ protected static 
final VarHandle NEXT_HANDLE = ConcurrentUtil.getVarHandle(UpdateQueueNode.class, "next", UpdateQueueNode.class); ++ ++ public UpdateQueueNode(final Section section, final UpdateQueueNode next) { ++ SECTION_HANDLE.set(this, section); ++ NEXT_HANDLE.set(this, next); ++ } ++ ++ /* section */ ++ ++ protected final Section getSectionPlain() { ++ return (Section)SECTION_HANDLE.get(this); ++ } ++ ++ protected final Section getSectionVolatile() { ++ return (Section)SECTION_HANDLE.getVolatile(this); ++ } ++ ++ protected final void setSectionPlain(final Section update) { ++ SECTION_HANDLE.set(this, update); ++ } ++ ++ protected final void setSectionOpaque(final Section update) { ++ SECTION_HANDLE.setOpaque(this, update); ++ } ++ ++ protected final void setSectionVolatile(final Section update) { ++ SECTION_HANDLE.setVolatile(this, update); ++ } ++ ++ protected final Section getAndSetSectionVolatile(final Section update) { ++ return (Section)SECTION_HANDLE.getAndSet(this, update); ++ } ++ ++ protected final Section compareExchangeSectionVolatile(final Section expect, final Section update) { ++ return (Section)SECTION_HANDLE.compareAndExchange(this, expect, update); ++ } ++ ++ /* next */ ++ ++ protected final UpdateQueueNode getNextPlain() { ++ return (UpdateQueueNode)NEXT_HANDLE.get(this); ++ } ++ ++ protected final UpdateQueueNode getNextOpaque() { ++ return (UpdateQueueNode)NEXT_HANDLE.getOpaque(this); ++ } ++ ++ protected final UpdateQueueNode getNextAcquire() { ++ return (UpdateQueueNode)NEXT_HANDLE.getAcquire(this); ++ } ++ ++ protected final UpdateQueueNode getNextVolatile() { ++ return (UpdateQueueNode)NEXT_HANDLE.getVolatile(this); ++ } ++ ++ protected final void setNextPlain(final UpdateQueueNode next) { ++ NEXT_HANDLE.set(this, next); ++ } ++ ++ protected final void setNextVolatile(final UpdateQueueNode next) { ++ NEXT_HANDLE.setVolatile(this, next); ++ } ++ ++ protected final UpdateQueueNode compareExchangeNextVolatile(final UpdateQueueNode expect, final UpdateQueueNode set) { ++ return (UpdateQueueNode)NEXT_HANDLE.compareAndExchange(this, expect, set); ++ } ++ } ++ } ++ ++ private static final class Section { ++ ++ // upper 8 bits: sources, lower 8 bits: level ++ // if we REALLY wanted to get crazy, we could make the increase propagator use MethodHandles#byteArrayViewVarHandle ++ // to read and write the lower 8 bits of this array directly rather than reading, updating the bits, then writing back. 
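++        // (illustrative) decoding a packed entry, matching setSource() above and Propagator.getLevel() below:
++        //     final short s = levels[index];
++        //     final int currentLevel = s & 0xFF;          // low byte
++        //     final int sourceLevel  = (s >>> 8) & 0xFF;  // high byte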
++ private final short[] levels = new short[SECTION_SIZE * SECTION_SIZE]; ++ // set of local positions that represent sources ++ private final ShortOpenHashSet sources = new ShortOpenHashSet(); ++ // map of local index to new source level ++ // the source level _cannot_ be updated in the backing storage immediately since the update ++ private static final byte NO_QUEUED_UPDATE = (byte)-1; ++ private final Short2ByteLinkedOpenHashMap queuedSources = new Short2ByteLinkedOpenHashMap(); ++ { ++ this.queuedSources.defaultReturnValue(NO_QUEUED_UPDATE); ++ } ++ private int oneRadNeighboursWithSources = 0; ++ ++ public final int sectionX; ++ public final int sectionZ; ++ ++ public Section(final int sectionX, final int sectionZ) { ++ this.sectionX = sectionX; ++ this.sectionZ = sectionZ; ++ } ++ ++ @Override ++ public String toString() { ++ final StringBuilder ret = new StringBuilder(); ++ ++ for (int x = 0; x < SECTION_SIZE; ++x) { ++ ret.append("x=").append(x).append("\n"); ++ for (int z = 0; z < SECTION_SIZE; ++z) { ++ final short v = this.levels[x | (z << SECTION_SHIFT)]; ++ ret.append(v & 0xFF).append("."); ++ } ++ ret.append("\n\n"); ++ } ++ ++ return ret.toString(); ++ } ++ } ++ ++ ++ private static final class Propagator { ++ ++ private static final ArrayDeque CACHED_PROPAGATORS = new ArrayDeque<>(); ++ private static final int MAX_PROPAGATORS = Runtime.getRuntime().availableProcessors() * 2; ++ ++ private static Propagator acquirePropagator() { ++ synchronized (CACHED_PROPAGATORS) { ++ final Propagator ret = CACHED_PROPAGATORS.pollFirst(); ++ if (ret != null) { ++ return ret; ++ } ++ } ++ return new Propagator(); ++ } ++ ++ private static void returnPropagator(final Propagator propagator) { ++ synchronized (CACHED_PROPAGATORS) { ++ if (CACHED_PROPAGATORS.size() < MAX_PROPAGATORS) { ++ CACHED_PROPAGATORS.add(propagator); ++ } ++ } ++ } ++ ++ private static final int SECTION_RADIUS = 2; ++ private static final int SECTION_CACHE_WIDTH = 2 * SECTION_RADIUS + 1; ++ // minimum number of bits to represent [0, SECTION_SIZE * SECTION_CACHE_WIDTH) ++ private static final int COORDINATE_BITS = 9; ++ private static final int COORDINATE_SIZE = 1 << COORDINATE_BITS; ++ static { ++ if ((SECTION_SIZE * SECTION_CACHE_WIDTH) > (1 << COORDINATE_BITS)) { ++ throw new IllegalStateException("Adjust COORDINATE_BITS"); ++ } ++ } ++ // index = x + (z * SECTION_CACHE_WIDTH) ++ // (this requires x >= 0 and z >= 0) ++ private final Section[] sections = new Section[SECTION_CACHE_WIDTH * SECTION_CACHE_WIDTH]; ++ ++ private int encodeOffsetX; ++ private int encodeOffsetZ; ++ ++ private int coordinateOffset; ++ ++ private int encodeSectionOffsetX; ++ private int encodeSectionOffsetZ; ++ ++ private int sectionIndexOffset; ++ ++ public final boolean hasUpdates() { ++ return this.decreaseQueueInitialLength != 0 || this.increaseQueueInitialLength != 0; ++ } ++ ++ protected final void setupEncodeOffset(final int centerSectionX, final int centerSectionZ) { ++ final int maxCoordinate = (SECTION_RADIUS * SECTION_SIZE - 1); ++ // must have that encoded >= 0 ++ // coordinates can range from [-maxCoordinate + centerSection*SECTION_SIZE, maxCoordinate + centerSection*SECTION_SIZE] ++ // we want a range of [0, maxCoordinate*2] ++ // so, 0 = -maxCoordinate + centerSection*SECTION_SIZE + offset ++ this.encodeOffsetX = maxCoordinate - (centerSectionX << SECTION_SHIFT); ++ this.encodeOffsetZ = maxCoordinate - (centerSectionZ << SECTION_SHIFT); ++ ++ // encoded coordinates range from [0, SECTION_SIZE * SECTION_CACHE_WIDTH) ++ // 
coordinate index = (x + encodeOffsetX) + ((z + encodeOffsetZ) << COORDINATE_BITS) ++ this.coordinateOffset = this.encodeOffsetX + (this.encodeOffsetZ << COORDINATE_BITS); ++ ++ // need encoded values to be >= 0 ++ // so, 0 = (-SECTION_RADIUS + centerSectionX) + encodeOffset ++ this.encodeSectionOffsetX = SECTION_RADIUS - centerSectionX; ++ this.encodeSectionOffsetZ = SECTION_RADIUS - centerSectionZ; ++ ++ // section index = (secX + encodeSectionOffsetX) + ((secZ + encodeSectionOffsetZ) * SECTION_CACHE_WIDTH) ++ this.sectionIndexOffset = this.encodeSectionOffsetX + (this.encodeSectionOffsetZ * SECTION_CACHE_WIDTH); ++ } ++ ++ // must hold ticket lock for (centerSectionX,centerSectionZ) in radius rad ++ // must call setupEncodeOffset ++ protected final void setupCaches(final ThreadedTicketLevelPropagator propagator, ++ final int centerSectionX, final int centerSectionZ, ++ final int rad) { ++ for (int dz = -rad; dz <= rad; ++dz) { ++ for (int dx = -rad; dx <= rad; ++dx) { ++ final int sectionX = centerSectionX + dx; ++ final int sectionZ = centerSectionZ + dz; ++ final Coordinate coordinate = new Coordinate(sectionX, sectionZ); ++ final Section section = propagator.sections.get(coordinate); ++ ++ if (section == null) { ++ throw new IllegalStateException("Section at " + coordinate + " should not be null"); ++ } ++ ++ this.setSectionInCache(sectionX, sectionZ, section); ++ } ++ } ++ } ++ ++ protected final void setSectionInCache(final int sectionX, final int sectionZ, final Section section) { ++ this.sections[sectionX + SECTION_CACHE_WIDTH*sectionZ + this.sectionIndexOffset] = section; ++ } ++ ++ protected final Section getSection(final int sectionX, final int sectionZ) { ++ return this.sections[sectionX + SECTION_CACHE_WIDTH*sectionZ + this.sectionIndexOffset]; ++ } ++ ++ protected final int getLevel(final int posX, final int posZ) { ++ final Section section = this.sections[(posX >> SECTION_SHIFT) + SECTION_CACHE_WIDTH*(posZ >> SECTION_SHIFT) + this.sectionIndexOffset]; ++ if (section != null) { ++ return (int)section.levels[(posX & (SECTION_SIZE - 1)) | ((posZ & (SECTION_SIZE - 1)) << SECTION_SHIFT)] & 0xFF; ++ } ++ ++ return 0; ++ } ++ ++ protected final void setLevel(final int posX, final int posZ, final int to) { ++ final Section section = this.sections[(posX >> SECTION_SHIFT) + SECTION_CACHE_WIDTH*(posZ >> SECTION_SHIFT) + this.sectionIndexOffset]; ++ if (section != null) { ++ final int index = (posX & (SECTION_SIZE - 1)) | ((posZ & (SECTION_SIZE - 1)) << SECTION_SHIFT); ++ final short level = section.levels[index]; ++ section.levels[index] = (short)((level & ~0xFF) | (to & 0xFF)); ++ this.updatedPositions.put(Coordinate.key(posX, posZ), (byte)to); ++ } ++ } ++ ++ protected final void destroyCaches() { ++ Arrays.fill(this.sections, null); ++ } ++ ++ // contains: ++ // lower (COORDINATE_BITS(9) + COORDINATE_BITS(9) = 18) bits encoded position: (x | (z << COORDINATE_BITS)) ++ // next LEVEL_BITS (6) bits: propagated level [0, 63] ++ // propagation directions bitset (16 bits): ++ protected static final long ALL_DIRECTIONS_BITSET = ( ++ // z = -1 ++ (1L << ((1 - 1) | ((1 - 1) << 2))) | ++ (1L << ((1 + 0) | ((1 - 1) << 2))) | ++ (1L << ((1 + 1) | ((1 - 1) << 2))) | ++ ++ // z = 0 ++ (1L << ((1 - 1) | ((1 + 0) << 2))) | ++ //(1L << ((1 + 0) | ((1 + 0) << 2))) | // exclude (0,0) ++ (1L << ((1 + 1) | ((1 + 0) << 2))) | ++ ++ // z = 1 ++ (1L << ((1 - 1) | ((1 + 1) << 2))) | ++ (1L << ((1 + 0) | ((1 + 1) << 2))) | ++ (1L << ((1 + 1) | ((1 + 1) << 2))) ++ ); ++ ++ private void ex(int bitset) { ++ 
for (int i = 0, len = Integer.bitCount(bitset); i < len; ++i) { ++ final int set = Integer.numberOfTrailingZeros(bitset); ++ final int tailingBit = (-bitset) & bitset; ++ // XOR to remove the trailing bit ++ bitset ^= tailingBit; ++ ++ // the encoded value set is (x_val) | (z_val << 2), totaling 4 bits ++ // thus, the bitset is 16 bits wide where each one represents a direction to propagate and the ++ // index of the set bit is the encoded value ++ // the encoded coordinate has 3 valid states: ++ // 0b00 (0) -> -1 ++ // 0b01 (1) -> 0 ++ // 0b10 (2) -> 1 ++ // the decode operation then is val - 1, and the encode operation is val + 1 ++ final int xOff = (set & 3) - 1; ++ final int zOff = ((set >>> 2) & 3) - 1; ++ System.out.println("Encoded: (" + xOff + "," + zOff + ")"); ++ } ++ } ++ ++ private void ch(long bs, int shift) { ++ int bitset = (int)(bs >>> shift); ++ for (int i = 0, len = Integer.bitCount(bitset); i < len; ++i) { ++ final int set = Integer.numberOfTrailingZeros(bitset); ++ final int tailingBit = (-bitset) & bitset; ++ // XOR to remove the trailing bit ++ bitset ^= tailingBit; ++ ++ // the encoded value set is (x_val) | (z_val << 2), totaling 4 bits ++ // thus, the bitset is 16 bits wide where each one represents a direction to propagate and the ++ // index of the set bit is the encoded value ++ // the encoded coordinate has 3 valid states: ++ // 0b00 (0) -> -1 ++ // 0b01 (1) -> 0 ++ // 0b10 (2) -> 1 ++ // the decode operation then is val - 1, and the encode operation is val + 1 ++ final int xOff = (set & 3) - 1; ++ final int zOff = ((set >>> 2) & 3) - 1; ++ if (Math.abs(xOff) > 1 || Math.abs(zOff) > 1 || (xOff | zOff) == 0) { ++ throw new IllegalStateException(); ++ } ++ } ++ } ++ ++ // whether the increase propagator needs to write the propagated level to the position, used to avoid cascading ++ // updates for sources ++ protected static final long FLAG_WRITE_LEVEL = Long.MIN_VALUE >>> 1; ++ // whether the propagation needs to check if its current level is equal to the expected level ++ // used only in increase propagation ++ protected static final long FLAG_RECHECK_LEVEL = Long.MIN_VALUE >>> 0; ++ ++ protected long[] increaseQueue = new long[SECTION_SIZE * SECTION_SIZE * 2]; ++ protected int increaseQueueInitialLength; ++ protected long[] decreaseQueue = new long[SECTION_SIZE * SECTION_SIZE * 2]; ++ protected int decreaseQueueInitialLength; ++ ++ protected final Long2ByteLinkedOpenHashMap updatedPositions = new Long2ByteLinkedOpenHashMap(); ++ ++ protected final long[] resizeIncreaseQueue() { ++ return this.increaseQueue = Arrays.copyOf(this.increaseQueue, this.increaseQueue.length * 2); ++ } ++ ++ protected final long[] resizeDecreaseQueue() { ++ return this.decreaseQueue = Arrays.copyOf(this.decreaseQueue, this.decreaseQueue.length * 2); ++ } ++ ++ protected final void appendToIncreaseQueue(final long value) { ++ final int idx = this.increaseQueueInitialLength++; ++ long[] queue = this.increaseQueue; ++ if (idx >= queue.length) { ++ queue = this.resizeIncreaseQueue(); ++ queue[idx] = value; ++ return; ++ } else { ++ queue[idx] = value; ++ return; ++ } ++ } ++ ++ protected final void appendToDecreaseQueue(final long value) { ++ final int idx = this.decreaseQueueInitialLength++; ++ long[] queue = this.decreaseQueue; ++ if (idx >= queue.length) { ++ queue = this.resizeDecreaseQueue(); ++ queue[idx] = value; ++ return; ++ } else { ++ queue[idx] = value; ++ return; ++ } ++ } ++ ++ protected final void performIncrease() { ++ long[] queue = this.increaseQueue; ++ int 
queueReadIndex = 0; ++ int queueLength = this.increaseQueueInitialLength; ++ this.increaseQueueInitialLength = 0; ++ final int decodeOffsetX = -this.encodeOffsetX; ++ final int decodeOffsetZ = -this.encodeOffsetZ; ++ final int encodeOffset = this.coordinateOffset; ++ final int sectionOffset = this.sectionIndexOffset; ++ ++ final Long2ByteLinkedOpenHashMap updatedPositions = this.updatedPositions; ++ ++ while (queueReadIndex < queueLength) { ++ final long queueValue = queue[queueReadIndex++]; ++ ++ final int posX = ((int)queueValue & (COORDINATE_SIZE - 1)) + decodeOffsetX; ++ final int posZ = (((int)queueValue >>> COORDINATE_BITS) & (COORDINATE_SIZE - 1)) + decodeOffsetZ; ++ final int propagatedLevel = ((int)queueValue >>> (COORDINATE_BITS + COORDINATE_BITS)) & (LEVEL_COUNT - 1); ++ // note: the above code requires coordinate bits * 2 < 32 ++ // bitset is 16 bits ++ int propagateDirectionBitset = (int)(queueValue >>> (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)) & ((1 << 16) - 1); ++ ++ if ((queueValue & FLAG_RECHECK_LEVEL) != 0L) { ++ if (this.getLevel(posX, posZ) != propagatedLevel) { ++ // not at the level we expect, so something changed. ++ continue; ++ } ++ } else if ((queueValue & FLAG_WRITE_LEVEL) != 0L) { ++ // these are used to restore sources after a propagation decrease ++ this.setLevel(posX, posZ, propagatedLevel); ++ } ++ ++ // this bitset represents the values that we have not propagated to ++ // this bitset lets us determine what directions the neighbours we set should propagate to, in most cases ++ // significantly reducing the total number of ops ++ // since we propagate in a 1 radius, we need a 2 radius bitset to hold all possible values we would possibly need ++ // but if we use only 5x5 bits, then we need to use div/mod to retrieve coordinates from the bitset, so instead ++ // we use an 8x8 bitset and luckily that can be fit into only one long value (64 bits) ++ // to make things easy, we use positions [0, 4] in the bitset, with current position being 2 ++ // index = x | (z << 3) ++ ++ // to start, we eliminate everything 1 radius from the current position as the previous propagator ++ // must guarantee that either we propagate everything in 1 radius or we partially propagate for 1 radius ++ // but the rest not propagated are already handled ++ long currentPropagation = ~( ++ // z = -1 ++ (1L << ((2 - 1) | ((2 - 1) << 3))) | ++ (1L << ((2 + 0) | ((2 - 1) << 3))) | ++ (1L << ((2 + 1) | ((2 - 1) << 3))) | ++ ++ // z = 0 ++ (1L << ((2 - 1) | ((2 + 0) << 3))) | ++ (1L << ((2 + 0) | ((2 + 0) << 3))) | ++ (1L << ((2 + 1) | ((2 + 0) << 3))) | ++ ++ // z = 1 ++ (1L << ((2 - 1) | ((2 + 1) << 3))) | ++ (1L << ((2 + 0) | ((2 + 1) << 3))) | ++ (1L << ((2 + 1) | ((2 + 1) << 3))) ++ ); ++ ++ final int toPropagate = propagatedLevel - 1; ++ ++ // we could use while (propagateDirectionBitset != 0), but it's not a predictable branch. By counting ++ // the bits, the cpu loop predictor should perfectly predict the loop. 
++ for (int l = 0, len = Integer.bitCount(propagateDirectionBitset); l < len; ++l) {
++ final int set = Integer.numberOfTrailingZeros(propagateDirectionBitset);
++ final int tailingBit = (-propagateDirectionBitset) & propagateDirectionBitset;
++ propagateDirectionBitset ^= tailingBit;
++
++
++ // pDecode is from [0, 2], and 1 must be subtracted to fully decode the offset
++ // it has been split to save some cycles via parallelism
++ final int pDecodeX = (set & 3);
++ final int pDecodeZ = ((set >>> 2) & 3);
++
++ // re-ordered -1 on the position decode into pos - 1 to occur in parallel with determining pDecodeX
++ final int offX = (posX - 1) + pDecodeX;
++ final int offZ = (posZ - 1) + pDecodeZ;
++
++ final int sectionIndex = (offX >> SECTION_SHIFT) + ((offZ >> SECTION_SHIFT) * SECTION_CACHE_WIDTH) + sectionOffset;
++ final int localIndex = (offX & (SECTION_SIZE - 1)) | ((offZ & (SECTION_SIZE - 1)) << SECTION_SHIFT);
++
++ // to retrieve a set of bits from a long value: (n_bitmask << (nstartidx)) & bitset
++ // bitset idx = x | (z << 3)
++
++ // read three bits, so we need 7L
++ // note that generally: off - pos = (pos - 1) + pDecode - pos = pDecode - 1
++ // nstartidx1 = x rel -1 for z rel -1
++ // = (offX - posX - 1 + 2) | ((offZ - posZ - 1 + 2) << 3)
++ // = (pDecodeX - 1 - 1 + 2) | ((pDecodeZ - 1 - 1 + 2) << 3)
++ // = pDecodeX | (pDecodeZ << 3) = start
++ final int start = pDecodeX | (pDecodeZ << 3);
++ final long bitsetLine1 = currentPropagation & (7L << (start));
++
++ // nstartidx2 = x rel -1 for z rel 0 = line after line1, so we can just add 8 (row length of bitset)
++ final long bitsetLine2 = currentPropagation & (7L << (start + 8));
++
++ // nstartidx3 = x rel -1 for z rel 1 = line after line2, so we can just add 8 (row length of bitset)
++ final long bitsetLine3 = currentPropagation & (7L << (start + (8 + 8)));
++
++ // remove ("take") lines from bitset
++ currentPropagation ^= (bitsetLine1 | bitsetLine2 | bitsetLine3);
++
++ // now try to propagate
++ final Section section = this.sections[sectionIndex];
++
++ // lower 8 bits are current level, next upper 7 bits are source level, next 1 bit is updated source flag
++ final short currentStoredLevel = section.levels[localIndex];
++ final int currentLevel = currentStoredLevel & 0xFF;
++
++ if (currentLevel >= toPropagate) {
++ continue; // already at the level we want
++ }
++
++ // update level
++ section.levels[localIndex] = (short)((currentStoredLevel & ~0xFF) | (toPropagate & 0xFF));
++ updatedPositions.putAndMoveToLast(Coordinate.key(offX, offZ), (byte)toPropagate);
++
++ // queue next
++ if (toPropagate > 1) {
++ // now combine into one bitset to pass to child
++ // the child bitset is 4x4, so we just shift each line by 4
++ // add the propagation bitset offset to each line to make it easy to OR it into the propagation queue value
++ final long childPropagation =
++ ((bitsetLine1 >>> (start)) << (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)) | // z = -1
++ ((bitsetLine2 >>> (start + 8)) << (4 + COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)) | // z = 0
++ ((bitsetLine3 >>> (start + (8 + 8))) << (4 + 4 + COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)); // z = 1
++
++ // don't queue update if toPropagate cannot propagate anything to neighbours
++ // (for increase, propagating 0 to neighbours is useless)
++ if (queueLength >= queue.length) {
++ queue = this.resizeIncreaseQueue();
++ }
++ queue[queueLength++] =
++ ((long)(offX + (offZ << COORDINATE_BITS) + encodeOffset) & ((1L << (COORDINATE_BITS + COORDINATE_BITS)) - 1)) |
++ ((toPropagate & (LEVEL_COUNT - 1L)) << (COORDINATE_BITS + COORDINATE_BITS)) |
++ childPropagation; //(ALL_DIRECTIONS_BITSET << (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS));
++ continue;
++ }
++ continue;
++ }
++ }
++ }
++
++ protected final void performDecrease() {
++ long[] queue = this.decreaseQueue;
++ long[] increaseQueue = this.increaseQueue;
++ int queueReadIndex = 0;
++ int queueLength = this.decreaseQueueInitialLength;
++ this.decreaseQueueInitialLength = 0;
++ int increaseQueueLength = this.increaseQueueInitialLength;
++ final int decodeOffsetX = -this.encodeOffsetX;
++ final int decodeOffsetZ = -this.encodeOffsetZ;
++ final int encodeOffset = this.coordinateOffset;
++ final int sectionOffset = this.sectionIndexOffset;
++
++ final Long2ByteLinkedOpenHashMap updatedPositions = this.updatedPositions;
++
++ while (queueReadIndex < queueLength) {
++ final long queueValue = queue[queueReadIndex++];
++
++ final int posX = ((int)queueValue & (COORDINATE_SIZE - 1)) + decodeOffsetX;
++ final int posZ = (((int)queueValue >>> COORDINATE_BITS) & (COORDINATE_SIZE - 1)) + decodeOffsetZ;
++ final int propagatedLevel = ((int)queueValue >>> (COORDINATE_BITS + COORDINATE_BITS)) & (LEVEL_COUNT - 1);
++ // note: the above code requires coordinate bits * 2 < 32
++ // bitset is 16 bits
++ int propagateDirectionBitset = (int)(queueValue >>> (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)) & ((1 << 16) - 1);
++
++ // this bitset represents the values that we have not propagated to
++ // this bitset lets us determine what directions the neighbours we set should propagate to, in most cases
++ // significantly reducing the total number of ops
++ // since we propagate in a 1 radius, we need a 2 radius bitset to hold all possible values we would possibly need
++ // but if we use only 5x5 bits, then we need to use div/mod to retrieve coordinates from the bitset, so instead
++ // we use an 8x8 bitset and luckily that can be fit into only one long value (64 bits)
++ // to make things easy, we use positions [0, 4] in the bitset, with current position being 2
++ // index = x | (z << 3)
++
++ // to start, we eliminate everything 1 radius from the current position as the previous propagator
++ // must guarantee that either we propagate everything in 1 radius or we partially propagate for 1 radius
++ // but the rest not propagated are already handled
++ long currentPropagation = ~(
++ // z = -1
++ (1L << ((2 - 1) | ((2 - 1) << 3))) |
++ (1L << ((2 + 0) | ((2 - 1) << 3))) |
++ (1L << ((2 + 1) | ((2 - 1) << 3))) |
++
++ // z = 0
++ (1L << ((2 - 1) | ((2 + 0) << 3))) |
++ (1L << ((2 + 0) | ((2 + 0) << 3))) |
++ (1L << ((2 + 1) | ((2 + 0) << 3))) |
++
++ // z = 1
++ (1L << ((2 - 1) | ((2 + 1) << 3))) |
++ (1L << ((2 + 0) | ((2 + 1) << 3))) |
++ (1L << ((2 + 1) | ((2 + 1) << 3)))
++ );
++
++ final int toPropagate = propagatedLevel - 1;
++
++ // we could use while (propagateDirectionBitset != 0), but it's not a predictable branch. By counting
++ // the bits, the cpu loop predictor should perfectly predict the loop.
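++ // worked example of the 8x8 bitset above: index = x | (z << 3), with the current position at (2, 2),
++ // so the ~(...) mask built above clears the 3x3 neighbourhood bits 9, 10, 11, 17, 18, 19, 25, 26, 27
++ // and leaves every other bit set; the 7L masks below then pick out one three-bit row of that grid per neighbour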
++ for (int l = 0, len = Integer.bitCount(propagateDirectionBitset); l < len; ++l) {
++ final int set = Integer.numberOfTrailingZeros(propagateDirectionBitset);
++ final int tailingBit = (-propagateDirectionBitset) & propagateDirectionBitset;
++ propagateDirectionBitset ^= tailingBit;
++
++
++ // pDecode is from [0, 2], and 1 must be subtracted to fully decode the offset
++ // it has been split to save some cycles via parallelism
++ final int pDecodeX = (set & 3);
++ final int pDecodeZ = ((set >>> 2) & 3);
++
++ // re-ordered -1 on the position decode into pos - 1 to occur in parallel with determining pDecodeX
++ final int offX = (posX - 1) + pDecodeX;
++ final int offZ = (posZ - 1) + pDecodeZ;
++
++ final int sectionIndex = (offX >> SECTION_SHIFT) + ((offZ >> SECTION_SHIFT) * SECTION_CACHE_WIDTH) + sectionOffset;
++ final int localIndex = (offX & (SECTION_SIZE - 1)) | ((offZ & (SECTION_SIZE - 1)) << SECTION_SHIFT);
++
++ // to retrieve a set of bits from a long value: (n_bitmask << (nstartidx)) & bitset
++ // bitset idx = x | (z << 3)
++
++ // read three bits, so we need 7L
++ // note that generally: off - pos = (pos - 1) + pDecode - pos = pDecode - 1
++ // nstartidx1 = x rel -1 for z rel -1
++ // = (offX - posX - 1 + 2) | ((offZ - posZ - 1 + 2) << 3)
++ // = (pDecodeX - 1 - 1 + 2) | ((pDecodeZ - 1 - 1 + 2) << 3)
++ // = pDecodeX | (pDecodeZ << 3) = start
++ final int start = pDecodeX | (pDecodeZ << 3);
++ final long bitsetLine1 = currentPropagation & (7L << (start));
++
++ // nstartidx2 = x rel -1 for z rel 0 = line after line1, so we can just add 8 (row length of bitset)
++ final long bitsetLine2 = currentPropagation & (7L << (start + 8));
++
++ // nstartidx3 = x rel -1 for z rel 1 = line after line2, so we can just add 8 (row length of bitset)
++ final long bitsetLine3 = currentPropagation & (7L << (start + (8 + 8)));
++
++ // remove ("take") lines from bitset
++ // can't do this during decrease, TODO WHY?
++ //currentPropagation ^= (bitsetLine1 | bitsetLine2 | bitsetLine3);
++
++ // now try to propagate
++ final Section section = this.sections[sectionIndex];
++
++ // lower 8 bits are current level, next upper 7 bits are source level, next 1 bit is updated source flag
++ final short currentStoredLevel = section.levels[localIndex];
++ final int currentLevel = currentStoredLevel & 0xFF;
++ final int sourceLevel = (currentStoredLevel >>> 8) & 0xFF;
++
++ if (currentLevel == 0) {
++ continue; // already at the level we want
++ }
++
++ if (currentLevel > toPropagate) {
++ // it looks like another source propagated here, so re-propagate it
++ if (increaseQueueLength >= increaseQueue.length) {
++ increaseQueue = this.resizeIncreaseQueue();
++ }
++ increaseQueue[increaseQueueLength++] =
++ ((long)(offX + (offZ << COORDINATE_BITS) + encodeOffset) & ((1L << (COORDINATE_BITS + COORDINATE_BITS)) - 1)) |
++ ((currentLevel & (LEVEL_COUNT - 1L)) << (COORDINATE_BITS + COORDINATE_BITS)) |
++ (FLAG_RECHECK_LEVEL | (ALL_DIRECTIONS_BITSET << (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)));
++ continue;
++ }
++
++ // update level
++ section.levels[localIndex] = (short)((currentStoredLevel & ~0xFF));
++ updatedPositions.putAndMoveToLast(Coordinate.key(offX, offZ), (byte)0);
++
++ if (sourceLevel != 0) {
++ // re-propagate source
++ // note: do not set recheck level, or else the propagation will fail
++ if (increaseQueueLength >= increaseQueue.length) {
++ increaseQueue = this.resizeIncreaseQueue();
++ }
++ increaseQueue[increaseQueueLength++] =
++ ((long)(offX + (offZ << COORDINATE_BITS) + encodeOffset) & ((1L << (COORDINATE_BITS + COORDINATE_BITS)) - 1)) |
++ ((sourceLevel & (LEVEL_COUNT - 1L)) << (COORDINATE_BITS + COORDINATE_BITS)) |
++ (FLAG_WRITE_LEVEL | (ALL_DIRECTIONS_BITSET << (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)));
++ }
++
++ // queue next
++ // note: toPropagate > 0 here, since toPropagate >= currentLevel and currentLevel > 0
++ // now combine into one bitset to pass to child
++ // the child bitset is 4x4, so we just shift each line by 4
++ // add the propagation bitset offset to each line to make it easy to OR it into the propagation queue value
++ final long childPropagation =
++ ((bitsetLine1 >>> (start)) << (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)) | // z = -1
++ ((bitsetLine2 >>> (start + 8)) << (4 + COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)) | // z = 0
++ ((bitsetLine3 >>> (start + (8 + 8))) << (4 + 4 + COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS)); // z = 1
++
++ // don't queue update if toPropagate cannot propagate anything to neighbours
++ // (for increase, propagating 0 to neighbours is useless)
++ if (queueLength >= queue.length) {
++ queue = this.resizeDecreaseQueue();
++ }
++ queue[queueLength++] =
++ ((long)(offX + (offZ << COORDINATE_BITS) + encodeOffset) & ((1L << (COORDINATE_BITS + COORDINATE_BITS)) - 1)) |
++ ((toPropagate & (LEVEL_COUNT - 1L)) << (COORDINATE_BITS + COORDINATE_BITS)) |
++ childPropagation; //(ALL_DIRECTIONS_BITSET << (COORDINATE_BITS + COORDINATE_BITS + LEVEL_BITS));
++ continue;
++ }
++ }
++
++ // propagate sources we clobbered
++ this.increaseQueueInitialLength = increaseQueueLength;
++ this.performIncrease();
++ }
++
++ private static final class Coordinate implements Comparable<Coordinate> {
++
++ public final long key;
++
++ public Coordinate(final long key) {
++ this.key = key;
++ }
++
++ public Coordinate(final int x, final int z) {
++ this.key = key(x, z);
++ }
++
++ public static long key(final int x, final int z) {
++ return ((long)z << 32) | (x & 0xFFFFFFFFL);
++ }
++
++ public static int x(final long key) {
++ return (int)key;
++ }
++
++ public static int z(final long key) {
++ return (int)(key >>> 32);
++ }
++
++ @Override
++ public int hashCode() {
++ return (int)HashCommon.mix(this.key);
++ }
++
++ @Override
++ public boolean equals(final Object obj) {
++ if (this == obj) {
++ return true;
++ }
++
++ if (!(obj instanceof Coordinate other)) {
++ return false;
++ }
++
++ return this.key == other.key;
++ }
++
++ // This class is intended for HashMap/ConcurrentHashMap usage, which do treeify bin nodes if the chain
++ // is too large. So we should implement compareTo to help.
++ @Override
++ public int compareTo(final Coordinate other) {
++ return Long.compare(this.key, other.key);
++ }
++
++ @Override
++ public String toString() {
++ return "[" + x(this.key) + "," + z(this.key) + "]";
++ }
++ }
++
++ /*
++ private static final java.util.Random random = new java.util.Random(4L);
++ private static final List<io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap<Void>> walkers =
++ new java.util.ArrayList<>();
++ static final int PLAYERS = 100;
++ static final int RAD_BLOCKS = 10000;
++ static final int RAD = RAD_BLOCKS >> 4;
++ static final int RAD_BIG_BLOCKS = 100_000;
++ static final int RAD_BIG = RAD_BIG_BLOCKS >> 4;
++ static final int VD = 4;
++ static final int BIG_PLAYERS = 50;
++ static final double WALK_CHANCE = 0.10;
++ static final double TP_CHANCE = 0.01;
++
++ public static void main(final String[] args) {
++ final ReentrantAreaLock ticketLock = new ReentrantAreaLock(SECTION_SHIFT);
++ final ReentrantAreaLock schedulingLock = new ReentrantAreaLock(SECTION_SHIFT);
++ final Long2ByteLinkedOpenHashMap levelMap = new Long2ByteLinkedOpenHashMap();
++ final Long2ByteLinkedOpenHashMap refMap = new Long2ByteLinkedOpenHashMap();
++ final io.papermc.paper.util.misc.Delayed8WayDistancePropagator2D ref = new io.papermc.paper.util.misc.Delayed8WayDistancePropagator2D((final long coordinate, final byte oldLevel, final byte newLevel) -> {
++ if (newLevel == 0) {
++ refMap.remove(coordinate);
++ } else {
++ refMap.put(coordinate, newLevel);
++ }
++ });
++ final ThreadedTicketLevelPropagator propagator = new ThreadedTicketLevelPropagator() {
++ @Override
++ protected void processLevelUpdates(Long2ByteLinkedOpenHashMap updates) {
++ for (final long key : updates.keySet()) {
++ final byte val = updates.get(key);
++ if (val == 0) {
++ levelMap.remove(key);
++ } else {
++ levelMap.put(key, val);
++ }
++ }
++ }
++
++ @Override
++ protected void processSchedulingUpdates(Long2ByteLinkedOpenHashMap updates, List scheduledTasks, List changedFullStatus) {}
++ };
++
++ for (;;) {
++ if (walkers.isEmpty()) {
++ for (int i = 0; i < PLAYERS; ++i) {
++ int rad = i < BIG_PLAYERS ? RAD_BIG : RAD;
++ int posX = random.nextInt(-rad, rad + 1);
++ int posZ = random.nextInt(-rad, rad + 1);
++
++ io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap<Void> map = new io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap<>(null) {
++ @Override
++ protected void addCallback(Void parameter, int chunkX, int chunkZ) {
++ int src = 45 - 31 + 1;
++ ref.setSource(chunkX, chunkZ, src);
++ propagator.setSource(chunkX, chunkZ, src);
++ }
++
++ @Override
++ protected void removeCallback(Void parameter, int chunkX, int chunkZ) {
++ ref.removeSource(chunkX, chunkZ);
++ propagator.removeSource(chunkX, chunkZ);
++ }
++ };
++
++ map.add(posX, posZ, VD);
++
++ walkers.add(map);
++ }
++ } else {
++ for (int i = 0; i < PLAYERS; ++i) {
++ if (random.nextDouble() > WALK_CHANCE) {
++ continue;
++ }
++
++ io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap<Void> map = walkers.get(i);
++
++ int updateX = random.nextInt(-1, 2);
++ int updateZ = random.nextInt(-1, 2);
++
++ map.update(map.lastChunkX + updateX, map.lastChunkZ + updateZ, VD);
++ }
++
++ for (int i = 0; i < PLAYERS; ++i) {
++ if (random.nextDouble() > TP_CHANCE) {
++ continue;
++ }
++
++ int rad = i < BIG_PLAYERS ? RAD_BIG : RAD;
++ int posX = random.nextInt(-rad, rad + 1);
++ int posZ = random.nextInt(-rad, rad + 1);
++
++ io.papermc.paper.chunk.system.RegionizedPlayerChunkLoader.SingleUserAreaMap<Void> map = walkers.get(i);
++
++ map.update(posX, posZ, VD);
++ }
++ }
++
++ ref.propagateUpdates();
++ propagator.performUpdates(ticketLock, schedulingLock, null, null);
++
++ if (!refMap.equals(levelMap)) {
++ throw new IllegalStateException("Error!");
++ }
++ }
++ }
++ */
++}
+diff --git a/src/main/java/net/minecraft/server/level/Ticket.java b/src/main/java/net/minecraft/server/level/Ticket.java
+index 768a2667f950a635a562fa8a0c75b31a3ae9190e..6b727f452ae8461edc0d734173f25817af1e3318 100644
+--- a/src/main/java/net/minecraft/server/level/Ticket.java
++++ b/src/main/java/net/minecraft/server/level/Ticket.java
+@@ -7,10 +7,12 @@ public final class Ticket<T> implements Comparable<Ticket<?>> {
+ private final int ticketLevel;
+ public final T key;
+ // Paper start - rewrite chunk system
+- public final long removalTick;
++ // Folia start - use area based lock to reduce contention
++ public long removeDelay;
+
+- public Ticket(TicketType<T> type, int level, T argument, long removalTick) {
+- this.removalTick = removalTick;
++ public Ticket(TicketType<T> type, int level, T argument, long removeDelay) {
++ this.removeDelay = removeDelay;
++ // Folia end - use area based lock to reduce contention
+ // Paper end - rewrite chunk system
+ this.type = type;
+ this.ticketLevel = level;
+@@ -47,7 +49,7 @@ public final class Ticket<T> implements Comparable<Ticket<?>> {
+
+ @Override
+ public String toString() {
+- return "Ticket[" + this.type + " " + this.ticketLevel + " (" + this.key + ")] to die on " + this.removalTick; // Paper - rewrite chunk system
++ return "Ticket[" + this.type + " " + this.ticketLevel + " (" + this.key + ")] to die in " + this.removeDelay; // Paper - rewrite chunk system // Folia - use area based lock to reduce contention
+ }
+
+ public TicketType<T> getType() {
+diff --git a/src/main/java/net/minecraft/util/SortedArraySet.java b/src/main/java/net/minecraft/util/SortedArraySet.java
+index d227b91defc3992f1a003a19264bc3aa29718795..2aee6ae5d588f36fe23ffd2a88a5a2a925b52b8e 100644
+--- a/src/main/java/net/minecraft/util/SortedArraySet.java
++++ b/src/main/java/net/minecraft/util/SortedArraySet.java
+@@ -14,6 +14,14 @@ public class SortedArraySet<T> extends AbstractSet<T> {
+ T[] contents;
+ int size;
+
++ // Folia start - use area based lock to reduce contention
++ public SortedArraySet(final SortedArraySet<T> other) {
++ this.comparator = other.comparator;
++ this.size = other.size;
++ this.contents = Arrays.copyOf(other.contents, this.size);
++ }
++ // Folia end - use area based lock to reduce contention
++
+ private SortedArraySet(int initialCapacity, Comparator<T> comparator) {
+ this.comparator = comparator;
+ if (initialCapacity < 0) {