Paper/Spigot-Server-Patches/0536-Optimize-Bit-Operations-by-inlining.patch

243 lines
10 KiB
Diff
Raw Normal View History

Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Aikar <aikar@aikar.co>
Date: Thu, 4 Jun 2020 02:24:49 -0400
Subject: [PATCH] Optimize Bit Operations by inlining
Inline bit operations and reduce instruction count to make these hot
operations faster
diff --git a/src/main/java/net/minecraft/server/BlockPosition.java b/src/main/java/net/minecraft/server/BlockPosition.java
index 3bf17ccdaef21322b787db538d569e0bc614ef22..4c64798fbc50f4d8b08502ba865c5fde5c968e62 100644
--- a/src/main/java/net/minecraft/server/BlockPosition.java
+++ b/src/main/java/net/minecraft/server/BlockPosition.java
@@ -75,38 +75,34 @@ public class BlockPosition extends BaseBlockPosition implements MinecraftSeriali
return dynamicops.createIntList(IntStream.of(new int[]{this.getX(), this.getY(), this.getZ()}));
}
+ public static long getAdjacent(int baseX, int baseY, int baseZ, EnumDirection enumdirection) { return asLong(baseX + enumdirection.getAdjacentX(), baseY + enumdirection.getAdjacentY(), baseZ + enumdirection.getAdjacentZ()); } // Paper
public static long a(long i, EnumDirection enumdirection) {
return a(i, enumdirection.getAdjacentX(), enumdirection.getAdjacentY(), enumdirection.getAdjacentZ());
}
public static long a(long i, int j, int k, int l) {
- return a(b(i) + j, c(i) + k, d(i) + l);
+ return a((int) (i >> 38) + j, (int) ((i << 52) >> 52) + k, (int) ((i << 26) >> 38) + l); // Paper - simplify/inline
}
public static int b(long i) {
- return (int) (i << 64 - BlockPosition.k - BlockPosition.c >> 64 - BlockPosition.c);
+ return (int) (i >> 38); // Paper - simplify/inline
}
public static int c(long i) {
- return (int) (i << 64 - BlockPosition.f >> 64 - BlockPosition.f);
+ return (int) ((i << 52) >> 52); // Paper - simplify/inline
}
public static int d(long i) {
- return (int) (i << 64 - BlockPosition.j - BlockPosition.d >> 64 - BlockPosition.d);
+ return (int) ((i << 26) >> 38); // Paper - simplify/inline
}
public static BlockPosition fromLong(long i) {
- return new BlockPosition(b(i), c(i), d(i));
+ return new BlockPosition((int) (i >> 38), (int) ((i << 52) >> 52), (int) ((i << 26) >> 38)); // Paper - simplify/inline
}
public static long asLong(int x, int y, int z) { return a(x, y, z); } // Paper - OBFHELPER
public static long a(int i, int j, int k) {
- long l = 0L;
-
- l |= ((long) i & BlockPosition.g) << BlockPosition.k;
- l |= ((long) j & BlockPosition.h) << 0;
- l |= ((long) k & BlockPosition.i) << BlockPosition.j;
- return l;
+ return (((long) i & (long) 67108863) << 38) | (((long) j & (long) 4095)) | (((long) k & (long) 67108863) << 12); // Paper - inline constants and simplify
}
public static long f(long i) {
diff --git a/src/main/java/net/minecraft/server/SectionPosition.java b/src/main/java/net/minecraft/server/SectionPosition.java
index ca3f63f5e5e4e926de562b4bfbbf72ad8a6b943f..22bc96f97d22e0748110982e4b1ff85002f924b5 100644
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
--- a/src/main/java/net/minecraft/server/SectionPosition.java
+++ b/src/main/java/net/minecraft/server/SectionPosition.java
@@ -16,7 +16,7 @@ public class SectionPosition extends BaseBlockPosition {
}
public static SectionPosition a(BlockPosition blockposition) {
- return new SectionPosition(a(blockposition.getX()), a(blockposition.getY()), a(blockposition.getZ()));
+ return new SectionPosition(blockposition.getX() >> 4, blockposition.getY() >> 4, blockposition.getZ() >> 4); // Paper
}
public static SectionPosition a(ChunkCoordIntPair chunkcoordintpair, int i) {
@@ -28,15 +28,23 @@ public class SectionPosition extends BaseBlockPosition {
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
}
public static SectionPosition a(long i) {
- return new SectionPosition(b(i), c(i), d(i));
+ return new SectionPosition((int) (i >> 42), (int) (i << 44 >> 44), (int) (i << 22 >> 42)); // Paper
}
public static long a(long i, EnumDirection enumdirection) {
return a(i, enumdirection.getAdjacentX(), enumdirection.getAdjacentY(), enumdirection.getAdjacentZ());
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
}
+ // Paper start
+ public static long getAdjacentFromBlockPos(int x, int y, int z, EnumDirection enumdirection) {
+ return (((long) ((x >> 4) + enumdirection.getAdjacentX()) & 4194303L) << 42) | (((long) ((y >> 4) + enumdirection.getAdjacentY()) & 1048575L)) | (((long) ((z >> 4) + enumdirection.getAdjacentZ()) & 4194303L) << 20);
+ }
+ public static long getAdjacentFromSectionPos(int x, int y, int z, EnumDirection enumdirection) {
+ return (((long) (x + enumdirection.getAdjacentX()) & 4194303L) << 42) | (((long) ((y) + enumdirection.getAdjacentY()) & 1048575L)) | (((long) (z + enumdirection.getAdjacentZ()) & 4194303L) << 20);
+ }
+ // Paper end
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
public static long a(long i, int j, int k, int l) {
- return b(b(i) + j, c(i) + k, d(i) + l);
+ return (((long) ((int) (i >> 42) + j) & 4194303L) << 42) | (((long) ((int) (i << 44 >> 44) + k) & 1048575L)) | (((long) ((int) (i << 22 >> 42) + l) & 4194303L) << 20); // Simplify to reduce instruction count
}
public static int a(int i) {
@@ -48,11 +56,7 @@ public class SectionPosition extends BaseBlockPosition {
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
}
public static short b(BlockPosition blockposition) {
- int i = b(blockposition.getX());
- int j = b(blockposition.getY());
- int k = b(blockposition.getZ());
-
- return (short) (i << 8 | k << 4 | j);
+ return (short) ((blockposition.x & 15) << 8 | (blockposition.z & 15) << 4 | blockposition.y & 15); // Paper - simplify/inline
}
public static int c(int i) {
@@ -60,7 +64,7 @@ public class SectionPosition extends BaseBlockPosition {
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
}
public static int b(long i) {
- return (int) (i << 0 >> 42);
+ return (int) (i >> 42); // Paper
}
public static int c(long i) {
@@ -71,44 +75,46 @@ public class SectionPosition extends BaseBlockPosition {
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
return (int) (i << 22 >> 42);
}
- public int a() {
- return this.getX();
+ public final int a() { // Paper
+ return x; // Paper
}
- public int b() {
- return this.getY();
+ public final int b() { // Paper
+ return y; // Paper
}
- public int c() {
- return this.getZ();
+ public final int c() { // Paper
+ return z; // Paper
}
- public int d() {
- return this.a() << 4;
+ public final int d() { // Paper
+ return x << 4; // Paper
}
- public int e() {
- return this.b() << 4;
+ public final int e() { // Paper
+ return y << 4; // Paper
}
- public int f() {
- return this.c() << 4;
+ public final int f() { // Paper
+ return z << 4; // Paper
}
- public int g() {
- return (this.a() << 4) + 15;
+ public final int g() { // Paper
+ return (x << 4) + 15; // Paper
}
- public int h() {
- return (this.b() << 4) + 15;
+ public final int h() { // Paper
+ return (y << 4) + 15; // Paper
}
- public int r() {
- return (this.c() << 4) + 15;
+ public final int r() { // Paper
+ return (z << 4) + 15; // Paper
}
+ public static long blockToSection(long i) { return e(i); } // Paper - OBFHELPER
public static long e(long i) {
- return b(a(BlockPosition.b(i)), a(BlockPosition.c(i)), a(BlockPosition.d(i)));
+ // b(a(BlockPosition.b(i)), a(BlockPosition.c(i)), a(BlockPosition.d(i)));
+ return (((long) (int) (i >> 42) & 4194303L) << 42) | (((long) (int) ((i << 52) >> 56) & 1048575L)) | (((long) (int) ((i << 26) >> 42) & 4194303L) << 20); // Simplify to reduce instruction count
}
public static long f(long i) {
@@ -116,7 +122,7 @@ public class SectionPosition extends BaseBlockPosition {
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
}
public BlockPosition s() {
- return new BlockPosition(c(this.a()), c(this.b()), c(this.c()));
+ return new BlockPosition(x << 4, y << 4, z << 4); // Paper
}
public BlockPosition t() {
@@ -129,36 +135,30 @@ public class SectionPosition extends BaseBlockPosition {
Optimize Light Engine Massive update to light to improve performance and chunk loading/generation. 1) Massive bit packing/unpacking optimizations and inlining. A lot of performance has to do with constant packing and unpacking of bits. We now inline a most bit operations, and re-use base x/y/z bits in many places. This helps with cpu level processing to just do all the math at once instead of having to jump in and out of function calls. This much logic also is likely over the JVM Inline limit for JIT too. 2) Applied a few of JellySquid's Phosphor mod optimizations such as - ensuring we don't notify neighbor chunks when neighbor chunk doesn't need to be notified - reduce hasLight checks in initializing light, and prob some more, they are tagged JellySquid where phosphor influence was used. 3) Optimize hot path accesses to getting updating chunk to have less branching 4) Optimize getBlock accesses to have less branching, and less unpacking 5) Have a separate urgent bucket for chunk light tasks. These tasks will always cut in line over non blocking light tasks. 6) Retain chunk priority while light tasks are enqueued. So if a task comes in at high priority but the queue is full of tasks already at a lower priority, before the task was simply added to the end. Now it can cut in line to the front. this applies for both urgent and non urgent tasks. 7) Buffer non urgent tasks even if queueUpdate is called multiple times to improve efficiency. 8) Fix NPE risk that crashes server in getting nibble data Fixes #3489 Fixes #3363
2020-06-05 07:25:11 +02:00
return new ChunkCoordIntPair(this.a(), this.c());
}
+ // Paper start
+ public static long blockPosAsSectionLong(int i, int j, int k) {
+ return (((long) (i >> 4) & 4194303L) << 42) | (((long) (j >> 4) & 1048575L)) | (((long) (k >> 4) & 4194303L) << 20);
+ }
+ // Paper end
+ public static long asLong(int i, int j, int k) { return b(i, j, k); } // Paper - OBFHELPER
public static long b(int i, int j, int k) {
- long l = 0L;
-
- l |= ((long) i & 4194303L) << 42;
- l |= ((long) j & 1048575L) << 0;
- l |= ((long) k & 4194303L) << 20;
- return l;
+ return (((long) i & 4194303L) << 42) | (((long) j & 1048575L)) | (((long) k & 4194303L) << 20); // Paper - Simplify to reduce instruction count
}
public long v() {
- return b(this.a(), this.b(), this.c());
+ return (((long) x & 4194303L) << 42) | (((long) y & 1048575L)) | (((long) z & 4194303L) << 20); // Paper - Simplify to reduce instruction count
}
public Stream<BlockPosition> w() {
- return BlockPosition.a(this.d(), this.e(), this.f(), this.g(), this.h(), this.r());
+ return BlockPosition.a(x << 4, y << 4, z << 4, (x << 4) + 15, (y << 4) + 15, (z << 4) + 15); // Paper - simplify/inline
}
public static Stream<SectionPosition> a(SectionPosition sectionposition, int i) {
- int j = sectionposition.a();
- int k = sectionposition.b();
- int l = sectionposition.c();
-
- return a(j - i, k - i, l - i, j + i, k + i, l + i);
+ return a(sectionposition.x - i, sectionposition.y - i, sectionposition.z - i, sectionposition.x + i, sectionposition.y + i, sectionposition.z + i); // Paper - simplify/inline
}
public static Stream<SectionPosition> b(ChunkCoordIntPair chunkcoordintpair, int i) {
- int j = chunkcoordintpair.x;
- int k = chunkcoordintpair.z;
-
- return a(j - i, 0, k - i, j + i, 15, k + i);
+ return a(chunkcoordintpair.x - i, 0, chunkcoordintpair.z - i, chunkcoordintpair.x + i, 15, chunkcoordintpair.z + i); // Paper - simplify/inline
}
public static Stream<SectionPosition> a(final int i, final int j, final int k, final int l, final int i1, final int j1) {