Paper/patches/server/0240-Add-Early-Warning-Feature-to-WatchDog.patch
Spottedleaf 01a13871de
Rewrite chunk system (#8177)
Patch documentation to come

Issues with the old system that are fixed now:
- World generation does not scale with cpu cores effectively.
- Relies on the main thread for scheduling and maintaining chunk state, dropping chunk load/generate rates at lower tps.
- Unreliable prioritisation of chunk gen/load calls that block the main thread.
- Shutdown logic is utterly unreliable, as it has to wait for all chunks to unload - is it guaranteed that the chunk system is in a state on shutdown that it can reliably do this? Watchdog shutdown also typically failed due to thread checks, which is now resolved.
- Saving of data is not unified (i.e can save chunk data without saving entity data, poses problems for desync if shutdown is really abnormal.
- Entities are not loaded with chunks. This caused quite a bit of headache for Chunk#getEntities API, but now the new chunk system loads entities with chunks so that they are ready whenever the chunk loads in. Effectively brings the behavior back to 1.16 era, but still storing entities in their own separate regionfiles.

The above list is not complete. The patch documentation will complete it.

New chunk system hard relies on starlight and dataconverter, and most importantly the new concurrent utilities in ConcurrentUtil.

Some of the old async chunk i/o interface (i.e the old file io thread reroutes _some_ calls to the new file io thread) is kept for plugin compat reasons. It will be removed in the next major version of minecraft.

The old legacy chunk system patches have been moved to the removed folder in case we need them again.
2022-09-26 01:02:51 -07:00

161 lines
9.9 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: miclebrick <miclebrick@outlook.com>
Date: Wed, 8 Aug 2018 15:30:52 -0400
Subject: [PATCH] Add Early Warning Feature to WatchDog
Detect when the server has been hung for a long duration, and start printing
thread dumps at an interval until the point of crash.
This will help diagnose what was going on in that time before the crash.
diff --git a/src/main/java/net/minecraft/server/MinecraftServer.java b/src/main/java/net/minecraft/server/MinecraftServer.java
index 65b9dfa62a7785b4889d8edac0000487f3fc5367..d79ac1e7a1301e3e5d4e6caecc95bf320e5203e5 100644
--- a/src/main/java/net/minecraft/server/MinecraftServer.java
+++ b/src/main/java/net/minecraft/server/MinecraftServer.java
@@ -1044,6 +1044,7 @@ public abstract class MinecraftServer extends ReentrantBlockableEventLoop<TickTa
this.updateStatusIcon(this.status);
// Spigot start
+ org.spigotmc.WatchdogThread.hasStarted = true; // Paper
Arrays.fill( recentTps, 20 );
long start = System.nanoTime(), curTime, tickSection = start; // Paper - Further improve server tick loop
lastTick = start - TICK_TIME; // Paper
diff --git a/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java b/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java
index ed3662f2d237fc6a11cb3f6a2e476dc014ba4586..0944d339c76b09f183b3a1e191955300fc24cd97 100644
--- a/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java
+++ b/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java
@@ -202,6 +202,7 @@ public class DedicatedServer extends MinecraftServer implements ServerInterface
// Paper start
paperConfigurations.initializeGlobalConfiguration();
paperConfigurations.initializeWorldDefaultsConfiguration();
+ org.spigotmc.WatchdogThread.doStart(org.spigotmc.SpigotConfig.timeoutTime, org.spigotmc.SpigotConfig.restartOnCrash);
io.papermc.paper.command.PaperCommands.registerCommands(this);
com.destroystokyo.paper.Metrics.PaperMetrics.startMetrics();
com.destroystokyo.paper.VersionHistoryManager.INSTANCE.getClass(); // load version history now
diff --git a/src/main/java/org/bukkit/craftbukkit/CraftServer.java b/src/main/java/org/bukkit/craftbukkit/CraftServer.java
index a60b93a15d5cf80745eb114b393465ff1c53f444..cf9f10a61f87130a4a58c09edce7ef14fbc6ae30 100644
--- a/src/main/java/org/bukkit/craftbukkit/CraftServer.java
+++ b/src/main/java/org/bukkit/craftbukkit/CraftServer.java
@@ -911,6 +911,7 @@ public final class CraftServer implements Server {
@Override
public void reload() {
+ org.spigotmc.WatchdogThread.hasStarted = false; // Paper - Disable watchdog early timeout on reload
this.reloadCount++;
this.configuration = YamlConfiguration.loadConfiguration(this.getConfigFile());
this.commandsConfiguration = YamlConfiguration.loadConfiguration(this.getCommandsConfigFile());
@@ -999,6 +1000,7 @@ public final class CraftServer implements Server {
this.enablePlugins(PluginLoadOrder.STARTUP);
this.enablePlugins(PluginLoadOrder.POSTWORLD);
this.getPluginManager().callEvent(new ServerLoadEvent(ServerLoadEvent.LoadType.RELOAD));
+ org.spigotmc.WatchdogThread.hasStarted = true; // Paper - Disable watchdog early timeout on reload
}
@Override
diff --git a/src/main/java/org/spigotmc/SpigotConfig.java b/src/main/java/org/spigotmc/SpigotConfig.java
index 645643d102118ad4916a152abfb9ab0d02751e11..edc5f195cc3de8885b839469656650ba465346be 100644
--- a/src/main/java/org/spigotmc/SpigotConfig.java
+++ b/src/main/java/org/spigotmc/SpigotConfig.java
@@ -229,7 +229,7 @@ public class SpigotConfig
SpigotConfig.restartScript = SpigotConfig.getString( "settings.restart-script", SpigotConfig.restartScript );
SpigotConfig.restartMessage = SpigotConfig.transform( SpigotConfig.getString( "messages.restart", "Server is restarting" ) );
SpigotConfig.commands.put( "restart", new RestartCommand( "restart" ) );
- WatchdogThread.doStart( timeoutTime, restartOnCrash );
+ // WatchdogThread.doStart( timeoutTime, restartOnCrash ); // Paper - moved to after paper config initialization
}
public static boolean bungee;
diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java
index a9897c494b3dc56d900356d74030359832febbaa..b47d043144c499b1499f6b4be5a16a3f75c9fcb8 100644
--- a/src/main/java/org/spigotmc/WatchdogThread.java
+++ b/src/main/java/org/spigotmc/WatchdogThread.java
@@ -14,6 +14,10 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
private static WatchdogThread instance;
private long timeoutTime;
private boolean restart;
+ private final long earlyWarningEvery; // Paper - Timeout time for just printing a dump but not restarting
+ private final long earlyWarningDelay; // Paper
+ public static volatile boolean hasStarted; // Paper
+ private long lastEarlyWarning; // Paper - Keep track of short dump times to avoid spamming console with short dumps
private volatile long lastTick;
private volatile boolean stopping;
@@ -22,6 +26,8 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
super( "Paper Watchdog Thread" );
this.timeoutTime = timeoutTime;
this.restart = restart;
+ earlyWarningEvery = Math.min(io.papermc.paper.configuration.GlobalConfiguration.get().watchdog.earlyWarningEvery, timeoutTime); // Paper
+ earlyWarningDelay = Math.min(io.papermc.paper.configuration.GlobalConfiguration.get().watchdog.earlyWarningDelay, timeoutTime); // Paper
}
private static long monotonicMillis()
@@ -61,9 +67,18 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
while ( !this.stopping )
{
//
- if ( this.lastTick != 0 && this.timeoutTime > 0 && WatchdogThread.monotonicMillis() > this.lastTick + this.timeoutTime && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable
+ // Paper start
+ Logger log = Bukkit.getServer().getLogger();
+ long currentTime = WatchdogThread.monotonicMillis();
+ if ( this.lastTick != 0 && this.timeoutTime > 0 && currentTime > this.lastTick + this.earlyWarningEvery && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable
{
- Logger log = Bukkit.getServer().getLogger();
+ boolean isLongTimeout = currentTime > lastTick + timeoutTime;
+ // Don't spam early warning dumps
+ if ( !isLongTimeout && (earlyWarningEvery <= 0 || !hasStarted || currentTime < lastEarlyWarning + earlyWarningEvery || currentTime < lastTick + earlyWarningDelay)) continue;
+ if ( !isLongTimeout && MinecraftServer.getServer().hasStopped()) continue; // Don't spam early watchdog warnings during shutdown, we'll come back to this...
+ lastEarlyWarning = currentTime;
+ if (isLongTimeout) {
+ // Paper end
log.log( Level.SEVERE, "------------------------------" );
log.log( Level.SEVERE, "The server has stopped responding! This is (probably) not a Paper bug." ); // Paper
log.log( Level.SEVERE, "If you see a plugin in the Server thread dump below, then please report it to that author" );
@@ -93,30 +108,46 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
}
}
// Paper end
+ } else
+ {
+ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH - " + Bukkit.getServer().getVersion() + " ---");
+ log.log(Level.SEVERE, "The server has not responded for " + (currentTime - lastTick) / 1000 + " seconds! Creating thread dump");
+ }
+ // Paper end - Different message for short timeout
log.log( Level.SEVERE, "------------------------------" );
log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" ); // Paper
io.papermc.paper.chunk.system.scheduling.ChunkTaskScheduler.dumpAllChunkLoadInfo(isLongTimeout); // Paper // Paper - rewrite chunk system
WatchdogThread.dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().serverThread.getId(), Integer.MAX_VALUE ), log );
log.log( Level.SEVERE, "------------------------------" );
//
+ // Paper start - Only print full dump on long timeouts
+ if ( isLongTimeout )
+ {
log.log( Level.SEVERE, "Entire Thread Dump:" );
ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads( true, true );
for ( ThreadInfo thread : threads )
{
WatchdogThread.dumpThread( thread, log );
}
+ } else {
+ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---");
+ }
+
log.log( Level.SEVERE, "------------------------------" );
+ if ( isLongTimeout )
+ {
if ( this.restart && !MinecraftServer.getServer().hasStopped() )
{
RestartCommand.restart();
}
break;
+ } // Paper end
}
try
{
- sleep( 10000 );
+ sleep( 1000 ); // Paper - Reduce check time to every second instead of every ten seconds, more consistent and allows for short timeout
} catch ( InterruptedException ex )
{
interrupt();