Paper/patches/server/0239-Add-Early-Warning-Feature-to-WatchDog.patch

161 lines
9.9 KiB
Diff
Raw Normal View History

2021-06-11 14:02:28 +02:00
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: miclebrick <miclebrick@outlook.com>
Date: Wed, 8 Aug 2018 15:30:52 -0400
Subject: [PATCH] Add Early Warning Feature to WatchDog
Detect when the server has been hung for a long duration, and start printing
thread dumps at an interval until the point of crash.
This will help diagnose what was going on in that time before the crash.
diff --git a/src/main/java/net/minecraft/server/MinecraftServer.java b/src/main/java/net/minecraft/server/MinecraftServer.java
index bb7da8b167b72f2b1fec2be8838545ae4ed71eb3..eef3b01d1304e06d0c9971df82af54d90676957a 100644
2021-06-11 14:02:28 +02:00
--- a/src/main/java/net/minecraft/server/MinecraftServer.java
+++ b/src/main/java/net/minecraft/server/MinecraftServer.java
2022-12-07 20:22:28 +01:00
@@ -1051,6 +1051,7 @@ public abstract class MinecraftServer extends ReentrantBlockableEventLoop<TickTa
2022-06-07 23:45:11 +02:00
this.updateStatusIcon(this.status);
2021-06-11 14:02:28 +02:00
2022-06-07 23:45:11 +02:00
// Spigot start
2022-06-08 06:22:42 +02:00
+ org.spigotmc.WatchdogThread.hasStarted = true; // Paper
2022-06-07 23:45:11 +02:00
Arrays.fill( recentTps, 20 );
2022-06-08 06:22:42 +02:00
long start = System.nanoTime(), curTime, tickSection = start; // Paper - Further improve server tick loop
lastTick = start - TICK_TIME; // Paper
diff --git a/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java b/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java
2022-12-07 20:22:28 +01:00
index e6826cd0a596f063e8737dcde3c8c6c5b3f71970..1a2607d1b257cea65c82c661a6b3d46c1526a775 100644
--- a/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java
+++ b/src/main/java/net/minecraft/server/dedicated/DedicatedServer.java
2022-12-07 20:22:28 +01:00
@@ -197,6 +197,7 @@ public class DedicatedServer extends MinecraftServer implements ServerInterface
// Paper start
paperConfigurations.initializeGlobalConfiguration();
paperConfigurations.initializeWorldDefaultsConfiguration();
+ org.spigotmc.WatchdogThread.doStart(org.spigotmc.SpigotConfig.timeoutTime, org.spigotmc.SpigotConfig.restartOnCrash);
2022-06-09 23:43:27 +02:00
io.papermc.paper.command.PaperCommands.registerCommands(this);
com.destroystokyo.paper.Metrics.PaperMetrics.startMetrics();
com.destroystokyo.paper.VersionHistoryManager.INSTANCE.getClass(); // load version history now
2021-06-11 14:02:28 +02:00
diff --git a/src/main/java/org/bukkit/craftbukkit/CraftServer.java b/src/main/java/org/bukkit/craftbukkit/CraftServer.java
2023-02-19 15:57:10 +01:00
index 86d8e08b19316bb202b62bd7ac1187eb4827e5c5..ccb176c7292aa49780ef21f7a17d97ca7ef53794 100644
2021-06-11 14:02:28 +02:00
--- a/src/main/java/org/bukkit/craftbukkit/CraftServer.java
+++ b/src/main/java/org/bukkit/craftbukkit/CraftServer.java
2023-02-19 15:57:10 +01:00
@@ -906,6 +906,7 @@ public final class CraftServer implements Server {
2021-06-11 14:02:28 +02:00
@Override
public void reload() {
+ org.spigotmc.WatchdogThread.hasStarted = false; // Paper - Disable watchdog early timeout on reload
2021-06-13 01:45:00 +02:00
this.reloadCount++;
this.configuration = YamlConfiguration.loadConfiguration(this.getConfigFile());
this.commandsConfiguration = YamlConfiguration.loadConfiguration(this.getCommandsConfigFile());
2023-02-19 15:57:10 +01:00
@@ -994,6 +995,7 @@ public final class CraftServer implements Server {
2021-06-13 01:45:00 +02:00
this.enablePlugins(PluginLoadOrder.STARTUP);
this.enablePlugins(PluginLoadOrder.POSTWORLD);
this.getPluginManager().callEvent(new ServerLoadEvent(ServerLoadEvent.LoadType.RELOAD));
2021-06-11 14:02:28 +02:00
+ org.spigotmc.WatchdogThread.hasStarted = true; // Paper - Disable watchdog early timeout on reload
}
@Override
diff --git a/src/main/java/org/spigotmc/SpigotConfig.java b/src/main/java/org/spigotmc/SpigotConfig.java
2022-12-07 20:22:28 +01:00
index ddc5f2d9aa7dda6aff132392927e3d7e3674dbff..3ac48dafe2300ff4cf4591569fec9ce4916503cd 100644
2021-06-11 14:02:28 +02:00
--- a/src/main/java/org/spigotmc/SpigotConfig.java
+++ b/src/main/java/org/spigotmc/SpigotConfig.java
@@ -229,7 +229,7 @@ public class SpigotConfig
2021-06-13 01:45:00 +02:00
SpigotConfig.restartScript = SpigotConfig.getString( "settings.restart-script", SpigotConfig.restartScript );
SpigotConfig.restartMessage = SpigotConfig.transform( SpigotConfig.getString( "messages.restart", "Server is restarting" ) );
SpigotConfig.commands.put( "restart", new RestartCommand( "restart" ) );
2021-06-11 14:02:28 +02:00
- WatchdogThread.doStart( timeoutTime, restartOnCrash );
+ // WatchdogThread.doStart( timeoutTime, restartOnCrash ); // Paper - moved to after paper config initialization
2021-06-11 14:02:28 +02:00
}
public static boolean bungee;
diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java
Rewrite chunk system (#8177) Patch documentation to come Issues with the old system that are fixed now: - World generation does not scale with cpu cores effectively. - Relies on the main thread for scheduling and maintaining chunk state, dropping chunk load/generate rates at lower tps. - Unreliable prioritisation of chunk gen/load calls that block the main thread. - Shutdown logic is utterly unreliable, as it has to wait for all chunks to unload - is it guaranteed that the chunk system is in a state on shutdown that it can reliably do this? Watchdog shutdown also typically failed due to thread checks, which is now resolved. - Saving of data is not unified (i.e can save chunk data without saving entity data, poses problems for desync if shutdown is really abnormal. - Entities are not loaded with chunks. This caused quite a bit of headache for Chunk#getEntities API, but now the new chunk system loads entities with chunks so that they are ready whenever the chunk loads in. Effectively brings the behavior back to 1.16 era, but still storing entities in their own separate regionfiles. The above list is not complete. The patch documentation will complete it. New chunk system hard relies on starlight and dataconverter, and most importantly the new concurrent utilities in ConcurrentUtil. Some of the old async chunk i/o interface (i.e the old file io thread reroutes _some_ calls to the new file io thread) is kept for plugin compat reasons. It will be removed in the next major version of minecraft. The old legacy chunk system patches have been moved to the removed folder in case we need them again.
2022-09-26 10:02:51 +02:00
index a9897c494b3dc56d900356d74030359832febbaa..b47d043144c499b1499f6b4be5a16a3f75c9fcb8 100644
2021-06-11 14:02:28 +02:00
--- a/src/main/java/org/spigotmc/WatchdogThread.java
+++ b/src/main/java/org/spigotmc/WatchdogThread.java
Rewrite chunk system (#8177) Patch documentation to come Issues with the old system that are fixed now: - World generation does not scale with cpu cores effectively. - Relies on the main thread for scheduling and maintaining chunk state, dropping chunk load/generate rates at lower tps. - Unreliable prioritisation of chunk gen/load calls that block the main thread. - Shutdown logic is utterly unreliable, as it has to wait for all chunks to unload - is it guaranteed that the chunk system is in a state on shutdown that it can reliably do this? Watchdog shutdown also typically failed due to thread checks, which is now resolved. - Saving of data is not unified (i.e can save chunk data without saving entity data, poses problems for desync if shutdown is really abnormal. - Entities are not loaded with chunks. This caused quite a bit of headache for Chunk#getEntities API, but now the new chunk system loads entities with chunks so that they are ready whenever the chunk loads in. Effectively brings the behavior back to 1.16 era, but still storing entities in their own separate regionfiles. The above list is not complete. The patch documentation will complete it. New chunk system hard relies on starlight and dataconverter, and most importantly the new concurrent utilities in ConcurrentUtil. Some of the old async chunk i/o interface (i.e the old file io thread reroutes _some_ calls to the new file io thread) is kept for plugin compat reasons. It will be removed in the next major version of minecraft. The old legacy chunk system patches have been moved to the removed folder in case we need them again.
2022-09-26 10:02:51 +02:00
@@ -14,6 +14,10 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
2021-06-11 14:02:28 +02:00
private static WatchdogThread instance;
private long timeoutTime;
private boolean restart;
+ private final long earlyWarningEvery; // Paper - Timeout time for just printing a dump but not restarting
+ private final long earlyWarningDelay; // Paper
+ public static volatile boolean hasStarted; // Paper
+ private long lastEarlyWarning; // Paper - Keep track of short dump times to avoid spamming console with short dumps
private volatile long lastTick;
private volatile boolean stopping;
Rewrite chunk system (#8177) Patch documentation to come Issues with the old system that are fixed now: - World generation does not scale with cpu cores effectively. - Relies on the main thread for scheduling and maintaining chunk state, dropping chunk load/generate rates at lower tps. - Unreliable prioritisation of chunk gen/load calls that block the main thread. - Shutdown logic is utterly unreliable, as it has to wait for all chunks to unload - is it guaranteed that the chunk system is in a state on shutdown that it can reliably do this? Watchdog shutdown also typically failed due to thread checks, which is now resolved. - Saving of data is not unified (i.e can save chunk data without saving entity data, poses problems for desync if shutdown is really abnormal. - Entities are not loaded with chunks. This caused quite a bit of headache for Chunk#getEntities API, but now the new chunk system loads entities with chunks so that they are ready whenever the chunk loads in. Effectively brings the behavior back to 1.16 era, but still storing entities in their own separate regionfiles. The above list is not complete. The patch documentation will complete it. New chunk system hard relies on starlight and dataconverter, and most importantly the new concurrent utilities in ConcurrentUtil. Some of the old async chunk i/o interface (i.e the old file io thread reroutes _some_ calls to the new file io thread) is kept for plugin compat reasons. It will be removed in the next major version of minecraft. The old legacy chunk system patches have been moved to the removed folder in case we need them again.
2022-09-26 10:02:51 +02:00
@@ -22,6 +26,8 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
2021-06-11 14:02:28 +02:00
super( "Paper Watchdog Thread" );
this.timeoutTime = timeoutTime;
this.restart = restart;
+ earlyWarningEvery = Math.min(io.papermc.paper.configuration.GlobalConfiguration.get().watchdog.earlyWarningEvery, timeoutTime); // Paper
+ earlyWarningDelay = Math.min(io.papermc.paper.configuration.GlobalConfiguration.get().watchdog.earlyWarningDelay, timeoutTime); // Paper
2021-06-11 14:02:28 +02:00
}
private static long monotonicMillis()
Rewrite chunk system (#8177) Patch documentation to come Issues with the old system that are fixed now: - World generation does not scale with cpu cores effectively. - Relies on the main thread for scheduling and maintaining chunk state, dropping chunk load/generate rates at lower tps. - Unreliable prioritisation of chunk gen/load calls that block the main thread. - Shutdown logic is utterly unreliable, as it has to wait for all chunks to unload - is it guaranteed that the chunk system is in a state on shutdown that it can reliably do this? Watchdog shutdown also typically failed due to thread checks, which is now resolved. - Saving of data is not unified (i.e can save chunk data without saving entity data, poses problems for desync if shutdown is really abnormal. - Entities are not loaded with chunks. This caused quite a bit of headache for Chunk#getEntities API, but now the new chunk system loads entities with chunks so that they are ready whenever the chunk loads in. Effectively brings the behavior back to 1.16 era, but still storing entities in their own separate regionfiles. The above list is not complete. The patch documentation will complete it. New chunk system hard relies on starlight and dataconverter, and most importantly the new concurrent utilities in ConcurrentUtil. Some of the old async chunk i/o interface (i.e the old file io thread reroutes _some_ calls to the new file io thread) is kept for plugin compat reasons. It will be removed in the next major version of minecraft. The old legacy chunk system patches have been moved to the removed folder in case we need them again.
2022-09-26 10:02:51 +02:00
@@ -61,9 +67,18 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
2021-06-13 01:45:00 +02:00
while ( !this.stopping )
2021-06-11 14:02:28 +02:00
{
2021-06-13 01:45:00 +02:00
//
- if ( this.lastTick != 0 && this.timeoutTime > 0 && WatchdogThread.monotonicMillis() > this.lastTick + this.timeoutTime && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable
2021-06-11 14:02:28 +02:00
+ // Paper start
+ Logger log = Bukkit.getServer().getLogger();
2021-06-13 01:45:00 +02:00
+ long currentTime = WatchdogThread.monotonicMillis();
+ if ( this.lastTick != 0 && this.timeoutTime > 0 && currentTime > this.lastTick + this.earlyWarningEvery && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable
2021-06-11 14:02:28 +02:00
{
- Logger log = Bukkit.getServer().getLogger();
+ boolean isLongTimeout = currentTime > lastTick + timeoutTime;
+ // Don't spam early warning dumps
+ if ( !isLongTimeout && (earlyWarningEvery <= 0 || !hasStarted || currentTime < lastEarlyWarning + earlyWarningEvery || currentTime < lastTick + earlyWarningDelay)) continue;
+ if ( !isLongTimeout && MinecraftServer.getServer().hasStopped()) continue; // Don't spam early watchdog warnings during shutdown, we'll come back to this...
+ lastEarlyWarning = currentTime;
+ if (isLongTimeout) {
+ // Paper end
log.log( Level.SEVERE, "------------------------------" );
log.log( Level.SEVERE, "The server has stopped responding! This is (probably) not a Paper bug." ); // Paper
log.log( Level.SEVERE, "If you see a plugin in the Server thread dump below, then please report it to that author" );
Rewrite chunk system (#8177) Patch documentation to come Issues with the old system that are fixed now: - World generation does not scale with cpu cores effectively. - Relies on the main thread for scheduling and maintaining chunk state, dropping chunk load/generate rates at lower tps. - Unreliable prioritisation of chunk gen/load calls that block the main thread. - Shutdown logic is utterly unreliable, as it has to wait for all chunks to unload - is it guaranteed that the chunk system is in a state on shutdown that it can reliably do this? Watchdog shutdown also typically failed due to thread checks, which is now resolved. - Saving of data is not unified (i.e can save chunk data without saving entity data, poses problems for desync if shutdown is really abnormal. - Entities are not loaded with chunks. This caused quite a bit of headache for Chunk#getEntities API, but now the new chunk system loads entities with chunks so that they are ready whenever the chunk loads in. Effectively brings the behavior back to 1.16 era, but still storing entities in their own separate regionfiles. The above list is not complete. The patch documentation will complete it. New chunk system hard relies on starlight and dataconverter, and most importantly the new concurrent utilities in ConcurrentUtil. Some of the old async chunk i/o interface (i.e the old file io thread reroutes _some_ calls to the new file io thread) is kept for plugin compat reasons. It will be removed in the next major version of minecraft. The old legacy chunk system patches have been moved to the removed folder in case we need them again.
2022-09-26 10:02:51 +02:00
@@ -93,30 +108,46 @@ public final class WatchdogThread extends io.papermc.paper.util.TickThread // Pa
2021-06-11 14:02:28 +02:00
}
}
// Paper end
+ } else
+ {
+ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH - " + Bukkit.getServer().getVersion() + " ---");
+ log.log(Level.SEVERE, "The server has not responded for " + (currentTime - lastTick) / 1000 + " seconds! Creating thread dump");
+ }
+ // Paper end - Different message for short timeout
log.log( Level.SEVERE, "------------------------------" );
log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" ); // Paper
Rewrite chunk system (#8177) Patch documentation to come Issues with the old system that are fixed now: - World generation does not scale with cpu cores effectively. - Relies on the main thread for scheduling and maintaining chunk state, dropping chunk load/generate rates at lower tps. - Unreliable prioritisation of chunk gen/load calls that block the main thread. - Shutdown logic is utterly unreliable, as it has to wait for all chunks to unload - is it guaranteed that the chunk system is in a state on shutdown that it can reliably do this? Watchdog shutdown also typically failed due to thread checks, which is now resolved. - Saving of data is not unified (i.e can save chunk data without saving entity data, poses problems for desync if shutdown is really abnormal. - Entities are not loaded with chunks. This caused quite a bit of headache for Chunk#getEntities API, but now the new chunk system loads entities with chunks so that they are ready whenever the chunk loads in. Effectively brings the behavior back to 1.16 era, but still storing entities in their own separate regionfiles. The above list is not complete. The patch documentation will complete it. New chunk system hard relies on starlight and dataconverter, and most importantly the new concurrent utilities in ConcurrentUtil. Some of the old async chunk i/o interface (i.e the old file io thread reroutes _some_ calls to the new file io thread) is kept for plugin compat reasons. It will be removed in the next major version of minecraft. The old legacy chunk system patches have been moved to the removed folder in case we need them again.
2022-09-26 10:02:51 +02:00
io.papermc.paper.chunk.system.scheduling.ChunkTaskScheduler.dumpAllChunkLoadInfo(isLongTimeout); // Paper // Paper - rewrite chunk system
2021-06-13 01:45:00 +02:00
WatchdogThread.dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().serverThread.getId(), Integer.MAX_VALUE ), log );
2021-06-11 14:02:28 +02:00
log.log( Level.SEVERE, "------------------------------" );
//
+ // Paper start - Only print full dump on long timeouts
+ if ( isLongTimeout )
+ {
log.log( Level.SEVERE, "Entire Thread Dump:" );
ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads( true, true );
for ( ThreadInfo thread : threads )
{
2021-06-13 01:45:00 +02:00
WatchdogThread.dumpThread( thread, log );
2021-06-11 14:02:28 +02:00
}
+ } else {
+ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---");
+ }
+
log.log( Level.SEVERE, "------------------------------" );
+ if ( isLongTimeout )
+ {
2021-06-13 01:45:00 +02:00
if ( this.restart && !MinecraftServer.getServer().hasStopped() )
2021-06-11 14:02:28 +02:00
{
RestartCommand.restart();
}
break;
+ } // Paper end
}
try
{
- sleep( 10000 );
+ sleep( 1000 ); // Paper - Reduce check time to every second instead of every ten seconds, more consistent and allows for short timeout
} catch ( InterruptedException ex )
{
interrupt();