From 4d433968359d54b6d48c3e1cbb6a024aeaffe08f Mon Sep 17 00:00:00 2001 From: Oxan van Leeuwen Date: Wed, 10 Nov 2021 19:42:41 +0100 Subject: [PATCH] Clean-up string sanitation helpers (#2660) --- esphome/components/mqtt/mqtt_component.cpp | 4 +- esphome/core/entity_base.cpp | 2 +- esphome/core/helpers.cpp | 47 +++++++++------------- esphome/core/helpers.h | 28 ++++++------- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/esphome/components/mqtt/mqtt_component.cpp b/esphome/components/mqtt/mqtt_component.cpp index cebb8dd086..e3ae4dea50 100644 --- a/esphome/components/mqtt/mqtt_component.cpp +++ b/esphome/components/mqtt/mqtt_component.cpp @@ -17,7 +17,7 @@ static const char *const TAG = "mqtt.component"; void MQTTComponent::set_retain(bool retain) { this->retain_ = retain; } std::string MQTTComponent::get_discovery_topic_(const MQTTDiscoveryInfo &discovery_info) const { - std::string sanitized_name = sanitize_string_allowlist(App.get_name(), HOSTNAME_CHARACTER_ALLOWLIST); + std::string sanitized_name = str_sanitize(App.get_name()); return discovery_info.prefix + "/" + this->component_type() + "/" + sanitized_name + "/" + this->get_default_object_id_() + "/config"; } @@ -136,7 +136,7 @@ bool MQTTComponent::is_discovery_enabled() const { } std::string MQTTComponent::get_default_object_id_() const { - return sanitize_string_allowlist(to_lowercase_underscore(this->friendly_name()), HOSTNAME_CHARACTER_ALLOWLIST); + return str_sanitize(str_snake_case(this->friendly_name())); } void MQTTComponent::subscribe(const std::string &topic, mqtt_callback_t callback, uint8_t qos) { diff --git a/esphome/core/entity_base.cpp b/esphome/core/entity_base.cpp index 41f08b28a6..a9e1414018 100644 --- a/esphome/core/entity_base.cpp +++ b/esphome/core/entity_base.cpp @@ -35,7 +35,7 @@ const std::string &EntityBase::get_object_id() { return this->object_id_; } // Calculate Object ID Hash from Entity Name void EntityBase::calc_object_id_() { - this->object_id_ = sanitize_string_allowlist(to_lowercase_underscore(this->name_), HOSTNAME_CHARACTER_ALLOWLIST); + this->object_id_ = str_sanitize(str_snake_case(this->name_)); // FNV-1 hash this->object_id_hash_ = fnv1_hash(this->object_id_); } diff --git a/esphome/core/helpers.cpp b/esphome/core/helpers.cpp index 3047facf45..edd2f74c12 100644 --- a/esphome/core/helpers.cpp +++ b/esphome/core/helpers.cpp @@ -128,31 +128,6 @@ float gamma_uncorrect(float value, float gamma) { return powf(value, 1 / gamma); } -std::string to_lowercase_underscore(std::string s) { - std::transform(s.begin(), s.end(), s.begin(), ::tolower); - std::replace(s.begin(), s.end(), ' ', '_'); - return s; -} - -std::string sanitize_string_allowlist(const std::string &s, const std::string &allowlist) { - std::string out(s); - out.erase(std::remove_if(out.begin(), out.end(), - [&allowlist](const char &c) { return allowlist.find(c) == std::string::npos; }), - out.end()); - return out; -} - -std::string sanitize_hostname(const std::string &hostname) { - std::string s = sanitize_string_allowlist(hostname, HOSTNAME_CHARACTER_ALLOWLIST); - return truncate_string(s, 63); -} - -std::string truncate_string(const std::string &s, size_t length) { - if (s.length() > length) - return s.substr(0, length); - return s; -} - std::string value_accuracy_to_string(float value, int8_t accuracy_decimals) { if (accuracy_decimals < 0) { auto multiplier = powf(10.0f, accuracy_decimals); @@ -191,8 +166,6 @@ ParseOnOffState parse_on_off(const char *str, const char *on, const char *off) { return PARSE_NONE; } -const char *const HOSTNAME_CHARACTER_ALLOWLIST = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"; - uint8_t crc8(uint8_t *data, uint8_t len) { uint8_t crc = 0; @@ -481,4 +454,24 @@ IRAM_ATTR InterruptLock::InterruptLock() { portDISABLE_INTERRUPTS(); } IRAM_ATTR InterruptLock::~InterruptLock() { portENABLE_INTERRUPTS(); } #endif +// --------------------------------------------------------------------------------------------------------------------- + +std::string str_truncate(const std::string &str, size_t length) { + return str.length() > length ? str.substr(0, length) : str; +} +std::string str_snake_case(const std::string &str) { + std::string result; + result.resize(str.length()); + std::transform(str.begin(), str.end(), result.begin(), ::tolower); + std::replace(result.begin(), result.end(), ' ', '_'); + return result; +} +std::string str_sanitize(const std::string &str) { + std::string out; + std::copy_if(str.begin(), str.end(), std::back_inserter(out), [](const char &c) { + return c == '-' || c == '_' || (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + }); + return out; +} + } // namespace esphome diff --git a/esphome/core/helpers.h b/esphome/core/helpers.h index f29af06d89..9a60d036e4 100644 --- a/esphome/core/helpers.h +++ b/esphome/core/helpers.h @@ -25,9 +25,6 @@ namespace esphome { -/// The characters that are allowed in a hostname. -extern const char *const HOSTNAME_CHARACTER_ALLOWLIST; - /// Read the raw MAC address into the provided byte array (6 bytes). void get_mac_address_raw(uint8_t *mac); @@ -55,14 +52,6 @@ std::string to_string(double val); std::string to_string(long double val); optional parse_hex(const std::string &str, size_t start, size_t length); optional parse_hex(char chr); -/// Sanitize the hostname by removing characters that are not in the allowlist and truncating it to 63 chars. -std::string sanitize_hostname(const std::string &hostname); - -/// Truncate a string to a specific length -std::string truncate_string(const std::string &s, size_t length); - -/// Convert the string to lowercase_underscore. -std::string to_lowercase_underscore(std::string s); /// Compare string a to string b (ignoring case) and return whether they are equal. bool str_equals_case_insensitive(const std::string &a, const std::string &b); @@ -145,9 +134,6 @@ std::string uint64_to_string(uint64_t num); /// Convert a uint32_t to a hex string std::string uint32_to_string(uint32_t num); -/// Sanitizes the input string with the allowlist. -std::string sanitize_string_allowlist(const std::string &s, const std::string &allowlist); - uint8_t reverse_bits_8(uint8_t x); uint16_t reverse_bits_16(uint16_t x); uint32_t reverse_bits_32(uint32_t x); @@ -331,6 +317,20 @@ template::value, int> = 0> constexpr ///@} +/// @name Strings +///@{ + +/// Truncate a string to a specific length. +std::string str_truncate(const std::string &str, size_t length); + +/// Convert the string to snake case (lowercase with underscores). +std::string str_snake_case(const std::string &str); + +/// Sanitizes the input string by removing all characters but alphanumerics, dashes and underscores. +std::string str_sanitize(const std::string &str); + +///@} + /// @name Parsing & formatting ///@{