tokenizer update: normalize "eco_" and "co_" to "co"

This commit is contained in:
Kendell R 2024-04-22 16:30:04 -07:00
parent 27a750c2e0
commit 7d6a9eab35
No known key found for this signature in database
GPG Key ID: 64314E306EEF6109
3 changed files with 4 additions and 2 deletions

View File

@@ -16,7 +16,7 @@ def tokenize(string):
string = re.sub(r"['] ", " ' ", string)
string = re.sub(r"[\"“”] ", " '' ", string)
string = re.sub(r"[-+]?[.\d]*[\d]+[:,.\d]*|²", " <number> ", string)
string = string.replace("b-parasite", "b parasite").replace("nfc/rfid", "nfc rfid").replace("fastled", "fast led").replace("neopixelbus", "neopixel bus").replace("neopixel", "neo pixel").replace("h-bridge", "h bridge").replace("rgbw", "rgb white").replace("rgbww", "rgb cold warm").replace("rgbct", "rgb temperature brightness").replace("faqs", "frequently asked questions").replace("faq", "frequently asked questions").replace("cannot", "can not").replace("addressable", "addressed").replace("automations", "automation")
string = string.replace("b-parasite", "b parasite").replace("nfc/rfid", "nfc rfid").replace("fastled", "fast led").replace("neopixelbus", "neopixel bus").replace("neopixel", "neo pixel").replace("h-bridge", "h bridge").replace("eco_", "co").replace("co_", "co").replace("rgbw", "rgb white").replace("rgbww", "rgb cold warm").replace("rgbct", "rgb temperature brightness").replace("faqs", "frequently asked questions").replace("faq", "frequently asked questions").replace("cannot", "can not").replace("addressable", "addressed").replace("automations", "automation")
string = re.sub(r"\bha\b", "home assistant", string)
string = re.sub(r"\badc\b", "analog digital converter", string)
string = re.sub(r"\s+", " ", string)

View File

@@ -500,7 +500,7 @@ closed 6.351 0.387 -0.175 -0.390 -0.382 0.483 -0.599 1.091 0.143 0.689 -0.685 0.
closing 5.658 0.400 0.421 1.001 -0.108 0.451 -0.248 1.231 0.380 0.825 -1.032 1.004 -0.936 -2.700 0.410 -0.260 0.404 0.107 0.320 -0.481 -0.271 -0.230 -0.496 -0.670 0.126 0.208 0.862 -0.366 -0.019 -0.192 -0.066 0.065 -0.363 0.416 -0.384 0.979 0.065 -0.461 -0.082 0.836 0.350 -0.257 1.167 -0.252 0.170 0.495 -0.203 -0.419 -0.202 0.228 0.574
clothes 6.351 -0.732 -1.234 0.256 0.291 0.153 -0.410 1.400 -0.228 0.428 0.285 -0.105 0.387 -3.788 1.174 -0.808 0.544 -0.831 -1.119 -0.845 -0.086 -0.441 -0.656 0.520 0.594 -0.568 1.058 0.037 0.567 -0.288 -1.311 -0.429 0.769 0.400 0.056 0.124 -0.712 -0.335 0.751 0.328 -0.234 0.046 0.926 -0.626 -0.615 0.901 -0.252 1.528 0.385 -0.729 -0.000
cmos 5.658 -0.562 0.475 0.275 0.903 -0.248 -0.557 -0.427 -0.771 -0.058 -0.813 -1.089 -0.105 -1.107 0.494 0.079 -0.976 0.160 -0.571 1.171 1.101 -2.388 -0.879 1.170 0.150 -0.411 0.828 -1.025 0.769 -0.859 1.209 0.349 -0.327 1.768 -0.915 0.964 -0.969 0.256 -0.121 0.128 0.112 -0.160 1.657 0.672 0.561 -0.776 -1.621 -0.073 0.654 -0.032 -1.924
co 4.405 0.466 0.380 -0.273 -0.260 -0.966 0.019 -0.690 -0.124 0.436 -0.641 -0.870 0.366 -2.513 0.157 1.049 0.120 0.909 -0.419 -0.140 -0.071 0.951 -0.174 0.703 0.270 -0.665 -0.846 -0.094 0.232 0.885 0.785 0.229 0.079 -0.677 -0.091 0.697 -0.268 -0.000 -0.014 -0.359 0.043 -1.974 0.890 -0.095 0.121 -0.104 0.563 -0.385 -0.327 -0.023 -0.904
co 4.271 0.466 0.380 -0.273 -0.260 -0.966 0.019 -0.690 -0.124 0.436 -0.641 -0.870 0.366 -2.513 0.157 1.049 0.120 0.909 -0.419 -0.140 -0.071 0.951 -0.174 0.703 0.270 -0.665 -0.846 -0.094 0.232 0.885 0.785 0.229 0.079 -0.677 -0.091 0.697 -0.268 -0.000 -0.014 -0.359 0.043 -1.974 0.890 -0.095 0.121 -0.104 0.563 -0.385 -0.327 -0.023 -0.904
coast 6.351 -0.229 -0.055 0.111 -1.025 0.308 0.277 0.771 0.131 0.891 -0.968 -0.700 -0.566 -3.530 0.151 1.044 -0.674 0.046 0.405 0.242 0.029 1.001 -0.906 0.164 -1.165 0.269 0.568 -0.398 0.408 -0.074 0.421 0.009 -0.087 0.002 0.725 0.841 -0.497 0.272 0.555 0.407 0.516 -0.605 -0.419 0.670 0.211 -0.708 0.795 0.007 -0.327 0.299 1.325
coat 6.351 -1.218 -1.881 0.273 0.017 -0.384 -0.811 0.468 -0.132 1.108 -0.107 0.345 0.655 -2.800 0.356 -0.174 0.618 -0.083 -0.240 -0.237 -0.246 -0.740 -0.009 1.024 -0.239 0.202 1.067 0.132 0.682 -0.129 -0.470 0.134 1.175 0.771 -0.865 0.223 -0.409 -0.827 0.566 0.554 0.173 -0.260 0.291 -0.026 -0.046 0.679 0.321 1.217 0.125 0.044 -0.289
code 4.965 0.597 0.226 -1.560 0.063 0.823 -0.175 0.842 -0.466 1.235 -0.057 0.263 0.413 -3.034 -0.019 -0.278 0.469 0.438 0.213 -0.469 1.025 -0.954 -0.660 0.353 -0.646 0.004 -0.227 0.563 -0.056 0.257 0.478 0.334 -0.269 -0.219 -0.890 1.138 -0.295 -0.611 -0.078 0.002 -0.480 0.153 -0.269 -0.151 -0.484 0.417 -0.661 -0.030 -0.869 0.051 0.054

View File

@@ -19,6 +19,8 @@ const tokenize = (value) => {
.replaceAll("neopixelbus", "neopixel bus")
.replaceAll("neopixel", "neo pixel")
.replaceAll("h-bridge", "h bridge")
.replaceAll("eco_", "co")
.replaceAll("co_", "co")
.replaceAll("rgbw", "rgb white")
.replaceAll("rgbww", "rgb cold warm")
.replaceAll("rgbct", "rgb temperature brightness")