From 7cbd79fee5f87c5c611685100ef8167d90b831f5 Mon Sep 17 00:00:00 2001 From: bbielsa Date: Tue, 26 Oct 2021 21:19:20 -0400 Subject: [PATCH 01/11] Add helper function parse_subscription_export_csv() which parses the csv format returned by the subscription exporter --- src/invidious/helpers/utils.cr | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 8453d605..6d12fe8d 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -1,3 +1,5 @@ +require "csv" + # See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html def ci_lower_bound(pos, n) if n == 0 @@ -367,3 +369,23 @@ def fetch_random_instance return filtered_instance_list.sample(1)[0] end + +def parse_subscription_export_csv(csv_content : String) + rows = CSV.new(csv_content, headers: true) + subscriptions = Array(String).new + + rows.each do |row| + # Channel ID is the first column in the csv export we can't use the header + # name, because I believe the header name is localized depending on the + # language the user has set on their account + channel_id = row[0].strip + + if channel_id.empty? + next + end + + subscriptions << channel_id + end + + subscriptions +end From 43ff3be751920bedb394ff5cf8cd27812131c489 Mon Sep 17 00:00:00 2001 From: bbielsa Date: Wed, 27 Oct 2021 17:54:40 -0400 Subject: [PATCH 02/11] Test if body content is likely JSON, if so parse the json format of subscriptions export. 
If the content is anything else, assume it is CSV and parse --- src/invidious.cr | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/invidious.cr b/src/invidious.cr index fb67af87..3a358c20 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -821,11 +821,14 @@ post "/data_control" do |env| user.subscriptions += subscriptions.xpath_nodes(%q(//outline[@type="rss"])).map do |channel| channel["xmlUrl"].match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] end - else + elsif body[0] == '[' subscriptions = JSON.parse(body) user.subscriptions += subscriptions.as_a.compact_map do |entry| entry["snippet"]["resourceId"]["channelId"].as_s end + else + subscriptions = parse_subscription_export_csv(body) + user.subscriptions += subscriptions end user.subscriptions.uniq! From 62057e676a4f4359b9e977b9a5aa055c61e16c8e Mon Sep 17 00:00:00 2001 From: bbielsa Date: Wed, 3 Nov 2021 00:31:43 -0400 Subject: [PATCH 03/11] Move parse_subscription_export_csv function to user/imports.cr --- src/invidious/helpers/utils.cr | 20 -------------------- src/invidious/user/imports.cr | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 20 deletions(-) create mode 100644 src/invidious/user/imports.cr diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 6d12fe8d..8bf6b272 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -369,23 +369,3 @@ def fetch_random_instance return filtered_instance_list.sample(1)[0] end - -def parse_subscription_export_csv(csv_content : String) - rows = CSV.new(csv_content, headers: true) - subscriptions = Array(String).new - - rows.each do |row| - # Channel ID is the first column in the csv export we can't use the header - # name, because I believe the header name is localized depending on the - # language the user has set on their account - channel_id = row[0].strip - - if channel_id.empty? 
- next - end - - subscriptions << channel_id - end - - subscriptions -end diff --git a/src/invidious/user/imports.cr b/src/invidious/user/imports.cr new file mode 100644 index 00000000..0ea554bd --- /dev/null +++ b/src/invidious/user/imports.cr @@ -0,0 +1,17 @@ +def parse_subscription_export_csv(csv_content : String) + rows = CSV.new(csv_content, headers: true) + subscriptions = Array(String).new + + rows.each do |row| + # Channel ID is the first column in the csv export we can't use the header + # name, because the header name is localized depending on the + # language the user has set on their account + channel_id = row[0].strip + + next if channel_id.empty? + + subscriptions << channel_id + end + + subscriptions +end From 9607fe03af8dc02a53ffd05df6f815e675bfadae Mon Sep 17 00:00:00 2001 From: bbielsa Date: Wed, 3 Nov 2021 00:45:03 -0400 Subject: [PATCH 04/11] Detect the type of subscription import format based on the content type of the file uploaded --- src/invidious.cr | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/invidious.cr b/src/invidious.cr index 3a358c20..bdecff1d 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -746,6 +746,8 @@ post "/data_control" do |env| HTTP::FormData.parse(env.request) do |part| body = part.body.gets_to_end + type = part.headers["Content-Type"] + next if body.empty? 
# TODO: Unify into single import based on content-type @@ -816,12 +818,12 @@ post "/data_control" do |env| end end when "import_youtube" - if body[0..4] == "<?xml" + if type == "application/xml" subscriptions = XML.parse(body) user.subscriptions += subscriptions.xpath_nodes(%q(//outline[@type="rss"])).map do |channel| channel["xmlUrl"].match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] end - elsif body[0] == '[' + elsif type == "application/json" subscriptions = JSON.parse(body) user.subscriptions += subscriptions.as_a.compact_map do |entry| entry["snippet"]["resourceId"]["channelId"].as_s From [commit hash lost in extraction] Mon Sep 17 00:00:00 2001 From: bbielsa Date: Wed, 3 Nov 2021 19:57:00 -0400 Subject: [PATCH 05/11] Add text/xml as a possible mime type for xml file uploads --- src/invidious.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/invidious.cr b/src/invidious.cr index bdecff1d..85053da2 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -818,7 +818,7 @@ post "/data_control" do |env| end end when "import_youtube" - if type == "application/xml" + if type == "application/xml" || type == "text/xml" subscriptions = XML.parse(body) user.subscriptions += subscriptions.xpath_nodes(%q(//outline[@type="rss"])).map do |channel| channel["xmlUrl"].match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] From 0a66a68db8630e5012a3b4a03db37e862410c628 Mon Sep 17 00:00:00 2001 From: bbielsa Date: Wed, 17 Nov 2021 20:41:23 -0500 Subject: [PATCH 06/11] Move require statement to the correct file --- src/invidious/helpers/utils.cr | 2 -- src/invidious/user/imports.cr | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 8bf6b272..8453d605 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -1,5 +1,3 @@ -require "csv" - # See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html def ci_lower_bound(pos, n) if n == 0 diff --git a/src/invidious/user/imports.cr b/src/invidious/user/imports.cr index 0ea554bd..836da14d 100644 --- a/src/invidious/user/imports.cr +++ b/src/invidious/user/imports.cr @@ -1,3 +1,5 @@ +require "csv" + def parse_subscription_export_csv(csv_content : String) rows = CSV.new(csv_content, headers: true) subscriptions = Array(String).new From 6764185543fc6fad8422fb6fc00b305bb4376d37 Mon Sep 17 00:00:00 2001 From: bbielsa Date: Wed, 17 Nov 2021 20:44:04 -0500 Subject: [PATCH 07/11] Add explicit return keyword --- 
src/invidious/user/imports.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/invidious/user/imports.cr b/src/invidious/user/imports.cr index 836da14d..98a62c17 100644 --- a/src/invidious/user/imports.cr +++ b/src/invidious/user/imports.cr @@ -15,5 +15,5 @@ def parse_subscription_export_csv(csv_content : String) subscriptions << channel_id end - subscriptions + return subscriptions end From 2a541cb4d5bedafdfa720ea15f245b50d51438f7 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Wed, 12 Jan 2022 01:20:45 +0100 Subject: [PATCH 08/11] Add tests --- spec/invidious/user/imports_spec.cr | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 spec/invidious/user/imports_spec.cr diff --git a/spec/invidious/user/imports_spec.cr b/spec/invidious/user/imports_spec.cr new file mode 100644 index 00000000..a1acc2c0 --- /dev/null +++ b/spec/invidious/user/imports_spec.cr @@ -0,0 +1,49 @@ +require "spectator" +require "../../../src/invidious/user/imports" + +Spectator.configure do |config| + config.fail_blank + config.randomize +end + +def csv_sample + return <<-CSV + Kanal-ID,Kanal-URL,Kanaltitel + UC0hHW5Y08ggq-9kbrGgWj0A,http://www.youtube.com/channel/UC0hHW5Y08ggq-9kbrGgWj0A,Matias Marolla + UC0vBXGSyV14uvJ4hECDOl0Q,http://www.youtube.com/channel/UC0vBXGSyV14uvJ4hECDOl0Q,Techquickie + UC1sELGmy5jp5fQUugmuYlXQ,http://www.youtube.com/channel/UC1sELGmy5jp5fQUugmuYlXQ,Minecraft + UC9kFnwdCRrX7oTjqKd6-tiQ,http://www.youtube.com/channel/UC9kFnwdCRrX7oTjqKd6-tiQ,LUMOX - Topic + UCBa659QWEk1AI4Tg--mrJ2A,http://www.youtube.com/channel/UCBa659QWEk1AI4Tg--mrJ2A,Tom Scott + UCGu6_XQ64rXPR6nuitMQE_A,http://www.youtube.com/channel/UCGu6_XQ64rXPR6nuitMQE_A,Callcenter Fun + UCGwu0nbY2wSkW8N-cghnLpA,http://www.youtube.com/channel/UCGwu0nbY2wSkW8N-cghnLpA,Jaiden Animations + UCQ0OvZ54pCFZwsKxbltg_tg,http://www.youtube.com/channel/UCQ0OvZ54pCFZwsKxbltg_tg,Methos + 
UCRE6itj4Jte4manQEu3Y7OA,http://www.youtube.com/channel/UCRE6itj4Jte4manQEu3Y7OA,Chipflake + UCRLc6zsv_d0OEBO8OOkz-DA,http://www.youtube.com/channel/UCRLc6zsv_d0OEBO8OOkz-DA,Kegy + UCSl5Uxu2LyaoAoMMGp6oTJA,http://www.youtube.com/channel/UCSl5Uxu2LyaoAoMMGp6oTJA,Atomic Shrimp + UCXuqSBlHAE6Xw-yeJA0Tunw,http://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw,Linus Tech Tips + UCZ5XnGb-3t7jCkXdawN2tkA,http://www.youtube.com/channel/UCZ5XnGb-3t7jCkXdawN2tkA,Discord + CSV +end + +Spectator.describe "Invidious::User::Imports" do + it "imports CSV" do + subscriptions = parse_subscription_export_csv(csv_sample) + + expect(subscriptions).to be_an(Array(String)) + expect(subscriptions.size).to eq(13) + + expect(subscriptions).to contain("UC0hHW5Y08ggq-9kbrGgWj0A") + expect(subscriptions).to contain("UC0vBXGSyV14uvJ4hECDOl0Q") + expect(subscriptions).to contain("UC1sELGmy5jp5fQUugmuYlXQ") + expect(subscriptions).to contain("UC9kFnwdCRrX7oTjqKd6-tiQ") + expect(subscriptions).to contain("UCBa659QWEk1AI4Tg--mrJ2A") + expect(subscriptions).to contain("UCGu6_XQ64rXPR6nuitMQE_A") + expect(subscriptions).to contain("UCGwu0nbY2wSkW8N-cghnLpA") + expect(subscriptions).to contain("UCQ0OvZ54pCFZwsKxbltg_tg") + expect(subscriptions).to contain("UCRE6itj4Jte4manQEu3Y7OA") + expect(subscriptions).to contain("UCRLc6zsv_d0OEBO8OOkz-DA") + expect(subscriptions).to contain("UCSl5Uxu2LyaoAoMMGp6oTJA") + expect(subscriptions).to contain("UCXuqSBlHAE6Xw-yeJA0Tunw") + expect(subscriptions).to contain("UCZ5XnGb-3t7jCkXdawN2tkA") + end +end From 81a2300af88c137e1c9be7577a9c68deb45499a0 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Wed, 12 Jan 2022 01:28:58 +0100 Subject: [PATCH 09/11] Prevent import of insanely large files --- src/invidious/user/imports.cr | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/invidious/user/imports.cr b/src/invidious/user/imports.cr index 98a62c17..2ae1dcb1 100644 --- a/src/invidious/user/imports.cr +++ b/src/invidious/user/imports.cr @@ -4,7 +4,15 @@ 
def parse_subscription_export_csv(csv_content : String) rows = CSV.new(csv_content, headers: true) subscriptions = Array(String).new + # Counter to limit the amount of imports. + # This is intended to prevent DoS. + row_counter = 0 + rows.each do |row| + # Limit to 1200 + row_counter += 1 + break if row_counter > 1_200 + # Channel ID is the first column in the csv export we can't use the header # name, because the header name is localized depending on the # language the user has set on their account From eff8b23f57bbec2d6d33fb577f8bf9604da86d16 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Wed, 12 Jan 2022 18:13:15 +0100 Subject: [PATCH 10/11] Improve youtube import type detection Code courtesy of bbielsa: https://gist.github.com/bbielsa/7d131aa2188945f591a8379ec0defc9b --- src/invidious.cr | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/invidious.cr b/src/invidious.cr index 85053da2..7a324bd1 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -818,22 +818,29 @@ post "/data_control" do |env| end end when "import_youtube" - if type == "application/xml" || type == "text/xml" + filename = part.filename || "" + extension = filename.split(".").last + + if extension == "xml" || type == "application/xml" || type == "text/xml" subscriptions = XML.parse(body) user.subscriptions += subscriptions.xpath_nodes(%q(//outline[@type="rss"])).map do |channel| channel["xmlUrl"].match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0] end - elsif type == "application/json" + elsif extension == "json" || type == "application/json" subscriptions = JSON.parse(body) user.subscriptions += subscriptions.as_a.compact_map do |entry| entry["snippet"]["resourceId"]["channelId"].as_s end - else + elsif extension == "csv" || type == "text/csv" subscriptions = parse_subscription_export_csv(body) user.subscriptions += subscriptions + else + halt(env, status_code: 415, + response: error_template(415, "Invalid subscription file uploaded") + ) end - 
user.subscriptions.uniq! + user.subscriptions.uniq! user.subscriptions = get_batch_channels(user.subscriptions, false, false) Invidious::Database::Users.update_subscriptions(user) From 461fae4f77d1774ffc4c377127da923a1fd9f939 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Sat, 15 Jan 2022 12:52:19 +0100 Subject: [PATCH 11/11] Use contain_exactly() in spec --- spec/invidious/user/imports_spec.cr | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/spec/invidious/user/imports_spec.cr b/spec/invidious/user/imports_spec.cr index a1acc2c0..5a682ec5 100644 --- a/spec/invidious/user/imports_spec.cr +++ b/spec/invidious/user/imports_spec.cr @@ -32,18 +32,20 @@ Spectator.describe "Invidious::User::Imports" do expect(subscriptions).to be_an(Array(String)) expect(subscriptions.size).to eq(13) - expect(subscriptions).to contain("UC0hHW5Y08ggq-9kbrGgWj0A") - expect(subscriptions).to contain("UC0vBXGSyV14uvJ4hECDOl0Q") - expect(subscriptions).to contain("UC1sELGmy5jp5fQUugmuYlXQ") - expect(subscriptions).to contain("UC9kFnwdCRrX7oTjqKd6-tiQ") - expect(subscriptions).to contain("UCBa659QWEk1AI4Tg--mrJ2A") - expect(subscriptions).to contain("UCGu6_XQ64rXPR6nuitMQE_A") - expect(subscriptions).to contain("UCGwu0nbY2wSkW8N-cghnLpA") - expect(subscriptions).to contain("UCQ0OvZ54pCFZwsKxbltg_tg") - expect(subscriptions).to contain("UCRE6itj4Jte4manQEu3Y7OA") - expect(subscriptions).to contain("UCRLc6zsv_d0OEBO8OOkz-DA") - expect(subscriptions).to contain("UCSl5Uxu2LyaoAoMMGp6oTJA") - expect(subscriptions).to contain("UCXuqSBlHAE6Xw-yeJA0Tunw") - expect(subscriptions).to contain("UCZ5XnGb-3t7jCkXdawN2tkA") + expect(subscriptions).to contain_exactly( + "UC0hHW5Y08ggq-9kbrGgWj0A", + "UC0vBXGSyV14uvJ4hECDOl0Q", + "UC1sELGmy5jp5fQUugmuYlXQ", + "UC9kFnwdCRrX7oTjqKd6-tiQ", + "UCBa659QWEk1AI4Tg--mrJ2A", + "UCGu6_XQ64rXPR6nuitMQE_A", + "UCGwu0nbY2wSkW8N-cghnLpA", + "UCQ0OvZ54pCFZwsKxbltg_tg", + "UCRE6itj4Jte4manQEu3Y7OA", + 
"UCRLc6zsv_d0OEBO8OOkz-DA", + "UCSl5Uxu2LyaoAoMMGp6oTJA", + "UCXuqSBlHAE6Xw-yeJA0Tunw", + "UCZ5XnGb-3t7jCkXdawN2tkA", + ).in_order end end