From fa09f59de0a188858c76598c714c4a9eb94a4b5e Mon Sep 17 00:00:00 2001 From: Alpha Chen Date: Mon, 24 Feb 2020 15:55:14 -0800 Subject: [PATCH] [rzz] create feed from the webpage instead --- ansible/docker/rzz/rzz.rb | 48 ++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/ansible/docker/rzz/rzz.rb b/ansible/docker/rzz/rzz.rb index 998ac88..c578560 100644 --- a/ansible/docker/rzz/rzz.rb +++ b/ansible/docker/rzz/rzz.rb @@ -8,36 +8,32 @@ module Rzz class App < Roda plugin :caching + plugin :json route do |r| r.get do r.is "test-gym" do - url = "https://elemental.medium.com/feed" - xml = URI.open(url).read - r.etag Digest::SHA1.hexdigest(xml) - - doc = Nokogiri::XML(xml) - - self_url = "#{r.base_url}#{r.path}" # Should this be configured via the environment? - doc.at_xpath("/rss/channel/link/text()").content = self_url - doc.at_xpath("/rss/channel/atom:link[@href='#{url}']")["href"] = self_url - - doc.at_xpath("/rss/channel/atom:link[@rel='hub']").remove - - doc - .xpath("/rss/channel/item") - .select {|item| item.xpath("./category[text()='test-gym']").empty? } - .each(&:remove) - - doc - .xpath("/rss/channel/item") - .each do |item| - link = item.at_xpath("./link/text()").content - item.at_xpath("./description/text()").content = URI.open(link).read - end - - response.headers["Content-Type"] = "text/xml" - doc.to_xml + url = "https://elemental.medium.com/test-gym/home" + html = URI.open(url).read + + doc = Nokogiri::HTML(html) + items = doc.xpath("//a[@data-post-id]").map {|a| + attrs = a.attributes + id = attrs.fetch("data-post-id").value + title = a.xpath("./h3").text + content = a.xpath("./div").text + href = URI(attrs.fetch("href")) + url = "#{href.scheme}://#{href.host}#{href.path}" + { id: id, title: title, content_text: content, url: url } + } + + { + version: "https://jsonfeed.org/version/1", + title: "Test Gym", + home_page_url: url, + feed_url: "#{r.base_url}#{r.path}", + items: items, + } end end end