forked from erwanor/engineering-blogs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_opml.rb
executable file
·103 lines (87 loc) · 2.57 KB
/
generate_opml.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env ruby
require 'builder'
require 'feedbag'
require 'nokogiri'
# Script-wide settings: the Markdown list that gets parsed, the OPML file that
# gets produced, and the title written into that OPML document.
INPUT_FILENAME = 'README.md'.freeze
OUTPUT_FILENAME = 'engineering_blogs.opml'.freeze
TITLE = 'Engineering Blogs'.freeze

# Each "* <name> <http...>" bullet in the README becomes one [name, url] pair.
contents = File.read(INPUT_FILENAME)
matches = contents.scan(/\* (.*) (http.*)/)

# Blog names that are skipped for now, without any feed lookup.
temp_ignores = %w[
  AdRoll
  Buzzfeed
  SourceClear
  TaskRabbit
  theScore
  Trivago
  Xmartlabs
  WyeWorks
  Zoosk
] + ['Rudolf Olah']

# Parse the OPML file left by a previous run, when there is one, so feed URLs
# already recorded in it can be reused; xml stays nil otherwise.
xml = nil
if File.exist?(OUTPUT_FILENAME)
  xml = File.open(OUTPUT_FILENAME) { |f| Nokogiri::XML(f) }
end

# Registers Struct::Blog, the record type for one blog entry.
Struct.new('Blog', :name, :web_url, :rss_url)

# Blogs with a usable feed URL, and blogs for which none was found.
blogs = []
unavailable = []
# Resolve a feed URL for every [name, web_url] pair scraped from the README.
#
# Lookup order for each blog:
#   1. the xmlUrl recorded for the same htmlUrl in the previously generated
#      OPML file (only when that file was loaded into `xml`),
#   2. Feedbag.find on the blog's own URL,
#   3. Feedbag.find on the blog URL with each guessed feed path appended.
# Entries with a non-empty feed URL are pushed onto `blogs`; all others are
# pushed onto `unavailable`. Names listed in `temp_ignores` are skipped.

# Path suffixes tried, in this order, when the blog URL itself yields no feed.
feed_path_guesses = %w[/rss /feed /feeds /atom.xml /feed.xml /rss.xml .atom]

matches.each do |name, web_url|
  if temp_ignores.include?(name)
    puts "#{name}: IGNORE [TEMPORARILY]"
    next
  end

  # The URL is passed as an XPath variable instead of being interpolated into
  # the expression, so a URL containing a quote character cannot break the
  # query. The nil fills the namespace-bindings slot that precedes the
  # variable bindings.
  rss_url = nil
  existing_blog = xml.xpath('//outline[@htmlUrl=$url]', nil, { url: web_url }).first if xml
  if existing_blog
    rss_url = existing_blog.attr('xmlUrl')
    puts "#{name}: ALREADY HAVE"
  end

  # Nothing reusable in the old OPML file: ask Feedbag about the blog URL.
  if rss_url.nil?
    puts "#{name}: GETTING"
    rss_url = Feedbag.find(web_url).first
  end

  # Still nothing: probe the guessed feed locations and stop at the first hit.
  if rss_url.nil?
    base_url = web_url.chomp('/')
    feed_path_guesses.each do |suggested_path|
      rss_url = Feedbag.find("#{base_url}#{suggested_path}").first
      break if rss_url
    end
  end

  # A nil or empty feed URL counts as "not found".
  list = rss_url && !rss_url.empty? ? blogs : unavailable
  list.push(Struct::Blog.new(name, web_url, rss_url))
end
# Sort both lists in place by capitalized name, so letter case does not affect
# the order. The bang form is required here: plain sort_by returns a new array
# and leaves the receiver untouched, so its result would simply be discarded.
blogs.sort_by! { |b| b.name.capitalize }
unavailable.sort_by! { |b| b.name.capitalize }
# Build the OPML document in memory: an XML declaration, a <head> holding the
# title, and a <body> with a single titled outline that contains one
# type="rss" outline per blog. Note that `xml` now refers to the builder.
xml = Builder::XmlMarkup.new(indent: 2)
xml.instruct!(:xml, version: '1.0', encoding: 'UTF-8')
xml.opml(version: '1.0') do
  xml.head { xml.title(TITLE) }

  xml.body do
    xml.outline(text: TITLE, title: TITLE) do
      blogs.each do |blog|
        xml.outline(
          type: 'rss',
          text: blog.name,
          title: blog.name,
          xmlUrl: blog.rss_url,
          htmlUrl: blog.web_url
        )
      end
    end
  end
end
# Write the generated markup to disk in binary mode. The block form of
# File.open closes the handle even when the write raises, which the previous
# File.new / write / close sequence did not guarantee.
File.open(OUTPUT_FILENAME, 'wb') do |output|
  output.write(xml.target!)
end
# Report the outcome: how many blogs were written to the OPML file, followed
# by a framed "name | url" list of the blogs for which no feed was found.
rule = '==================================================='

puts "DONE: #{blogs.count} written to #{OUTPUT_FILENAME}"
puts "\nUnable to find an RSS feed for the following blogs:"
puts rule
unavailable.each { |blog| puts "#{blog.name} | #{blog.web_url}" }
puts rule