summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>2006-10-23 16:21:20 +0000
committerlnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>2006-10-23 16:21:20 +0000
commitdd108ab5ffc959ebfaf326769e5fc74822b6647b (patch)
treeb384cddf85540a0285c792fcef66016f6e858b34
parent5eb10f0ee87a84103dd605efa16783dfeec53a54 (diff)
downloadfeed2imap-dd108ab5ffc959ebfaf326769e5fc74822b6647b.tar.gz
feed2imap-dd108ab5ffc959ebfaf326769e5fc74822b6647b.tar.bz2
feed2imap-dd108ab5ffc959ebfaf326769e5fc74822b6647b.zip
git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@105 f70e237a-67f3-0310-a06c-d2b8a7116972
-rw-r--r--ChangeLog8
-rwxr-xr-xbin/feed2imap17
-rw-r--r--data/doc/feed2imap/examples/feed2imaprc7
-rw-r--r--lib/feed2imap/cache.rb34
-rw-r--r--lib/feed2imap/config.rb6
-rw-r--r--lib/feed2imap/feed2imap.rb56
6 files changed, 104 insertions, 24 deletions
diff --git a/ChangeLog b/ChangeLog
index cc017d5..ec0c198 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,14 @@ Feed2Imap 0.9 (XX/XX/2006)
<arnt@gulbrandsen.priv.no>)
* Now supports Snowscripts, using the 'execurl' and 'filter' config
keywords. For more information, see the example configuration file.
+* Slightly better option parsing. Thanks to Paul van Tilburg for the
+ patch.
+* A debug mode was added, and the normal mode was improved, so it is
+ no longer necessary to redirect feed2imap output to /dev/null:
+ transient errors are only reported after they have happened a
+ certain number of times (default 5).
+* An ignore-hash option was added for feeds whose content changes all
+ the time.
Feed2Imap 0.8 (28/06/2006)
============================
diff --git a/bin/feed2imap b/bin/feed2imap
index 0095d9f..b31c191 100755
--- a/bin/feed2imap
+++ b/bin/feed2imap
@@ -9,13 +9,20 @@ verbose = false
version = false
cacherebuild = false
configf = ENV['HOME'] + '/.feed2imaprc'
+progname = File::basename($PROGRAM_NAME)
opts = OptionParser::new do |opts|
- opts.banner = "Usage: ./feed2imap.rb [options]"
+ opts.banner = "Usage: #{progname} [options]"
opts.separator ""
opts.separator "Options:"
+
opts.on("-v", "--verbose", "Verbose mode") do |v|
verbose = true
end
+
+ opts.on("-d", "--debug", "Debug mode") do |v|
+ verbose = :debug
+ end
+
opts.on("-V", "--version", "Display Feed2Imap version") do |v|
version = true
end
@@ -26,7 +33,13 @@ opts = OptionParser::new do |opts|
configf = f
end
end
-opts.parse!(ARGV)
+begin
+ opts.parse!(ARGV)
+rescue OptionParser::ParseError => pe
+ opts.warn pe
+ puts opts
+ exit 1
+end
if version
puts "Feed2Imap v.#{F2I_VERSION}"
diff --git a/data/doc/feed2imap/examples/feed2imaprc b/data/doc/feed2imap/examples/feed2imaprc
index 0d0c3ed..0d8eaac 100644
--- a/data/doc/feed2imap/examples/feed2imaprc
+++ b/data/doc/feed2imap/examples/feed2imaprc
@@ -1,4 +1,7 @@
# Global options:
+# max-failures: maximum number of failures allowed before they are reported in
+# normal mode (default 5). Until that limit is exceeded, failures are only
+# visible in verbose mode.
# dumpdir: (for debugging purposes) directory where all fetched feeds will be
# dumped.
# debug-updated: (for debugging purposes) if true, display a lot of information
@@ -16,6 +19,10 @@
# an item is new or has been updated. It doesn't work well with some web apps
# like mediawiki. When this flag is enabled, all items which don't match
# exactly a previously downloaded item are considered as new items.
+# ignore-hash: Some feeds change the content of their items all the time, so
+# feed2imap detects that they have been updated at each run. When this flag
+# is enabled, feed2imap ignores the content of an item when determining
+# whether the item is already known.
# Snownews/Liferea scripts support :
# execurl: Command to execute that will display the RSS/Atom feed on stdout
# filter: Command to execute which will receive the RSS/Atom feed on stdin,
diff --git a/lib/feed2imap/cache.rb b/lib/feed2imap/cache.rb
index a101785..4687b76 100644
--- a/lib/feed2imap/cache.rb
+++ b/lib/feed2imap/cache.rb
@@ -34,13 +34,13 @@ class ItemCache
end
# Returns the really new items amongst items
- def get_new_items(id, items, always_new = false)
+ def get_new_items(id, items, always_new = false, ignore_hash = false)
if $updateddebug
puts "======================================================="
puts "GET_NEW_ITEMS FOR #{id}... (#{Time::now})"
end
@channels[id] ||= CachedChannel::new
- return @channels[id].get_new_items(items, always_new)
+ return @channels[id].get_new_items(items, always_new, ignore_hash)
end
# Commit changes to the cache
@@ -59,9 +59,16 @@ class ItemCache
def set_last_check(id, time)
@channels[id] ||= CachedChannel::new
@channels[id].lastcheck = time
+ @channels[id].failures = 0
self
end
+ # Fetching failure.
+ # returns number of failures
+ def fetch_failed(id)
+ @channels[id].fetch_failed
+ end
+
# Load the cache from an IO stream
def load(io)
begin
@@ -103,13 +110,14 @@ class CachedChannel
# 100 items should be enough for everybody, even quite busy feeds
CACHESIZE = 100
- attr_accessor :lastcheck, :items
+ attr_accessor :lastcheck, :items, :failures
def initialize
@lastcheck = Time::at(0)
@items = []
@itemstemp = [] # see below
@nbnewitems = 0
+ @failures = 0
end
# Let's explain @items and @itemstemp.
@@ -123,7 +131,7 @@ class CachedChannel
# of (old) items serialized.
# Returns the really new items amongst items
- def get_new_items(items, always_new = false)
+ def get_new_items(items, always_new = false, ignore_hash = false)
# save number of new items
@nbnewitems = items.length
# set items' cached version if not set yet
@@ -165,7 +173,10 @@ class CachedChannel
found = false
# Try to find a perfect match
@items.each do |j|
- if i.cacheditem == j
+ # note that simple_compare is only defined on CachedItem, not RSSItem, so we have to use
+ # j.simple_compare(i) and not i.simple_compare(j)
+ if (i.cacheditem == j and not ignore_hash) or
+ (j.simple_compare(i) and ignore_hash)
i.cacheditem.index = j.index
found = true
# let's put j in front of itemstemp
@@ -222,6 +233,12 @@ class CachedChannel
def nbitems
@items.length
end
+
+ def fetch_failed
+ @failures = 0 if @failures.nil?
+ @failures += 1
+ return @failures
+ end
end
# This class is the only thing kept in the cache
@@ -243,7 +260,7 @@ class CachedItem
end
def ==(other)
- if $updateddebug and @title =~ /e325/ and other.title =~ /e325/
+ if $updateddebug
puts "Comparing #{self.to_s} and #{other.to_s}:"
puts "Title: #{@title == other.title}"
puts "Link: #{@link == other.link}"
@@ -256,6 +273,11 @@ class CachedItem
(@date.nil? or other.date.nil? or @date == other.date) and @hash == other.hash
end
+ def simple_compare(other)
+ @title == other.title and @link == other.link and
+ (@creator.nil? or other.creator.nil? or @creator == other.creator)
+ end
+
def create_index
@index = ItemCache.getindex
end
diff --git a/lib/feed2imap/config.rb b/lib/feed2imap/config.rb
index 10c4d82..4ab522e 100644
--- a/lib/feed2imap/config.rb
+++ b/lib/feed2imap/config.rb
@@ -26,7 +26,7 @@ DEFCACHE = ENV['HOME'] + '/.feed2imap.cache'
# Feed2imap configuration
class F2IConfig
- attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug
+ attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug, :max_failures
# Load the configuration from the IO stream
# TODO should do some sanity check on the data read.
@@ -36,6 +36,7 @@ class F2IConfig
@dumpdir = @conf['dumpdir'] || nil
@conf['feeds'] ||= []
@feeds = []
+ @max_failures = @conf['max-failures'].to_i || 5
@updateddebug = (@conf['debug-updated'] and @conf['debug-updated'] != 'false')
@imap_accounts = ImapAccounts::new
@conf['feeds'].each do |f|
@@ -71,7 +72,7 @@ end
# A configured feed. simple data container.
class ConfigFeed
- attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter
+ attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter, :ignore_hash
attr_accessor :body
def initialize(f, imapaccount, folder)
@@ -83,6 +84,7 @@ class ConfigFeed
@always_new = (f['always-new'] and f['always-new'] != 'false')
@execurl = f['execurl']
@filter = f['filter']
+ @ignore_hash = f['ignore-hash'] || false
@freq = @freq.to_i if @freq
end
diff --git a/lib/feed2imap/feed2imap.rb b/lib/feed2imap/feed2imap.rb
index 3cf46aa..2f62a80 100644
--- a/lib/feed2imap/feed2imap.rb
+++ b/lib/feed2imap/feed2imap.rb
@@ -36,14 +36,17 @@ class Feed2Imap
def initialize(verbose, cacherebuild, configfile)
@logger = Logger::new(STDOUT)
- if verbose
+ if verbose == :debug
@logger.level = Logger::DEBUG
+ require 'pp'
+ elsif verbose == true
+ @logger.level = Logger::INFO
else
@logger.level = Logger::WARN
end
@logger.info("Feed2Imap V.#{F2I_VERSION} started")
# reading config
- @logger.info('Reading configuration file')
+ @logger.info('Reading configuration file ...')
if not File::exist?(configfile)
@logger.fatal("Configuration file #{configfile} not found.")
exit(1)
@@ -60,8 +63,13 @@ class Feed2Imap
@logger.fatal("Error while reading configuration file, exiting: #{$!}")
exit(1)
end
+ if @logger.level == Logger::DEBUG
+ @logger.debug("Configuration read:")
+ pp(@config)
+ end
+
# init cache
- @logger.info('Initializing cache')
+ @logger.info('Initializing cache ...')
@cache = ItemCache::new(@config.updateddebug)
if not File::exist?(@config.cache + '.lock')
f = File::new(@config.cache + '.lock', 'w')
@@ -78,8 +86,9 @@ class Feed2Imap
@cache.load(f)
end
end
+
# connecting all IMAP accounts
- @logger.info('Connecting to IMAP accounts')
+ @logger.info('Connecting to IMAP accounts ...')
@config.imap_accounts.each_value do |ac|
begin
ac.connect
@@ -88,8 +97,9 @@ class Feed2Imap
exit(1)
end
end
+
# check that IMAP folders exist
- @logger.info("Checking IMAP folders")
+ @logger.info("Checking IMAP folders ...")
@config.feeds.each do |f|
begin
f.imapaccount.create_folder(f.folder) if not f.imapaccount.folder_exist?(f.folder)
@@ -99,7 +109,7 @@ class Feed2Imap
end
end
# for each feed, fetch, upload to IMAP and cache
- @logger.info("Fetching and filtering feeds")
+ @logger.info("Fetching and filtering feeds ...")
ths = []
mutex = Mutex::new
@config.feeds.each do |f|
@@ -126,6 +136,8 @@ class Feed2Imap
mutex.lock
feed.body = s
@cache.set_last_check(feed.name, Time::now)
+ else
+ @logger.debug("Feed #{feed.name} doesn't need to be checked again for now.")
end
mutex.unlock
# dump if requested
@@ -139,19 +151,34 @@ class Feed2Imap
end
rescue Timeout::Error
mutex.synchronize do
- @logger.fatal("Timeout::Error while fetching #{feed.url}: #{$!}")
+ n = @cache.fetch_failed(feed.name)
+ m = "Timeout::Error while fetching #{feed.url}: #{$!} (failed #{n} times)"
+ if n > @config.max_failures
+ @logger.fatal(m)
+ else
+ @logger.info(m)
+ end
end
rescue
mutex.synchronize do
- @logger.fatal("Error while fetching #{feed.url}: #{$!}")
+ n = @cache.fetch_failed(feed.name)
+ m = "Error while fetching #{feed.url}: #{$!} (failed #{n} times)"
+ if n > @config.max_failures
+ @logger.fatal(m)
+ else
+ @logger.info(m)
+ end
end
end
end
end
ths.each { |t| t.join }
- @logger.info("Parsing and uploading")
+ @logger.info("Parsing and uploading ...")
@config.feeds.each do |f|
- next if f.body.nil? # means 304
+ if f.body.nil? # means 304
+ @logger.debug("Feed #{f.name} did not change.")
+ next
+ end
begin
feed = FeedParser::Feed::new(f.body)
rescue Exception => e
@@ -159,13 +186,13 @@ class Feed2Imap
next
end
begin
- newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new)
+ newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new, f.ignore_hash)
rescue
@logger.fatal("Exception caught when selecting new items for #{f.name}: #{$!}")
puts $!.backtrace
next
end
- @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0
+ @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0 or @logger.level == Logger::DEBUG
begin
if !cacherebuild
updateditems.each do |i|
@@ -190,17 +217,18 @@ class Feed2Imap
next
end
end
- @logger.info("Finished. Saving cache")
+ @logger.info("Finished. Saving cache ...")
begin
File::open(@config.cache, 'w') { |f| @cache.save(f) }
rescue
@logger.fatal("Exception caught while writing cache to #{@config.cache}: #{$!}")
end
- @logger.info("Closing IMAP connections")
+ @logger.info("Closing IMAP connections ...")
@config.imap_accounts.each_value do |ac|
begin
ac.disconnect
rescue
+ # servers tend to cause an exception to be raised here, hence the INFO level.
@logger.info("Exception caught while closing connection to #{ac.to_s}: #{$!}")
end
end