summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2006-10-23 16:21:20 +0000
committer: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2006-10-23 16:21:20 +0000
commit: dd108ab5ffc959ebfaf326769e5fc74822b6647b (patch)
tree: b384cddf85540a0285c792fcef66016f6e858b34 /lib
parent: 5eb10f0ee87a84103dd605efa16783dfeec53a54 (diff)
download: feed2imap-dd108ab5ffc959ebfaf326769e5fc74822b6647b.tar.gz
feed2imap-dd108ab5ffc959ebfaf326769e5fc74822b6647b.tar.bz2
feed2imap-dd108ab5ffc959ebfaf326769e5fc74822b6647b.zip
git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@105 f70e237a-67f3-0310-a06c-d2b8a7116972
Diffstat (limited to 'lib')
-rw-r--r-- lib/feed2imap/cache.rb 34
-rw-r--r-- lib/feed2imap/config.rb 6
-rw-r--r-- lib/feed2imap/feed2imap.rb 56
3 files changed, 74 insertions, 22 deletions
diff --git a/lib/feed2imap/cache.rb b/lib/feed2imap/cache.rb
index a101785..4687b76 100644
--- a/lib/feed2imap/cache.rb
+++ b/lib/feed2imap/cache.rb
@@ -34,13 +34,13 @@ class ItemCache
end
# Returns the really new items amongst items
- def get_new_items(id, items, always_new = false)
+ def get_new_items(id, items, always_new = false, ignore_hash = false)
if $updateddebug
puts "======================================================="
puts "GET_NEW_ITEMS FOR #{id}... (#{Time::now})"
end
@channels[id] ||= CachedChannel::new
- return @channels[id].get_new_items(items, always_new)
+ return @channels[id].get_new_items(items, always_new, ignore_hash)
end
# Commit changes to the cache
@@ -59,9 +59,16 @@ class ItemCache
def set_last_check(id, time)
@channels[id] ||= CachedChannel::new
@channels[id].lastcheck = time
+ @channels[id].failures = 0
self
end
+  # Record a fetch failure for the channel id, creating its cache entry on
+  # first use (as set_last_check does) so a feed that has never been fetched
+  # successfully does not hit nil here. Returns the updated failure count.
+  def fetch_failed(id)
+    (@channels[id] ||= CachedChannel::new).fetch_failed
+  end
+
# Load the cache from an IO stream
def load(io)
begin
@@ -103,13 +110,14 @@ class CachedChannel
# 100 items should be enough for everybody, even quite busy feeds
CACHESIZE = 100
- attr_accessor :lastcheck, :items
+ attr_accessor :lastcheck, :items, :failures
def initialize
@lastcheck = Time::at(0)
@items = []
@itemstemp = [] # see below
@nbnewitems = 0
+ @failures = 0
end
# Let's explain @items and @itemstemp.
@@ -123,7 +131,7 @@ class CachedChannel
# of (old) items serialized.
# Returns the really new items amongst items
- def get_new_items(items, always_new = false)
+ def get_new_items(items, always_new = false, ignore_hash = false)
# save number of new items
@nbnewitems = items.length
# set items' cached version if not set yet
@@ -165,7 +173,10 @@ class CachedChannel
found = false
# Try to find a perfect match
@items.each do |j|
- if i.cacheditem == j
+ # note that simple_compare is only defined on CachedItem, not on RSSItem, so we
+ # have to use j.simple_compare(i) and not i.simple_compare(j)
+ if (i.cacheditem == j and not ignore_hash) or
+ (j.simple_compare(i) and ignore_hash)
i.cacheditem.index = j.index
found = true
# let's put j in front of itemstemp
@@ -222,6 +233,12 @@ class CachedChannel
def nbitems
@items.length
end
+
+  # Count one more fetch failure and return the new total.
+  # A nil counter is treated as zero before incrementing.
+  def fetch_failed
+    @failures = @failures.nil? ? 1 : @failures + 1
+  end
end
# This class is the only thing kept in the cache
@@ -243,7 +260,7 @@ class CachedItem
end
def ==(other)
- if $updateddebug and @title =~ /e325/ and other.title =~ /e325/
+ if $updateddebug
puts "Comparing #{self.to_s} and #{other.to_s}:"
puts "Title: #{@title == other.title}"
puts "Link: #{@link == other.link}"
@@ -256,6 +273,11 @@ class CachedItem
(@date.nil? or other.date.nil? or @date == other.date) and @hash == other.hash
end
+  # Loose comparison against other: title and link must be equal, and the
+  # creators must be equal unless at least one side has no creator.
+  # Date and hash are not looked at here.
+  def simple_compare(other)
+    return false unless @title == other.title
+    return false unless @link == other.link
+    @creator.nil? || other.creator.nil? || @creator == other.creator
+  end
+
def create_index
@index = ItemCache.getindex
end
diff --git a/lib/feed2imap/config.rb b/lib/feed2imap/config.rb
index 10c4d82..4ab522e 100644
--- a/lib/feed2imap/config.rb
+++ b/lib/feed2imap/config.rb
@@ -26,7 +26,7 @@ DEFCACHE = ENV['HOME'] + '/.feed2imap.cache'
# Feed2imap configuration
class F2IConfig
- attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug
+ attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug, :max_failures
# Load the configuration from the IO stream
# TODO should do some sanity check on the data read.
@@ -36,6 +36,7 @@ class F2IConfig
@dumpdir = @conf['dumpdir'] || nil
@conf['feeds'] ||= []
@feeds = []
+    # Fall back to 5 when 'max-failures' is not configured. The default must be
+    # applied before to_i: nil.to_i is 0, and 0 is truthy in Ruby, so
+    # "value.to_i || 5" would never yield 5.
+    @max_failures = (@conf['max-failures'] || 5).to_i
@updateddebug = (@conf['debug-updated'] and @conf['debug-updated'] != 'false')
@imap_accounts = ImapAccounts::new
@conf['feeds'].each do |f|
@@ -71,7 +72,7 @@ end
# A configured feed. simple data container.
class ConfigFeed
- attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter
+ attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter, :ignore_hash
attr_accessor :body
def initialize(f, imapaccount, folder)
@@ -83,6 +84,7 @@ class ConfigFeed
@always_new = (f['always-new'] and f['always-new'] != 'false')
@execurl = f['execurl']
@filter = f['filter']
+ @ignore_hash = f['ignore-hash'] || false
@freq = @freq.to_i if @freq
end
diff --git a/lib/feed2imap/feed2imap.rb b/lib/feed2imap/feed2imap.rb
index 3cf46aa..2f62a80 100644
--- a/lib/feed2imap/feed2imap.rb
+++ b/lib/feed2imap/feed2imap.rb
@@ -36,14 +36,17 @@ class Feed2Imap
def initialize(verbose, cacherebuild, configfile)
@logger = Logger::new(STDOUT)
- if verbose
+ if verbose == :debug
@logger.level = Logger::DEBUG
+ require 'pp'
+ elsif verbose == true
+ @logger.level = Logger::INFO
else
@logger.level = Logger::WARN
end
@logger.info("Feed2Imap V.#{F2I_VERSION} started")
# reading config
- @logger.info('Reading configuration file')
+ @logger.info('Reading configuration file ...')
if not File::exist?(configfile)
@logger.fatal("Configuration file #{configfile} not found.")
exit(1)
@@ -60,8 +63,13 @@ class Feed2Imap
@logger.fatal("Error while reading configuration file, exiting: #{$!}")
exit(1)
end
+ if @logger.level == Logger::DEBUG
+ @logger.debug("Configuration read:")
+ pp(@config)
+ end
+
# init cache
- @logger.info('Initializing cache')
+ @logger.info('Initializing cache ...')
@cache = ItemCache::new(@config.updateddebug)
if not File::exist?(@config.cache + '.lock')
f = File::new(@config.cache + '.lock', 'w')
@@ -78,8 +86,9 @@ class Feed2Imap
@cache.load(f)
end
end
+
# connecting all IMAP accounts
- @logger.info('Connecting to IMAP accounts')
+ @logger.info('Connecting to IMAP accounts ...')
@config.imap_accounts.each_value do |ac|
begin
ac.connect
@@ -88,8 +97,9 @@ class Feed2Imap
exit(1)
end
end
+
# check that IMAP folders exist
- @logger.info("Checking IMAP folders")
+ @logger.info("Checking IMAP folders ...")
@config.feeds.each do |f|
begin
f.imapaccount.create_folder(f.folder) if not f.imapaccount.folder_exist?(f.folder)
@@ -99,7 +109,7 @@ class Feed2Imap
end
end
# for each feed, fetch, upload to IMAP and cache
- @logger.info("Fetching and filtering feeds")
+ @logger.info("Fetching and filtering feeds ...")
ths = []
mutex = Mutex::new
@config.feeds.each do |f|
@@ -126,6 +136,8 @@ class Feed2Imap
mutex.lock
feed.body = s
@cache.set_last_check(feed.name, Time::now)
+ else
+ @logger.debug("Feed #{feed.name} doesn't need to be checked again for now.")
end
mutex.unlock
# dump if requested
@@ -139,19 +151,34 @@ class Feed2Imap
end
rescue Timeout::Error
mutex.synchronize do
- @logger.fatal("Timeout::Error while fetching #{feed.url}: #{$!}")
+ n = @cache.fetch_failed(feed.name)
+ m = "Timeout::Error while fetching #{feed.url}: #{$!} (failed #{n} times)"
+ if n > @config.max_failures
+ @logger.fatal(m)
+ else
+ @logger.info(m)
+ end
end
rescue
mutex.synchronize do
- @logger.fatal("Error while fetching #{feed.url}: #{$!}")
+ n = @cache.fetch_failed(feed.name)
+ m = "Error while fetching #{feed.url}: #{$!} (failed #{n} times)"
+ if n > @config.max_failures
+ @logger.fatal(m)
+ else
+ @logger.info(m)
+ end
end
end
end
end
ths.each { |t| t.join }
- @logger.info("Parsing and uploading")
+ @logger.info("Parsing and uploading ...")
@config.feeds.each do |f|
- next if f.body.nil? # means 304
+ if f.body.nil? # means 304
+ @logger.debug("Feed #{f.name} did not change.")
+ next
+ end
begin
feed = FeedParser::Feed::new(f.body)
rescue Exception => e
@@ -159,13 +186,13 @@ class Feed2Imap
next
end
begin
- newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new)
+ newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new, f.ignore_hash)
rescue
@logger.fatal("Exception caught when selecting new items for #{f.name}: #{$!}")
puts $!.backtrace
next
end
- @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0
+ @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0 or @logger.level == Logger::DEBUG
begin
if !cacherebuild
updateditems.each do |i|
@@ -190,17 +217,18 @@ class Feed2Imap
next
end
end
- @logger.info("Finished. Saving cache")
+ @logger.info("Finished. Saving cache ...")
begin
File::open(@config.cache, 'w') { |f| @cache.save(f) }
rescue
@logger.fatal("Exception caught while writing cache to #{@config.cache}: #{$!}")
end
- @logger.info("Closing IMAP connections")
+ @logger.info("Closing IMAP connections ...")
@config.imap_accounts.each_value do |ac|
begin
ac.disconnect
rescue
+ # servers tend to cause an exception to be raised here, hence the INFO level.
@logger.info("Exception caught while closing connection to #{ac.to_s}: #{$!}")
end
end