From b99dd4533ac832f73b17fa4ebd560789ef54ce5a Mon Sep 17 00:00:00 2001 From: lnu Date: Wed, 5 Apr 2006 16:22:19 +0000 Subject: - Now compares the whole items when searching for duplicates (might cause some false-negatives) - New always-new flag git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@93 f70e237a-67f3-0310-a06c-d2b8a7116972 --- data/doc/feed2imap/examples/feed2imaprc | 9 +++++++++ lib/feed2imap/cache.rb | 35 ++++++++++++++++++--------------- lib/feed2imap/config.rb | 3 ++- lib/feed2imap/feed2imap.rb | 2 +- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/data/doc/feed2imap/examples/feed2imaprc b/data/doc/feed2imap/examples/feed2imaprc index 13ac3c2..35a9f6c 100644 --- a/data/doc/feed2imap/examples/feed2imaprc +++ b/data/doc/feed2imap/examples/feed2imaprc @@ -4,6 +4,10 @@ # min-frequency (in HOURS) is the minimum frequency with which this particular # feed will be fetched # disable: if set to something, the feed will be ignored +# always-new: feed2imap tries to use a clever algorithm to determine whether an item +# is new or has been updated. It doesn't work well with some web apps like +# mediawiki. When this flag is enabled, all items which don't match exactly +# a previously downloaded item are considered as new items. # # If your login contains an @ character, replace it with %40. Other reserved # characters can be escaped in the same way (see man ascii to get their code) @@ -17,3 +21,8 @@ feeds: - name: linuxfr url: http://linuxfr.org/backend/news/rss20.rss target: imap://lucaswebmail:password@imap.apinc.org/INBOX.Feeds.LinuxFR + - + name: JabberFrWiki + url: http://wiki.jabberfr.org/index.php?title=Special:Recentchanges&feed=rss + target: imaps://lucaswebmail:password@imap.apinc.org/INBOX.Feeds.JabberFR + always-new: true diff --git a/lib/feed2imap/cache.rb b/lib/feed2imap/cache.rb index 006e99e..cd75db2 100644 --- a/lib/feed2imap/cache.rb +++ b/lib/feed2imap/cache.rb @@ -30,9 +30,9 @@ class ItemCache end # Returns the really new items amongst items - def get_new_items(id, items) + def get_new_items(id, items, always_new = false) @channels[id] ||= CachedChannel::new - return @channels[id].get_new_items(items) + return @channels[id].get_new_items(items, always_new) end # Commit changes to the cache @@ -116,7 +116,7 @@ class CachedChannel UPDATEDDEBUG = false # Returns the really new items amongst items - def get_new_items(items) + def get_new_items(items, always_new = false) # save number of new items @nbnewitems = items.length # set items' cached version if not set yet @@ -130,7 +130,7 @@ class CachedChannel dups = false for i in 0...items.length do for j in i+1...items.length do - if items[i].cacheditem.link == items[j].cacheditem.link + if items[i].cacheditem == items[j].cacheditem if UPDATEDDEBUG puts "## Removed #{items[j].cacheditem.to_s}" end @@ -148,6 +148,7 @@ class CachedChannel items.each { |i| puts "#{i.cacheditem.to_s}" } puts "-------Items already there :----------" @items.each { |i| puts "#{i.to_s}" } + puts "Items always considered as new: #{always_new.to_s}" end items.each do |i| found = false @@ -163,18 +164,20 @@ class CachedChannel end end next if found - # Try to find an updated item - @items.each do |j| - # Do we need a better heuristic ? - if i.link and i.link == j.link - i.cacheditem.index = j.index - i.cacheditem.updated = true - updateditems.push(i) - found = true - # let's put j in front of itemstemp - @itemstemp.delete(j) - @itemstemp.unshift(i.cacheditem) - break + if not always_new + # Try to find an updated item + @items.each do |j| + # Do we need a better heuristic ? + if i.link and i.link == j.link + i.cacheditem.index = j.index + i.cacheditem.updated = true + updateditems.push(i) + found = true + # let's put j in front of itemstemp + @itemstemp.delete(j) + @itemstemp.unshift(i.cacheditem) + break + end end end next if found diff --git a/lib/feed2imap/config.rb b/lib/feed2imap/config.rb index 0483423..1e13a04 100644 --- a/lib/feed2imap/config.rb +++ b/lib/feed2imap/config.rb @@ -70,7 +70,7 @@ end # A configured feed. simple data container. class ConfigFeed - attr_reader :name, :url, :imapaccount, :folder + attr_reader :name, :url, :imapaccount, :folder, :always_new attr_accessor :body def initialize(f, imapaccount, folder) @@ -79,6 +79,7 @@ class ConfigFeed @url.sub!(/^feed:/, '') if @url =~ /^feed:/ @imapaccount, @folder = imapaccount, folder @freq = f['min-frequency'] + @always_new = (f['always-new'] and f['always-new'] != 'false') @freq = @freq.to_i if @freq end diff --git a/lib/feed2imap/feed2imap.rb b/lib/feed2imap/feed2imap.rb index 91660c5..19d6ccd 100644 --- a/lib/feed2imap/feed2imap.rb +++ b/lib/feed2imap/feed2imap.rb @@ -145,7 +145,7 @@ class Feed2Imap next end begin - newitems, updateditems = @cache.get_new_items(f.name, feed.items) + newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new) rescue @logger.fatal("Exception caught when selecting new items for #{f.name}: #{$!}") puts $!.backtrace -- cgit v1.2.3-54-g00ecf