=begin
Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
Copyright (c) 2005 Lucas Nussbaum

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
=end

# Feed2Imap version
F2I_VERSION = '1.0'
# Log (at INFO level) when fetching a single feed takes longer than this
# many seconds.
F2I_WARNFETCHTIME = 10

require 'feed2imap/config'
require 'feed2imap/cache'
require 'feed2imap/httpfetcher'
require 'logger'
require 'thread'
require 'time' # for Time#xmlschema, used when dumping feed bodies
require 'feedparser'
require 'feed2imap/itemtomail'
require 'open3'

class Feed2Imap
  # Returns the feed2imap version string.
  def Feed2Imap.version
    return F2I_VERSION
  end

  # Runs one complete aggregation pass:
  # read configuration -> lock and load the cache -> connect all IMAP
  # accounts -> fetch/filter every feed concurrently -> parse feeds and
  # upload new/updated items as mail -> save the cache -> disconnect.
  #
  # verbose::      :debug, true or false; selects the log level (DEBUG,
  #                INFO or WARN respectively).
  # cacherebuild:: when true, refreshes the cache without uploading any mail.
  # configfile::   path to the feed2imap configuration file.
  #
  # Exits the process (exit(1)) on unrecoverable errors: missing or broken
  # configuration, cache already locked, IMAP connect/upload failures.
  def initialize(verbose, cacherebuild, configfile)
    @logger = Logger::new(STDOUT)
    if verbose == :debug
      @logger.level = Logger::DEBUG
      require 'pp'
    elsif verbose == true
      @logger.level = Logger::INFO
    else
      @logger.level = Logger::WARN
    end
    @logger.info("Feed2Imap V.#{F2I_VERSION} started")

    # reading config
    @logger.info('Reading configuration file ...')
    if not File::exist?(configfile)
      @logger.fatal("Configuration file #{configfile} not found.")
      exit(1)
    end
    # 044 = group- or world-readable; the file usually holds IMAP passwords.
    if (File::stat(configfile).mode & 044) != 0
      @logger.warn("Configuration file is readable by other users. It " +
        "probably contains your password.")
    end
    begin
      File::open(configfile) { |f| @config = F2IConfig::new(f) }
    rescue
      @logger.fatal("Error while reading configuration file, exiting: #{$!}")
      exit(1)
    end
    if @logger.level == Logger::DEBUG
      @logger.debug("Configuration read:")
      pp(@config)
    end

    # init cache
    @logger.info('Initializing cache ...')
    @cache = ItemCache::new(@config.updateddebug)
    # Take an exclusive, non-blocking lock on <cache>.lock so that two
    # feed2imap instances never update the same cache concurrently.
    # The File object MUST be kept in an instance variable: if it were
    # discarded, the garbage collector could finalize the descriptor at any
    # time and silently release the flock. ('w' creates the file if needed.)
    @lockfile = File::new(@config.cache + '.lock', 'w')
    if @lockfile.flock(File::LOCK_EX | File::LOCK_NB) == false
      @logger.fatal("Another instance of feed2imap is already locking the cache file")
      exit(1)
    end
    if not File::exist?(@config.cache)
      @logger.warn("Cache file #{@config.cache} not found, using a new one")
    else
      File::open(@config.cache) do |f|
        @cache.load(f)
      end
    end

    # connecting all IMAP accounts
    @logger.info('Connecting to IMAP accounts ...')
    @config.imap_accounts.each_value do |ac|
      begin
        ac.connect
      rescue
        @logger.fatal("Error while connecting to #{ac}, exiting: #{$!}")
        exit(1)
      end
    end

    # for each feed, fetch, upload to IMAP and cache
    @logger.info("Fetching and filtering feeds ...")
    ths = []
    # 'mutex' serializes access to @cache and to external commands;
    # the sparefetchers trio throttles the number of concurrent fetches.
    mutex = Mutex::new
    sparefetchers = 16 # max number of fetchers running at the same time.
    sparefetchers_mutex = Mutex::new
    sparefetchers_cond = ConditionVariable::new
    @config.feeds.each do |f|
      ths << Thread::new(f) do |feed|
        begin
          mutex.lock
          lastcheck = @cache.get_last_check(feed.name)
          if feed.needfetch(lastcheck)
            mutex.unlock
            # Wait until one of the fetcher slots is free.
            sparefetchers_mutex.synchronize do
              while sparefetchers <= 0
                sparefetchers_cond.wait(sparefetchers_mutex)
              end
              sparefetchers -= 1
            end
            fetch_start = Time::now
            if feed.url
              s = HTTPFetcher::fetch(feed.url, @cache.get_last_check(feed.name))
            elsif feed.execurl
              # avoid running more than one command at the same time.
              # We need it because the called command might not be
              # thread-safe, and we need to get the right exitcode.
              # NOTE(review): if the command raises while 'mutex' is held,
              # the rescue clauses below call mutex.synchronize on the same
              # (non-reentrant) mutex — potential self-deadlock. Confirm and
              # consider begin/ensure here.
              mutex.lock
              s = %x{#{feed.execurl}}
              if $?.exitstatus != 0
                @logger.warn("Command for #{feed.name} exited with status #{$?.exitstatus} !")
              end
              mutex.unlock
            else
              @logger.warn("No way to fetch feed #{feed.name} !")
            end
            if feed.filter and s != nil
              # avoid running more than one command at the same time.
              # We need it because the called command might not be
              # thread-safe, and we need to get the right exitcode.
              mutex.lock
              # hack hack hack, avoid buffering problems
              # NOTE(review): relying on $? after a non-block Open3::popen3
              # only works on the Ruby 1.8-era popen3; on 1.9+ popen3 also
              # returns a wait_thr whose value carries the exit status, and
              # stderr is never closed here — verify on the target Ruby.
              stdin, stdout, stderr = Open3::popen3(feed.filter)
              inth = Thread::new do
                stdin.puts s
                stdin.close
              end
              output = nil
              outh = Thread::new do
                output = stdout.read
              end
              inth.join
              outh.join
              s = output
              if $?.exitstatus != 0
                @logger.warn("Filter command for #{feed.name} exited with status #{$?.exitstatus}. Output might be corrupted !")
              end
              mutex.unlock
            end
            if Time::now - fetch_start > F2I_WARNFETCHTIME
              @logger.info("Fetching feed #{feed.name} took #{(Time::now - fetch_start).to_i}s")
            end
            # Release our fetcher slot and wake up one waiting thread.
            sparefetchers_mutex.synchronize do
              sparefetchers += 1
              sparefetchers_cond.signal
            end
            mutex.lock
            feed.body = s
            @cache.set_last_check(feed.name, Time::now)
          else
            @logger.debug("Feed #{feed.name} doesn't need to be checked again for now.")
          end
          mutex.unlock
          # dump if requested
          if @config.dumpdir
            mutex.synchronize do
              if feed.body
                fname = @config.dumpdir + '/' + feed.name + '-' + Time::now.xmlschema
                File::open(fname, 'w') { |file| file.puts feed.body }
              end
            end
          end
          # dump this feed if requested
          if feed.dumpdir
            mutex.synchronize do
              if feed.body
                fname = feed.dumpdir + '/' + feed.name + '-' + Time::now.xmlschema
                File::open(fname, 'w') { |file| file.puts feed.body }
              end
            end
          end
        rescue Timeout::Error
          mutex.synchronize do
            n = @cache.fetch_failed(feed.name)
            m = "Timeout::Error while fetching #{feed.url}: #{$!} (failed #{n} times)"
            if n > @config.max_failures
              @logger.fatal(m)
            else
              @logger.info(m)
            end
          end
        rescue
          mutex.synchronize do
            n = @cache.fetch_failed(feed.name)
            m = "Error while fetching #{feed.url}: #{$!} (failed #{n} times)"
            if n > @config.max_failures
              @logger.fatal(m)
            else
              @logger.info(m)
            end
          end
        end
      end
    end
    ths.each { |t| t.join }

    @logger.info("Parsing and uploading ...")
    @config.feeds.each do |f|
      if f.body.nil? # means 304
        @logger.debug("Feed #{f.name} did not change.")
        next
      end
      begin
        feed = FeedParser::Feed::new(f.body)
      rescue Exception
        # NOTE(review): rescue Exception is normally an anti-pattern, but it
        # is kept here deliberately — the parser can apparently raise
        # non-StandardError exceptions; confirm before narrowing.
        n = @cache.parse_failed(f.name)
        m = "Error while parsing #{f.name}: #{$!} (failed #{n} times)"
        if n > @config.max_failures
          @logger.fatal(m)
        else
          @logger.info(m)
        end
        next
      end
      begin
        newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new, f.ignore_hash)
      rescue
        @logger.fatal("Exception caught when selecting new items for #{f.name}: #{$!}")
        puts $!.backtrace
        next
      end
      @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 || updateditems.length > 0 || @logger.level == Logger::DEBUG
      begin
        if !cacherebuild
          # Sanitized feed name used inside generated Message-IDs.
          fn = f.name.gsub(/[^0-9A-Za-z]/,'')
          updateditems.each do |i|
            id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
            email = item_to_mail(@config, i, id, true, f.name, f.include_images, f.wrapto)
            f.imapaccount.updatemail(f.folder, email, id, i.date || Time::new, f.reupload_if_updated)
          end
          # reverse is needed to upload older items first (fixes gna#8986)
          newitems.reverse.each do |i|
            id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
            email = item_to_mail(@config, i, id, false, f.name, f.include_images, f.wrapto)
            f.imapaccount.putmail(f.folder, email, i.date || Time::new)
          end
        end
      rescue
        @logger.fatal("Exception caught while uploading mail to #{f.folder}: #{$!}")
        puts $!.backtrace
        @logger.fatal("We can't recover from IMAP errors, so we are exiting.")
        exit(1)
      end
      begin
        @cache.commit_cache(f.name)
      rescue
        @logger.fatal("Exception caught while updating cache for #{f.name}: #{$!}")
        next
      end
    end

    @logger.info("Finished. Saving cache ...")
    # Write to <cache>.new first, then rename, so a crash mid-write never
    # corrupts the existing cache.
    begin
      File::open("#{@config.cache}.new", 'w') { |f| @cache.save(f) }
    rescue
      @logger.fatal("Exception caught while writing new cache to #{@config.cache}.new: #{$!}")
    end
    begin
      File::rename("#{@config.cache}.new", @config.cache)
    rescue
      @logger.fatal("Exception caught while renaming #{@config.cache}.new to #{@config.cache}: #{$!}")
    end

    @logger.info("Closing IMAP connections ...")
    @config.imap_accounts.each_value do |ac|
      begin
        ac.disconnect
      rescue
        # servers tend to cause an exception to be raised here, hence the INFO level.
        @logger.info("Exception caught while closing connection to #{ac.to_s}: #{$!}")
      end
    end
  end
end