#!/usr/bin/env ruby
#
# Uses search plugins to search the web. Keeps a local cache of plugin
# definitions to avoid a network lookup every time. Run 'search' with no
# arguments for options.
#
# Examples:
#
#   % search google mr potatohead
#   -> searches google.com for 'mr potatohead'
#
#   % search imdb mr potatohead
#   -> searches imdb.com for 'mr potatohead'
#
# After a successful search, if the plugin name is neither a command in
# your path nor an alias, an alias from the plugin name to
# 'search <plugin>' is appended to ~/.profile. For example, in the
# previous two examples, if 'google' and 'imdb' aren't already commands,
# you could save 7 characters (one is a space) by just typing:
#
#   % google mr potatohead
#   -> searches google.com for 'mr potatohead'
#
#   % imdb mr potatohead
#   -> searches imdb.com for 'mr potatohead'
#
require 'cgi'
require 'date'
require 'open-uri'
require 'rexml/document'
require 'shellwords'

# Command-line front end: resolves a search plugin by name (from the
# cache in ~/.searchdb or from searchplugins.net), builds the search URL
# and either lists scraped results or opens the URL in the browser.
class Search
  # XML definition of the result-scraping plugin for google. It is
  # expected to contain a <Conversion template="..."/> element, which
  # ResultPlugin#search_results reads.
  #
  # NOTE(review): the body of this heredoc was missing from the reviewed
  # copy of the file (only the opening and the HERE terminator survived).
  # The document below is a placeholder without a Conversion element, so
  # google searches currently fall back to opening the browser. Restore
  # the original definition from version control.
  GOOGLE_RESULT_PLUGIN_XML = <<~HERE
    <!-- placeholder: the original plugin definition is missing -->
    <ResultPlugin/>
  HERE

  # Extracts the plugin definition URL from the searchplugins.net listing.
  #
  # NOTE(review): only the tail of this pattern ("S</a>") survived in the
  # reviewed copy. The href capture is a reconstruction based on how the
  # match is used (capture 0 is fetched as a URL) - confirm against the
  # original source.
  PLUGIN_LINK_RE = /<a[^>]+href="([^"]+)"[^>]*>S<\/a>/i

  # One scraped search hit. Any field may be nil when the template does
  # not bind it.
  class SearchResultItem
    attr_reader :title, :link, :description

    def initialize(title, link, description)
      @title = title
      @link = link
      @description = description
    end
  end

  # A result template compiled into a regular expression plus the list
  # of variable names bound to its capture groups.
  class CompiledSearchResult
    # Matches a variable capture expression such as {title:[^<]+}.
    VARIABLE_RE = /\{(\w+):([^}]+)\}/

    def initialize(template)
      @template = template
      @variable_bindings = []
      compile
    end

    # String[url] -> List[SearchResultItem]:
    # Fetches the page at url and returns one item per template match.
    def find_items(url)
      # No XPath or XML parsing here: most result pages are not
      # well-formed, so the template is applied as a regular expression.
      html = URI.open(url, &:read).delete("\n")
      items = []
      html.scan(@re) do |res|
        # Bindings are in capture order, so a variable's position in
        # @variable_bindings is its index into res.
        items << SearchResultItem.new(captured_result(res, 'title'),
                                      captured_result(res, 'link'),
                                      captured_result(res, 'description'))
      end
      items
    end

    private

    def remove_html_tags(s)
      s.gsub(/<[^>]+>/, '')
    end

    # Returns the cleaned-up capture bound to name, or nil when the
    # template has no such variable or the group did not participate.
    def captured_result(res, name)
      index = @variable_bindings.index(name)
      raw = index && res[index]
      raw && remove_html_tags(CGI.unescapeHTML(raw))
    end

    # Converts the template into a regular expression with one capture
    # group per variable. Input like
    #
    #   <a href="{link:[^"]+}">{title:[^<]+}</a> {description:[^<]+}<
    #
    # becomes
    #
    #   <a href="([^"]+)">([^<]+)</a> ([^<]+)<
    #
    # with link bound to the 1st capture, title to the 2nd, and so on.
    #
    # Bindings are positional, so a variable's own expression must not
    # contain capturing groups.
    def compile
      # Block form: the variable's expression is inserted verbatim, with
      # no backslash interpretation by gsub.
      pattern = @template.gsub(VARIABLE_RE) do
        @variable_bindings << Regexp.last_match(1)
        "(#{Regexp.last_match(2)})"
      end
      @re = Regexp.new(pattern)
    end
  end

  # Scrapes a result page using the template found in the plugin XML.
  class ResultPlugin
    attr_reader :name

    def initialize(name, xml)
      @xml = xml
      @name = name
    end

    # String[url] -> List[SearchResultItem]
    # Returns nil (after a message on STDERR) when the XML has no
    # Conversion template.
    def search_results(url)
      template = nil
      REXML::Document.new(@xml).elements.each('//Conversion/') do |el|
        # #value yields the attribute with XML entities decoded, which is
        # what the regular expression needs.
        attr = el.attribute('template')
        template = attr.value if attr
      end
      unless template
        STDERR.puts "Couldn't find template"
        return nil
      end
      search_results_from_template template, url
    end

    def search_results_from_template(template, url)
      CompiledSearchResult.new(template).find_items(url)
    end
  end

  # A search plugin definition: knows its search URL template and its
  # description.
  class SearchPlugin
    attr_reader :name

    def initialize(name, xml)
      @xml = xml
      @name = name
    end

    # Returns the search URL for query (already URL-escaped by the
    # caller), or nil when the plugin has no Url template.
    def url(query)
      el = first_element('//Url/')
      attr = el && el.attribute('template')
      return nil unless attr

      # #value decodes entities such as &amp;. Block form keeps
      # backslashes in the query literal.
      attr.value.gsub('{searchTerms}') { query }
    end

    # Returns the plugin description with markup removed, or nil when
    # there is none.
    def explain
      el = first_element('//Description/')
      el && el.to_s.gsub(/<[^>]+>/, '')
    end

    def to_s
      name
    end

    private

    # First element matching xpath, or nil.
    def first_element(xpath)
      REXML::Document.new(@xml).elements.each(xpath) { |el| return el }
      nil
    end
  end

  def initialize
    @verbose = false
  end

  # Runs the command and exits the process with its status.
  def main(argv)
    exit real_main(argv)
  end

  # Runs the command and returns the exit status: 0 after a search, help,
  # cache clean or when the plugin cannot be found; 1 for usage errors
  # and after -explain.
  def real_main(argv)
    if argv.empty?
      print_usage
      return 1
    end

    explain = false
    clean = false
    force = false
    real_args = []
    argv.each do |arg|
      case arg
      when '-h', '-help'
        print_usage
        return 0
      when '-v', '-verbose' then @verbose = true
      when '-e', '-explain' then explain = true
      when '-c', '-clean'   then clean = true
      when '-f', '-force'   then force = true
      else real_args << arg
      end
    end

    dir = init_database
    return clean_cache(dir) if clean

    plugin = real_args.shift
    unless plugin
      print_usage
      return 1
    end
    # Escape each word so characters such as '&' or '#' cannot break the
    # search URL.
    query = real_args.map { |word| CGI.escape(word) }.join('+')

    found = find_plugin(plugin, dir, force)
    unless found
      note "Couldn't find plugin, returning"
      return 0
    end
    note "Have plugin '#{found}'"

    if explain
      puts found.explain
      return 1
    end

    if query.empty?
      STDERR.puts 'Missing query'
      print_usage
      return 1
    end

    url = found.url(query)
    unless url
      STDERR.puts "Plugin '#{plugin}' has no search URL"
      return 1
    end

    result_plugin = find_search_result_plugin(plugin)
    items = result_plugin && result_plugin.search_results(url)
    if items.nil? || items.empty?
      # No way to list results here, just open the browser (boo!)
      open_in_browser url
    else
      choose_result items
    end

    ensure_alias plugin
    0
  end

  private

  # Lists items and opens the one the user picks. Returns on 'q', an
  # empty line or end of input.
  def choose_result(items)
    loop do
      items.each_with_index do |item, i|
        prefix = "[#{i + 1}] "
        indent = ' ' * prefix.length
        puts prefix + item.title if item.title
        puts indent + item.link if item.link
        puts indent + item.description if item.description
      end
      print "1-#{items.length} or 'q' to quit> "
      STDOUT.flush

      line = STDIN.gets # nil at end of input
      break if line.nil?

      line = line.strip.downcase
      break if line.empty? || line.start_with?('q')

      index = line.to_i
      if index.between?(1, items.length)
        open_in_browser items[index - 1].link
        break
      end
      STDERR.puts 'Invalid index, dummy'
    end
  end

  def open_in_browser(url)
    note "Opening #{url}"
    # Argument-list form: the URL never passes through a shell.
    system 'open', url
  end

  # Runs "<cmd> <plugin>" through the shell and returns its combined
  # stdout and stderr.
  def shell_output(cmd, plugin)
    `#{cmd} #{Shellwords.escape(plugin)} 2>&1`
  end

  # If plugin is neither an alias nor a command in the path, appends an
  # alias for it to ~/.profile (unless one is already there).
  def ensure_alias(plugin)
    alias_str = shell_output('alias', plugin)
    which_str = shell_output('which', plugin)
    return unless alias_str =~ /not found$/ && which_str == ''

    dot_profile = File.join(ENV['HOME'], '.profile')
    alias_re = /^alias #{Regexp.escape(plugin)}=/
    if File.exist?(dot_profile) && File.foreach(dot_profile).any? { |l| l.match?(alias_re) }
      note "Already in #{dot_profile}"
    else
      note "Writing alias to #{dot_profile}"
      # Append mode creates the file when it does not exist yet.
      File.open(dot_profile, 'a') do |out|
        out.write "\nalias #{plugin}='#{File.basename($0)} #{plugin}'\n"
      end
    end
  end

  # Removes every cached plugin and the cache directory itself.
  def clean_cache(dir)
    note 'Cleaning cache'
    Dir[File.join(dir, '*.xml')].each { |f| File.delete f }
    Dir.delete dir
    0
  end

  def find_search_result_plugin(plugin)
    # Right now only deal with google. . .
    return nil unless plugin == 'google'

    ResultPlugin.new plugin, GOOGLE_RESULT_PLUGIN_XML
  end

  # Returns the SearchPlugin called plugin, from the cache in dir unless
  # force is set, otherwise from searchplugins.net (caching the result).
  # Returns nil when no plugin is found.
  def find_plugin(plugin, dir, force)
    note "Finding plugin '#{plugin}'"

    cache_file = File.join(dir, "#{plugin}.xml")
    if File.exist?(cache_file)
      if force
        note 'Skipping cached version'
      else
        note 'Found a cached version'
        return SearchPlugin.new(plugin, File.read(cache_file))
      end
    end

    # Look up from searchplugins.net and take the first match
    note 'Searching searchplugins.net'
    url = 'http://www.searchplugins.net/pluginlist.aspx?q=' +
          CGI.escape(plugin) + '&mode=title'
    plugin_url = get_html(url)[PLUGIN_LINK_RE, 1]
    return nil unless plugin_url

    xml = get_html(plugin_url)
    File.write cache_file, xml
    SearchPlugin.new plugin, xml
  end

  def get_html(url)
    note "Opening #{url}"
    URI.open(url, &:read)
  end

  # Creates the cache directory if needed and returns its path.
  def init_database
    dir = File.join(ENV['HOME'], '.searchdb')
    Dir.mkdir dir unless File.directory?(dir)
    dir
  end

  def note(msg)
    STDERR.puts msg if @verbose
  end

  def print_usage
    STDERR.puts "Usage: #{File.basename($0)} [options] <plugin> <query>"
    STDERR.puts 'where options include'
    STDERR.puts ' -h || -help print this message'
    STDERR.puts ' -v || -verbose be loud'
    STDERR.puts ' -e || -explain prints a description of the plugin'
    STDERR.puts ' -c || -clean clean database of plugins'
    STDERR.puts ' -f || -force force a network lookup of plugin'
  end
end

Search.new.main ARGV