#!/usr/bin/env ruby
#
# -*- ruby -*-
#
# Searches the google cache
#
# See file for options
#
# Created: Fri May 11 06:20:36 2007
#
# NOTE(review): this file was recovered from a copy whose line breaks were
# collapsed and whose "<...>" fragments had been stripped.  The pieces that
# had to be reconstructed (RESULT_SPLIT, parts of RESULT_RE, the loop inside
# scan_html and the HTML output markup) are marked below -- confirm against
# an intact copy if one exists.

require 'net/http'
require 'cgi'
require 'getoptlong'

# Boundary used to cut a result page into per-hit chunks before matching.
# NOTE(review): the original delimiter was lost; a zero-width split in front
# of every "<h2" is an assumption chosen so each chunk starts with its heading.
RESULT_SPLIT = /(?=<h2)/i

# Pattern applied to each chunk: capture 1 is the heading markup (title),
# capture 2 is the first whitespace-free token inside a following <span>
# that is followed by " -" (the displayed URL).
# NOTE(review): the text before "]*>" and the tag before the second capture
# were stripped from the source; "<h2[^>" and "<span[^>]*>" are assumptions.
RESULT_RE = /<h2[^>]*>(.*)<\/a><\/h2>.*<span[^>]*>([^\s]+)\s+-[^<]*<\/span>/mi

# Writes a progress/diagnostic message to standard error, prefixed with "*** ".
def log(msg)
  STDERR.puts "*** " + msg
end

# Splits +data+ into chunks and yields the capture groups (an Array) of every
# chunk that matches +re+.
# NOTE(review): reconstructed -- the original loop body was lost.  The caller
# indexes the yielded value with [0] and [1], so an array of captures is
# assumed here.
def scan_html(data, re)
  data.to_s.split(RESULT_SPLIT).each do |chunk|
    m = re.match(chunk)
    yield m.captures if m
  end
end

# Fetches +page+ (a request path) from +host+ over plain HTTP on port 80 and
# yields the captures of every result chunk matching +re+.
#
# Net::HTTP#get returns only the response object on current Ruby, so the body
# is read from the response instead of destructuring a [response, body] pair.
# Non-success responses are logged and yield nothing.
def scan(page, re, host, &block)
  log(page + "...")
  h = Net::HTTP.new(host, 80)
  resp = h.get(page)
  unless resp.is_a?(Net::HTTPSuccess)
    log("request failed: #{resp.code} #{resp.message}")
    return
  end
  scan_html(resp.body, re, &block)
end

# Returns +str+ with every "<...>" tag removed, and then every "&...;"
# entity removed (entities are dropped, not decoded).
# NOTE(review): the method header and the first regex's leading "<[^>" were
# stripped from the source and are restored here.
def no_tags(str)
  str.gsub(/<[^>]+>/, "").gsub(/&[^;]+;/, "")
end

# Prepends "http://" to +link+ unless it already starts with "http".
def normalize_link(link)
  link.start_with?("http") ? link : "http://" + link
end

# Builds the output line for one hit according to the $html / $links /
# $titles flags.  Returns nil if no flag applies.
# NOTE(review): the anchor markup of the --html form was stripped from the
# source; "<a href=...>title</a><br>" is a reconstruction.  Title and link
# are HTML-escaped because they originate from a remote page.
def format_result(title, link)
  if $html
    "<a href=\"" + CGI.escapeHTML(link) + "\">" + CGI.escapeHTML(title) + "</a><br>"
  elsif $links && $titles
    title + ":" + link
  elsif $links
    link
  elsif $titles
    title
  end
end

# Parses the command line and sets the global output flags:
#   --html   / -h   sets $html
#   --links  / -l   sets $links
#   --titles / -t   sets $titles
# GetoptLong removes the options it recognises from ARGV.
def parse_args
  opts = GetoptLong.new(
    [ "--html",   "-h", GetoptLong::NO_ARGUMENT ],
    [ "--links",  "-l", GetoptLong::NO_ARGUMENT ],
    [ "--titles", "-t", GetoptLong::NO_ARGUMENT ]
  )

  opts.each do |opt, _arg|
    case opt
    when "--html", "-h"   then $html = true
    when "--links", "-l"  then $links = true
    when "--titles", "-t" then $titles = true
    end
  end
end

########## Main ##########

$html = false
$links = false
$titles = false

parse_args

# With neither --links nor --titles given, show both.
$links = $titles = true if !$links && !$titles

ARGV.each do |arg|
  start = 0
  host = "www.google.com"

  # Walk the result pages ten hits at a time until a page yields no matches.
  loop do
    # The argument is URL-encoded so characters such as "&" or spaces cannot
    # alter the query string.
    page = "/search?q=site:" + CGI.escape(arg)
    page += "&start=" + start.to_s if start != 0

    count = 0
    scan(page, RESULT_RE, host) do |x|
      title = no_tags(x[0])
      link = normalize_link(no_tags(x[1]))
      line = format_result(title, link)
      puts line if line
      count += 1
    end

    break if count == 0
    start += 10
  end
end