#!/usr/bin/env ruby
#
# -*- ruby -*-
#
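# Searches the Google cache: for each command-line argument, issues
# "site:<argument>" search queries and prints the results found.
#
# Options (see parse_args below): --html/-h, --links/-l, --titles/-t.
# When neither --links nor --titles is given, both are enabled.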
#
# Created: Fri May 11 06:20:36 2007
#
require 'net/http'
require 'cgi'
require 'getoptlong'
# Writes a progress message to standard error, prefixed with "*** ".
#
# msg - String to report; it is concatenated to the prefix, so a
#       non-String argument raises TypeError.
def log(msg)
  line = "*** " + msg
  STDERR.puts(line)
end
# Logs the request path, then fetches +page+ from +host+ over HTTP on
# port 80 and, when the response message is "OK", starts splitting the
# response body.
#
# NOTE(review): the text of this definition appears to be damaged. The
# body of +scan+ breaks off inside the regex literal passed to
# +data.split+, and the lines that follow read like the tail of a
# separate tag-stripping helper (the main loop calls +no_tags+, for which
# no +def+ is visible). Presumably text between "<" and ">" was lost when
# the file was extracted -- restore from the original script before
# relying on this code.
#
# NOTE(review): +re+ is never referenced in the visible part and the main
# loop passes a block to +scan+, so the missing part presumably matched
# +re+ against each piece and yielded the captures -- TODO confirm.
def scan(page, re, host)
log(page + "...")
h = Net::HTTP.new(host, 80)
# NOTE(review): this two-value assignment expects get to return a
# [response, body] pair; newer Net::HTTP versions appear to return only
# the response, which would leave +data+ nil -- confirm against the Ruby
# version in use.
resp, data = h.get(page, nil)
if resp.message == "OK"
lines = data.split /
]+>/, "")
# Remove entity-like sequences: "&", any run of non-";" characters, ";".
res = res.gsub(/&[^;]+;/, "")
return res
end
# Reads the command-line switches through GetoptLong and records each one
# that is present by setting the matching global to true.
#
# Recognized switches (none of them takes an argument):
#   --html,   -h  => $html
#   --links,  -l  => $links
#   --titles, -t  => $titles
#
# Globals whose switch was not given are left untouched.
def parse_args
  parser = GetoptLong.new(
    ["--html", "-h", GetoptLong::NO_ARGUMENT],
    ["--links", "-l", GetoptLong::NO_ARGUMENT],
    ["--titles", "-t", GetoptLong::NO_ARGUMENT]
  )

  parser.each do |name, _value|
    case name
    when "--html", "-h" then $html = true
    when "--links", "-l" then $links = true
    when "--titles", "-t" then $titles = true
    end
  end
end
########## Main ##########
# Output-mode flags; parse_args switches them on from the command line.
$html = false
$links = false
$titles = false
parse_args()
# With neither --links nor --titles requested, print both.
$links = $titles = true if !$links and !$titles
# Each entry left in ARGV is used as the term of a "site:" query
# (presumably GetoptLong has already removed the option switches -- verify).
ARGV.each do |arg|
# Result offset sent as the "start" query parameter; advanced by 10 per pass.
start = 0
while true do
host = "www.google.com"
page = "/search?q=site:" + arg
# The first page is requested without a "start" parameter.
page += "&start=" + start.to_s if start != 0
# Number of results the block below received for this page.
count = 0
# NOTE(review): the regex literal below looks truncated at its start (it
# opens with a line break followed by "]*>"); presumably markup text was
# lost in extraction -- restore from the original script.
scan(page, /
]*>(.*)<\/a><\/h2>.*([^\s]+)\s+-[^<]*<\/span>/mi, host) do |x|
# x[0] is treated as the title and x[1] as the link; both go through
# no_tags, for which no definition is visible under that name here.
title = no_tags x[0]
link = no_tags x[1]
# Prefix a scheme when the link does not already begin with "http".
link = "http://" + link if !/^http.*/.match link
# NOTE(review): the --html branch as written prints only the title; the
# string literal spans two lines and looks as if HTML markup around the
# title (and the link) was stripped -- TODO restore.
if $html then
puts "" + title + "
"
elsif $links && $titles then
puts title + ":" + link
elsif $links
puts link
elsif $titles
puts title
end
count += 1
end
# A page that produced no results ends the paging loop for this argument.
break if count == 0
start += 10
end
end