require 'rubygems'
require 'net/http'
require 'cgi'

# Thin client for the Twitter Search Atom feed.
#
# All behaviour is exposed through class methods; Twitter.search is the entry
# point and the remaining methods are the steps it is built from.
#
# NOTE(review): parse_search uses Hpricot, which this file does not require
# itself -- presumably the caller loads it first; confirm.
class Twitter
  TWITTER_SEARCH_API_URL = 'http://search.twitter.com/search.atom'

  # Assigned to Net::HTTP#read_timeout for every request.
  DEFAULT_TIMEOUT = 5

  # Request headers sent with every search.
  HEADERS = {
    "Content-Type" => 'application/rss+xml',
    "User-Agent"   => 'twitter-search'
  }

  # Runs a search and returns the parsed entries.
  #
  # @param opts [String, Hash] a raw query string (sent as the "q" parameter)
  #   or a Hash of query parameters
  # @return [Array<Hash>] one Hash per feed entry, as built by parse_search
  # @raise [RuntimeError] if opts is neither a String nor a Hash, if the query
  #   contains a near: or within: operator, or if the response code is '404'
  def self.search(opts)
    url = URI.parse(TWITTER_SEARCH_API_URL)
    url.query = sanitize_query(opts)

    # Reject unsupported operators before any network traffic happens.
    ensure_no_location_operators(url.query)

    http = Net::HTTP.new(url.host, url.port)
    http.read_timeout = DEFAULT_TIMEOUT
    res = http.start { |h| h.get("#{url.path}?#{url.query}", HEADERS) }

    # Only a 404 is treated as a failure; any other response body is parsed.
    if res.code == '404'
      raise "Twitter responded with a 404 for your query"
    end

    parse_search(res.body)
  end

  # Converts the caller's query into a URL-encoded query string.
  #
  # @param opts [String, Hash] a String becomes "q=<escaped string>"; a Hash
  #   is encoded pair by pair via sanitize_query_hash
  # @return [String] the encoded query string
  # @raise [RuntimeError] if opts is neither a String nor a Hash
  def self.sanitize_query(opts)
    case opts
    when String
      "q=#{CGI.escape(opts)}"
    when Hash
      sanitize_query_hash(opts)
    else
      raise "sanitize_query expects a String or a Hash"
    end
  end

  # Encodes a Hash as "key=value" pairs joined with "&".
  #
  # Keys and values are converted with to_s and escaped with CGI.escape.
  #
  # @param query_hash [Hash] query parameters
  # @return [String] the encoded pairs; "" for an empty Hash
  def self.sanitize_query_hash(query_hash)
    query_hash.map { |key, value|
      "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}"
    }.join('&')
  end

  # Raises if the encoded query uses the near: or within: operators.
  #
  # The check runs against the already-escaped query string, which is why it
  # looks for "%3A" (an escaped colon) rather than ":".
  #
  # @param query_string [String] an encoded query string
  # @return [nil] when neither operator is present
  # @raise [RuntimeError] when either operator is present
  def self.ensure_no_location_operators(query_string)
    return unless query_string.include?("near%3A") || query_string.include?("within%3A")

    raise "near: and within: are available from the Twitter Search web interface, but not the API. The API requires the geocode parameter. See dancroak/twitter-search README."
  end

  # Parses a search response body into an Array of entry Hashes.
  #
  # Each entry Hash has the keys :id, :published, :updated, :title, :content,
  # :source, :lang, :links (an Array of Hashes with :href, :type and :rel)
  # and :author (a Hash with :name and :uri, or empty when the entry has no
  # author element).
  #
  # @param body [String] the response body to hand to Hpricot.parse
  # @return [Array<Hash>] one Hash per <entry> element, in document order
  def self.parse_search(body)
    doc = Hpricot.parse(body)

    (doc/:entry).map do |raw_item|
      entry = {}
      entry[:id]        = raw_item.at('id').inner_html
      entry[:published] = raw_item.at('published').inner_html
      entry[:updated]   = raw_item.at('updated').inner_html
      entry[:title]     = raw_item.at('title').inner_html
      # The content element's inner HTML is re-parsed and flattened to text.
      entry[:content]   = Hpricot(raw_item.at('content').inner_html, :xhtml_strict => true).to_plain_text
      entry[:source]    = raw_item.at('twitter:source').inner_html
      entry[:lang]      = raw_item.at('twitter:lang').inner_html

      entry[:links] = raw_item.search('link').map do |s|
        { :href => s['href'], :type => s['type'], :rel => s['rel'] }
      end

      # If several author elements are present, the last one wins.
      entry[:author] = {}
      raw_item.search('author').each do |a|
        entry[:author][:name] = a.at('name').inner_html
        entry[:author][:uri]  = a.at('uri').inner_html
      end

      entry
    end
  end
end