/usr/bin/homescrape is in mpdcron 0.3+git20110303-6build1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | #!/usr/bin/env ruby
# vim: set sw=2 sts=2 tw=100 et nowrap fenc=utf-8 :
# Copyright 2010 Ali Polatel <alip@exherbo.org>
# Distributed under the terms of the GNU General Public License v2
%w{getoptlong net/http time uri rubygems nokogiri}.each {|m| require m }
# Chronic is an optional dependency: remember whether it could be loaded so
# that later code can pick a date parser accordingly.
has_chronic =
  begin
    require 'chronic'
    true
  rescue LoadError
    false
  end
# Program name used in help and version output: the invoked script's file
# name with a trailing ".rb" (if any) removed.
MYNAME = File.basename($PROGRAM_NAME, '.rb')
# Version string printed by --version.
MYVERSION = '0.3'
# Raised when the fetched page reports that the requested user does not exist.
UserNotFound = Class.new(StandardError)
# Scrapes the track listing pages of a last.fm user and hands every track
# found to a caller-supplied block.
class Scraper
  # URL template of a user's track listing; %s is replaced by the user name.
  LASTFM_URL = 'http://www.last.fm/user/%s/tracks'
  # strptime format of the timestamp read from a date cell's <abbr title="...">.
  LASTFM_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
  # Matches table rows that have a subject cell, a loved cell and a date cell.
  TRACK_ROW_XPATH = <<-EOF
    //tr[
          td[@class="subjectCell"]
      and td[@class="lovedCell"]
      and td[@class="dateCell last"]
    ]
  EOF
  attr_accessor :username, :url

  # username:: last.fm user whose tracks will be scraped.
  #
  # Proxy settings are taken from the http_proxy environment variable when it
  # is set; host, port and credentials are each optional.
  def initialize(username)
    @username = username
    @url = sprintf(LASTFM_URL, username)
    # Set up proxy
    @proxy_url = URI.parse(ENV['http_proxy']) if ENV['http_proxy']
    @proxy_host = @proxy_url.host if @proxy_url and @proxy_url.host
    @proxy_port = @proxy_url.port if @proxy_url and @proxy_url.port
    if @proxy_url and @proxy_url.userinfo
      # Split on the first colon only, so a password containing ':' survives.
      @proxy_user, @proxy_pass = @proxy_url.userinfo.split(':', 2)
    end
  end

  # Fetches the listing starting at +page+ and calls +block+ with
  # (artist, title, love) for every track row, following pages up to the
  # last page advertised on page 1.
  #
  # since:: a Date; scanning stops at the first track dated before it.
  # page::  page number to start from (the last page number is only detected
  #         when page 1 is fetched).
  #
  # Raises UserNotFound when the page body contains "User not found".
  def fetch(since, page=1, &block)
    doc = load_page(page)
    @lastpage = last_page_number(doc) if page == 1
    doc.xpath(TRACK_ROW_XPATH).each do |tag|
      # Fixed child positions; presumably the odd indices are whitespace text
      # nodes between the cells -- this depends on last.fm's exact markup.
      subjectCell = tag.children[2]
      lovedCell = tag.children[4]
      dateCell = tag.children[8]
      artist = subjectCell.children[1].content
      title = subjectCell.children[3].content
      love = lovedCell.children[1] ? true : false
      # The leading "." keeps the lookup inside this row's date cell; a bare
      # "//abbr" is an absolute path and would always return the first <abbr>
      # of the whole document.
      date = Date.strptime(dateCell.at('.//abbr/@title').to_s, LASTFM_DATE_FORMAT)
      # Leaves fetch altogether: this track is older than the cut-off.
      return if since > date
      block.call artist, title, love
    end
    # Strictly less-than: the last page itself must not trigger another request.
    fetch(since, page + 1, &block) if page < @lastpage
  end

  private

  # Downloads listing page +page+ (through the configured proxy, if any) and
  # returns it parsed as an HTML document.
  def load_page(page)
    uri = URI.parse(@url + "?page=#{page}")
    req = Net::HTTP::Get.new(uri.request_uri)
    res = Net::HTTP::Proxy(@proxy_host, @proxy_port,
                           @proxy_user, @proxy_pass).start(uri.host, uri.port) {|http|
      http.request(req)
    }
    data = res.body
    raise UserNotFound if data =~ /User not found/
    Nokogiri::HTML data
  end

  # Returns the number shown in the document's first "a.lastpage" link, or 1
  # when there is no such link.
  def last_page_number(doc)
    links = doc.css('a.lastpage')
    links.length != 0 ? links[0].content.to_i : 1
  end
end
# Writes the help text to +out+ and terminates the program with exit
# status +code+.
def usage(out, code)
  help_lines = [
    "#{MYNAME} -- import last.fm data",
    "Usage: #{MYNAME} [OPTIONS] USERNAME",
    "Options:",
    "  --help, -h      Display help and exit",
    "  --version, -V   Display version and exit",
    "  --since, -s     Import data since the given date"
  ]
  out.puts help_lines.join("\n")
  exit code
end
# Escapes +src+ so it can be placed inside a single-quoted literal of an
# expression that is itself wrapped in a double-quoted shell word (this is
# how the import loop in this file builds its eugene command lines).
#
# * every ' is doubled, closing-quote-proof for the single-quoted literal;
# * every character that stays special inside shell double quotes
#   (backslash, double quote, dollar sign, backtick) gets a backslash, so the
#   text can neither end the quoted word nor trigger expansion or command
#   substitution.
#
# Returns a new String; +src+ is not modified.
def quote(src)
  # Block form: the replacement is built literally, avoiding the backslash
  # rules of gsub replacement strings.
  src.gsub("'", "''").gsub(/[\\"$`]/) { |ch| "\\#{ch}" }
end
# ---- Command line handling -------------------------------------------------
opts = GetoptLong.new(
    [ '--help',          '-h', GetoptLong::NO_ARGUMENT ],
    [ '--version',       '-V', GetoptLong::NO_ARGUMENT ],
    [ '--since',         '-s', GetoptLong::REQUIRED_ARGUMENT ])
# Default cut-off: the Unix epoch, i.e. import everything.
$since = Date.parse(Time.at(0).to_s)
begin
  opts.each do |opt, arg|
    case opt
    when '--help'
      usage($stdout, 0)
    when '--version'
      puts "#{MYNAME}-#{MYVERSION}"
      exit 0
    when '--since'
      begin
        # Chronic.parse returns nil for text it cannot understand, whereas
        # Time.parse raises ArgumentError; treat both the same way.
        parsed = has_chronic ? Chronic.parse(arg) : Time.parse(arg)
        raise ArgumentError, 'unparsable date' if parsed.nil?
        $since = Date.parse(parsed.to_s)
      rescue ArgumentError
        $stderr.puts "#{MYNAME}: invalid date for --since: #{arg}"
        exit 1
      end
    end
  end
rescue GetoptLong::Error
  # GetoptLong has already reported the offending option on stderr.
  usage($stderr, 1)
end
usage($stderr, 1) if ARGV.empty?

# ---- Import loop -----------------------------------------------------------
# Artist and title are scraped from a web page and therefore untrusted.  The
# commands are run with the argument-list form of system, which bypasses the
# shell, so only the quoting of the expression's '...' literals is needed.
sql_literal = lambda { |text| text.gsub("'", "''") }
# Human-readable rendering of an argument list (last argument shown in
# double quotes); used for the progress output only, never executed.
printable = lambda do |argv|
  shown = '"' + argv.last.gsub(/["\\]/) { |ch| "\\#{ch}" } + '"'
  (argv[0..-2] + [shown]).join(' ')
end

importer = Scraper.new ARGV[0]
begin
  importer.fetch($since) do |artist, title, love|
    track_expr = "artist='#{sql_literal.call(artist)}' and title='#{sql_literal.call(title)}'"
    artist_expr = "name='#{sql_literal.call(artist)}'"
    commands = [['eugene', 'count', '1', track_expr]]
    commands << ['eugene', 'love', track_expr] if love
    commands << ['eugene', 'count', '--artist', '1', artist_expr]
    commands.each do |argv|
      puts "* " + printable.call(argv)
      system(*argv)
    end
  end
rescue UserNotFound
  $stderr.puts "#{MYNAME}: last.fm user not found: #{ARGV[0]}"
  exit 1
end
 |