# $Id: Debget.pm,v 1.3 2005-07-11 21:25:17 roderick Exp $
#
# Roderick Schertler <roderick@argon.org>

use strict;

# Copyright (C) 2005 Roderick Schertler
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# For a copy of the GNU General Public License write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

=head1 NAME

Debian::Debget - helper functions used by B<debget> et al

=head1 SYNOPSIS

XXX

=head1 DESCRIPTION

These are functions used by B<debget> and related scripts, but you can
use them, too.  I will do everything possible to retain backwards
compatibility for the interfaces documented here.

Most of these functions will C<die> if they're called in a context
(scalar, list) which doesn't match what they expect.  This is so that I
will be able to change them to return something different in the other
context without breaking old code.

=cut

package Debian::Debget;

use base qw(Exporter);

use Carp		  ();
use List::Util		qw(max);
use Memoize		qw(memoize);

our $VERSION = q$Revision: 1.3 $ =~ /(\d\S+)/ ? $1 : '?';

our (@EXPORT, @EXPORT_OK);

BEGIN {
    @EXPORT	= qw(
	binary_package_info
	cmp_debian_versions
	madison
	package_component
	package_source
	pool_dir
	source_package_info
	upstream_version
    );

    # XXX document these?  then you have to keep their interfaces
    @EXPORT_OK	= qw(
	$Debug
	$Debug_fh
	$Exit
	$Me
	debug
	debug_hash
	debug_nonl
	dstr
	get_uri
	getopt
	xconfess
	xcroak
	xdie
	xwarn
    );
}

use subs grep { /^[a-z]/ } @EXPORT;

# PDO == packages.debian.org

our $Pdo_uri_base	||= 'http://packages.debian.org';
our $Pdo_uri_binary	||= "$Pdo_uri_base/%s";
our $Pdo_uri_source	||= "$Pdo_uri_base/src:%s";

# XXX can LWP get a page in a different language?
our $Pdo_re_no_package = qr/Sorry, your search gave no results/;

(our $Me		= $0) =~ s|.*/||;
our $Debug		||= 0;
our $Debug_fh		||= \*STDERR;
our $Exit		||= 0;

sub dstr ($) {
    local $_ = shift;
    return 'undef' if !defined;
    return "$_" if ref;
    return "[$_]" if !/[\[\]\x00-\x1f\x7f-\xff]/;
    s/([%\[\]\x00-\x1f\x7f-\xff])/sprintf "%%%02X", ord $1/eg;
    return $_;
}

sub debugx {
    my $s = (caller 2)[3];
    $s =~ s/.*:://;
    print $Debug_fh "$s: ", @_;
}

sub debug_nonl {
    debugx @_
	if $Debug;
}

sub debug {
    debugx @_, "\n"
	if $Debug;
}

sub debug_hash {
    return unless $Debug;
    my ($prefix, $indent, %h) = @_;

    $indent = '' if !defined $indent;
    my $klen = max map { length dstr $_ } keys %h;

    debugx "$indent$prefix:\n";
    for my $k (sort keys %h) {
	debugx sprintf "%s  %-${klen}s => %s\n", $indent, dstr $k, dstr $h{$k};
    }
}

sub die_prefix {
    (my $program = $0) =~ s|.*/||;
    my @c = caller 2;
    my $sub = $c[0] eq 'main' ? '' : "$c[3]: ";
    return "$program: $sub";
}

sub xconfess	{ Carp::confess	die_prefix, @_ }
sub xcroak	{ Carp::croak	die_prefix, @_ }
sub xdie	{ die		die_prefix, @_ }
sub xwarn	{ warn		die_prefix, @_; $Exit ||= 1 }

# This is basically Getopt::Long but it has the defaults set up the way I
# think they should be.

sub getopt {
    # Don't bother if there aren't any switches.  This test works because
    # I'm setting $REQUIRE_ORDER.
    return 1 unless @ARGV && substr($ARGV[0], 0, 1) eq '-';

    my $bundling = 0;
    if (@_ && ($_[0] eq -bundle || $_[0] eq -bundling)) {
	$bundling = 1;
	shift;
    }

    {
	# I'm setting this environment variable when loading Getopt::Long
	# so that the defaults for options added later (which aren't set
	# explicitly below) are more likely to match what I'd like.
	local $ENV{POSIXLY_CORRECT} = 1;
	require Getopt::Long;
    }

    Getopt::Long->VERSION(2.19);
    Getopt::Long::Configure(
	'no_auto_abbrev',
	'no_getopt_compat',
	'require_order',
	$bundling ? 'bundling' : (),
	'no_ignore_case',
	'prefix_pattern=(--|-)',
    ) if 1;

    # The getopt function puts the vars into its caller's package so
    # it's necessary to jump to it so that its caller is my caller.
    goto &Getopt::Long::GetOptions;
}

#------------------------------------------------------------------------------

=head1 IMPORTABLES

=over 4

=cut

# package info from packages.debian.org ---------------------------------------

=item binary_package_info I<package>

This function returns some information about a binary package.

If the I<package> isn't found nothing is returned.  If it is found, the
return value is a data structure like this:

    $r->{$distribution}{$architecture} = $version;
    # $distribution => 'stable', 'experimental', etc.
    # $architecture => 'i386', 'powerpc', etc.

If something goes seriously wrong B<binary_package_info> will C<die>.

=item source_package_info I<package>

This function returns some information about a source package.

If the I<package> isn't found nothing is returned.  If it is found, the
return value is a data structure like this:

    $r->{$distribution} = [$version, @binary_package_list];
    # $distribution => 'stable', 'experimental', etc.
    # $version      => '123.4-5', etc.
    # @binary_package_list => qw(nethack-common nethack-x11), etc.

=cut

{ my $ua;
sub get_uri {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($uri) = @_;

    if ($Debug > 1) {
	require LWP::Debug;
	LWP::Debug::level('+');
    }

    if (!$ua) {
	require LWP::UserAgent;
	$ua = LWP::UserAgent->new(env_proxy => 1);
	$ua->env_proxy;
    }

    my $resp = $ua->get($uri);
    if (!$resp->is_success) {
    	xcroak "error getting $uri: ", $resp->status_line;
    }

    return $resp;
} }

# My current method of finding what version of a package is to parse a
# http://packages.debian.org query.

# This lacks some info I'd like
# (source for a binary package, component of either kind of package).  I'd
#
# also using xml
#
# mailed 1 guy, mail the other

# Originally I tried parsing the packages.debian.org data using
# HTML::TreeBuilder, but I ran into a problem described in
#
#    news:87oea1gp9l.fsf@eeyore.ibcinc.com
#    http://groups-beta.google.com/group/comp.lang.perl.modules/msg/b7c72820e4895b42
#
# I also sent it to Sean Burke, but haven't heard back.

sub parse_pdo_binary {
    @_ == 2
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($uname, $content) = @_;

    debug "parsing $uname from:\n", $content
	if $Debug > 1;

    my $p = "parse_pdo_binary(): failure parsing package web page for $uname: ";

#  <h3>Package gthumb</h3>
#  <ul>
#    <li class="etch"><a class="resultlink" href="/etch/gthumb">etch (stable)</a> (gnome):
#	an image viewer and browser
#      <br>3:2.8.0-1: alpha amd64 arm hppa i386 ia64 mips mipsel powerpc s390 sparc
#    </li>
#    <li class="etch-m68k"><a class="resultlink" href="/etch-m68k/gthumb">etch-m68k</a> (gnome):
#	an image viewer and browser
#      <br>3:2.8.0-1: m68k
#    </li>
#    <li class="lenny"><a class="resultlink" href="/lenny/gthumb">lenny (testing)</a> (gnome):
#	navigatore e visualizzatore di immagini
#      <br>3:2.10.8-1: alpha amd64 arm armel hppa i386 ia64 mips mipsel powerpc s390 sparc
#    </li>
#    <li class="sid"><a class="resultlink" href="/sid/gthumb">sid (unstable)</a> (gnome):
#	navigatore e visualizzatore di immagini
#      <br>3:2.10.8-1: alpha amd64 arm armel hppa hurd-i386 i386 ia64 kfreebsd-amd64 kfreebsd-i386 m68k mips mipsel powerpc s390 sparc
#    </li>
#    <li class="experimental"><a class="resultlink" href="/experimental/gthumb">experimental</a> (gnome):
#	an image viewer and browser
#      <br>3:2.11.0~svn2319-1+b1: alpha amd64 hppa i386 ia64 mipsel powerpc s390 sparc
#      <br>3:2.11.0~svn2319-1 [<strong class="pmarker" title="ports of packages to architectures not yet available in Debian">debports</strong>]: arm kfreebsd-amd64 kfreebsd-i386 mips
#    </li>
#  </ul>

    my %dist_arch_ver;
    while ($content =~ m|^\s*<li[^>]+><a[^>]+>([\w-]+)( \((.*)\))?</a>.+\n
    				\s*\w+.*\n
    				\s*\n
    				\s*<br>(.+)\n
    				\s*</li>\n|xmg) {
    # $1 = "sid"
    # $2 = " (unstable)"
    # $3 = "unstable"
    # $4 = "1.6: all"
	my ($dist, $version_lines) = ($1, $4);
	debug_nonl "dist=$dist version_lines:\n", $version_lines;

	while ($version_lines =~ m|^\s*(\S+):((?:\s+\w+)+)|mg) {
	    my ($ver, $arch_list) = ($1, $2);
	    my @arch = split ' ', $arch_list;

	    for my $arch (@arch) {
	    	debug "dist=$dist ver=$ver arch=$arch";
		if (exists $dist_arch_ver{$dist}{$arch}) {
		    # There's a second version listed for this distribution.
		    # I didn't expect this could happen, but it did:

# <li><a href="/oldstable/games/nethack">oldstable</a> (games): Overhead dungeon-c
# rawler game (dummy package)
# <br>3.4.0-3.0woody4: all
# <br>3.4.0-3.0woody3: all
# </li>

		    if (cmp_debian_versions($ver, $dist_arch_ver{$dist}{$arch})
		    	    <= 0) {
			debug "ignoring older version";
			next;
		    }
		    else {
			debug "using newer version";
		    }
		}
		$dist_arch_ver{$dist}{$arch} = $ver;
	    }
	}
	if (!$dist_arch_ver{$dist}) {
	    xdie $p, "no versions parsed for dist=$dist from:\n",
		$version_lines;
	}
    }

    if (!%dist_arch_ver) {
    	if ($content =~ /$Pdo_re_no_package/) {
	    return;
	}
	xdie $p, "main parse failed";
    }

    return \%dist_arch_ver;
}

sub parse_pdo_source {
    @_ == 2
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($uname, $content) = @_;

    debug "parsing $uname from:\n", $content
	if $Debug > 1;

    my $p = "parse_pdo_source(): failure parsing package web page for $uname: ";

#  <h3>Source Package gthumb</h3>
#  <ul>
#    <li><a class="resultlink" href="/source/etch/gthumb">etch</a> (gnome): 3:2.8.0-1
#    <br>Binary packages: <span id="js_gthumbetchus" class="p_js_elem"></span> <span id="html_gthumbetchus" class="binaries"><a href="/etch/gthumb">gthumb</a></span>
#    </li>
#    <li><a class="resultlink" href="/source/etch-m68k/gthumb">etch-m68k</a> (gnome): 3:2.8.0-1
#    <br>Binary packages: <span id="js_gthumbetch-m68kus" class="p_js_elem"></span> <span id="html_gthumbetch-m68kus" class="binaries"><a href="/etch-m68k/gthumb">gthumb</a></span>
#    </li>
#    <li><a class="resultlink" href="/source/lenny/gthumb">lenny</a> (gnome): 3:2.10.8-1
#    <br>Binary packages: <span id="js_gthumblennyus" class="p_js_elem"></span> <span id="html_gthumblennyus" class="binaries"><a href="/lenny/gthumb">gthumb</a>, <a href="/lenny/gthumb-data">gthumb-data</a></span>
#    </li>
#    <li><a class="resultlink" href="/source/sid/gthumb">sid</a> (gnome): 3:2.10.8-1
#    <br>Binary packages: <span id="js_gthumbsidus" class="p_js_elem"></span> <span id="html_gthumbsidus" class="binaries"><a href="/sid/gthumb">gthumb</a>, <a href="/sid/gthumb-data">gthumb-data</a></span>
#    </li>
#    <li><a class="resultlink" href="/source/experimental/gthumb">experimental</a> (gnome): 3:2.11.0~svn2319-1
#    <br>Binary packages: <span id="js_gthumbexperimentalus" class="p_js_elem"></span> <span id="html_gthumbexperimentalus" class="binaries"><a href="/experimental/gthumb">gthumb</a>, <a href="/experimental/gthumb-data">gthumb-data</a></span>
#    </li>
#  </ul>

    my %d;
	while ($content =~ m|^\s*<li><a[^>]+>([\w\(\)\ -]+)</a>
							\s+\(.*\):\s+(.*)\n
							\s+<br>Binary\s+packages:\s+<span[^>]+></span>
							\s+<span[^>]+>(<a[^>]+>.*</a>)</span>|xmg) {
	my ($dist, $ver, $binary_links) = ($1, $2, $3);
	debug "dist=$dist ver=$ver binary_links=$binary_links";

	if (exists $d{$dist}) {
    	    xdie $p, "duplicate entries for distribution $dist";
	}

    	my $binary_links_orig = $binary_links;
	my @binary_packages;
	while ($binary_links =~ s%^\s*
					(?:<a[^>]+>(\S+)</a>)
				    ,?%%x) {
	    push @binary_packages, defined $1 ? $1 : "";
	}
	#if ($binary_links =~ /\S/) {
    #	    xdie $p, "data left after bin parse: $binary_links";
    #	}
	if (!@binary_packages) {
	    xdie $p, "no binary packages parsed for dist=$dist ver=$ver from:\n",
		$binary_links_orig;
	}

    	$d{$dist} = [ $ver, \@binary_packages];
    }

    if (!%d) {
    	if ($content =~ /$Pdo_re_no_package/) {
	    return;
	}
	xdie $p, "main parse failed";
    }

    return \%d;
}

sub package_info_helper {
    @_ == 4
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($name, $type, $uri_fmt, $parser) = @_;

    debug "type=$type name=$name";

    defined $name
	# XXX shows memoize location
	or xcroak "undefined name";
    $name =~ /^\S+\z/
	or xcroak "invalid name `$name'";

    my $uri = sprintf $uri_fmt, $name;
    my $resp = get_uri $uri;
    return $parser->("$type $name", $resp->content);
}

sub binary_package_info {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($name) = @_;

    return package_info_helper $name,
	    'binary', $Pdo_uri_binary, \&parse_pdo_binary;
}

sub source_package_info {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($name) = @_;

    return package_info_helper $name,
	    'source', $Pdo_uri_source, \&parse_pdo_source;
}

#------------------------------------------------------------------------------

{

my $apt_cache;
my $apt_packages;

sub aptpkg_init {
    return if $apt_cache;

    require AptPkg::Cache;
    $apt_cache = AptPkg::Cache->new		or die;
    $apt_packages = $apt_cache->packages	or die;
}

# Return the source package for PACKAGE.

sub package_source {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($pname) = @_;

    aptpkg_init unless $apt_cache;

    my $r = $apt_packages->lookup($pname);
    if (!$r) {
    	# It wasn't found in the packages cache.  It might not exist,
	# or it might be a source package name, or it might be either
	# source or binary but not in the cache.
	#
	# XXX you could try AptPkg::Source here first
	if (source_package_info $pname) {
	    # it's a source package
    	    debug "$pname -> $pname (via pdo)";
    	    return $pname;
    	}
	# XXX a binary package name not found in the cache fails, need
    	# to extend pdo to tell you the source package name for a binary
	debug "$pname -> not found";
	return;
    }
    my $s = defined $r->{SourcePkg} ? $r->{SourcePkg} : $pname;
    debug "$pname -> $s";
    return $s;
}
memoize 'package_source';

# Return the component (main, contrib, etc.) for PACKAGE.
#
# XXX only works on binary packages

sub package_component {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($pname) = @_;

    aptpkg_init unless $apt_cache;

    my $p = $apt_cache->{$pname};
    if (!$p) {
	debug "$pname -> not found";
	return;
    }
    debug_hash "package $pname", '', %$p if $Debug > 1;

    my $vls = $p->{VersionList};
    if (!$vls) {
	debug "$pname -> no VersionList";
	return;
    }

    for my $vl (@$vls) {
    	debug_hash "version", '  ', %$vl if $Debug > 1;
    	my $fls = $vl->{FileList};
	if (!$fls) {
	    debug "no FileList";
	    next;
	}
	for my $fl (@$fls) {
	    my $f = $fl->{File};
	    if (!$f) {
		debug "no File";
		next;
	    }
	    debug_hash "file", '    ', %$f if $Debug > 1;
	    my $o = $f->{Origin};
	    if (!defined $o || $o ne 'Debian') {
		debug "non-Debian";
		next;
	    }
	    my $c = $f->{Component};
	    if (!$c) {
		debug "no Component";
		next;
	    }
	    debug "$pname -> $c";
	    return $c;
	}
    }

    debug "$pname -> component not found";
    return;
}
memoize 'package_component';

}

#------------------------------------------------------------------------------

{

my $vs;

sub version_init {
    return if $vs;

    require AptPkg::Config;

    # 2 extra loads required, bug #316768
    require AptPkg::System;
    require AptPkg::Version;

    $vs = $AptPkg::Config::_config->system->versioning
	or xdie "failed to get AptPkg::Version object";
}

=item cmp_debian_versions I<v1>, I<v2>

This is like C<cmp> for Debian package version numbers.  It is prototyped
to take 2 scalar arguments, so it can be used as-is as a C<sort> comparison
function.

=cut

sub cmp_debian_versions ($$) {
    !wantarray
	or xconfess 'called in wrong context';
    version_init unless $vs;

    return $vs->compare($_[0], $_[1]);
}

=item upstream_version I<debian-version>

This returns the upstream version extracted from the given I<debian-version>

=cut

sub upstream_version {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($deb_version) = @_;

    version_init unless $vs;
    return $vs->upstream($deb_version);
}

}

#------------------------------------------------------------------------------

sub madison {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($name) = @_;

    my $rbinary = binary_package_info $name;
    my $rsource = (package_source($name) eq $name)
		    ? source_package_info $name
		    : undef;

    my $fmt = "%10s | %10s | %13s | %s\n";

    my %ver_dist;
    # $ver_dist{$ver}{$dist} = \@arch;

    if ($rbinary) {
	my %dist_arch = %$rbinary;

	# break down by ver/dist/arch

	my %d;
	for my $dist (keys %dist_arch) {
	    while (my ($arch, $ver) = each %{ $dist_arch{$dist} }) {
		$d{$ver}{$dist}{$arch} = 1;
	    }
	}

	# add sorted arch list to %ver_dist

	for my $ver (keys %d) {
	    for my $dist (keys %{ $d{$ver} }) {
	    	push @{ $ver_dist{$ver}{$dist} },
			sort keys %{ $d{$ver}{$dist} };
	    }
	}
    }

    if ($rsource) {
    	for my $dist (keys %$rsource) {
	    my $ver = $rsource->{$dist}[0];
	    unshift @{ $ver_dist{$ver}{$dist} }, 'source';
	}
    }

    return \%ver_dist;
}

=item pool_dir I<package>

This returns the pool directory (starting at the C<pool/> level) in
which files for I<package> are stored.  I<package> can be either a
binary or source package name.

=cut

sub pool_dir {
    @_ == 1
	or xconfess "wrong number of args (", 0+@_, ")";
    !wantarray
	or xconfess 'called in wrong context';
    my ($package) = @_;

    my $psource = package_source $package
	or return;
    my $component = package_component $package
	or return;

    my $sub = $psource =~ /^(lib.)/ ? $1 : substr $psource, 0, 1;
    return "pool/$component/$sub/$psource";
}

#------------------------------------------------------------------------------

=back

=head1 SEE ALSO

debget(1), AptPkg(3pm)

=head1 AUTHOR

Roderick Schertler <roderick@argon.org>

=cut

1
