#! /usr/bin/perl

use warnings;

use IO::File;
use Getopt::Std;
undef $opt_g;			# gunzip each file
getopts("g:");


foreach $file (@ARGV) {
    if (defined($opt_g)) {
	$FP = new IO::File("gunzip -c $file |");
    } else {
	$FP = new IO::File($file) or die "Cannot open $file";
    }

    while (defined($line = <$FP>)) {
	if ($line =~ /^\#/) {
	    # Skip comment
	} else {
	    chop $line;
	    @fields = split /\t/,$line;
	    if ($fields[2] eq "SNV") {
		$chr = $fields[0];
		$chrpos = $fields[3];	# GVF file is 1-based
		
		undef $rsid;
		undef $alleleA;
		undef $alleleB;
		
		foreach $item (split ";",$fields[8]) {
		    if ($item =~ /Dbxref=(\S+)/) {
			$rsid = $1;
			$rsid =~ s/.*://;
		    } elsif ($item =~ /Reference_seq=(.)/) {
			$alleleA = $1;
		    } elsif ($item =~ /Variant_seq=(.)/) {
			$alleleB = $1;
		    }
		}
		if (($snp_strand = $fields[6]) eq "-") {
		    $alleleA = $revcomp{$alleleA};
		    $alleleB = $revcomp{$alleleB};
		}
		if ($alleleA le $alleleB) {
		    $snp_type = $alleleA . $alleleB;
		} else {
		    $snp_type = $alleleB . $alleleA;
		}
		
		if (!defined($rsid)) {
		    # Skip
		} elsif ($snp_strand ne "+" && $snp_strand ne "=") {
		    # Skip
		} elsif (!defined($alleleA) || !defined($alleleB)) {
		    # Skip
		} elsif (!defined($acgt{$alleleA}) || !defined($acgt{$alleleB})) {
		    print STDERR "$rsid has alleles $fields[9] with non-ACGT character\n";
		} else {
		    print ">$rsid $chr:$chrpos $snp_type $snp_strand\n";
		}
	    }
	}
    }

    close($FP);
}

exit;


BEGIN {
    %acgt = ("A" => 1, "C" => 1, "G" => 1, "T" => 1);

    %revcomp = ("A" => "T", "C" => "G", "G" => "C", "T" => "A");
}


