#!/usr/bin/perl
# Author: [email protected]

# Description: This program designs primers for constructing knockouts
# of genes by transformation of PCR products (ref: Datsenko & Wanner,
# PNAS 2000).  A tab-delimited file containing ORF START STOP is read,
# and primers flanking the start & stop coordinates are designed based
# on the user-designated sequence file.  In addition, primers flanking
# the knockout regions are chosen for PCR screening purposes once the
# knockout is generated.  The script uses Bioperl in order to
# determine the primer sequences, which requires getting subsequences
# and reverse complementing some of the objects.

# make_primers.pl
# Purpose: Design primers for the Wanner method of PCR product-based knockouts
# Input: FASTA sequence file, tab-delimited coordinates file
# Output: Primer output file
# July 4, 2001
# Charles C. Kim

###########
# MODULES #
###########
use Bio::Seq;
use Getopt::Std;

#############
# VARIABLES #
#############
# Deletion geometry.
#   $upgap       - nucleotides upstream of the 5' end to include in the deletion
#   $downgap     - nucleotides downstream of the 3' end to include in the deletion
#   $oligolength - length of the homologous region on each primer
($upgap, $downgap) = (0, 0);
$oligolength = 40;

# Default filenames.  Leave these empty unless the program should run on
# these files exclusively.
($seqfile, $coordfile, $outfile) = ('', '', '');

# Fixed sequences printed directly after the homologous region of each
# knockout primer, keyed by the label used in the output file.
# NOTE(review): presumably the template priming sites from Datsenko & Wanner
# -- confirm against the paper.
%fiveprime_primers = ('P1' => 'GTGTAGGCTGGAGCTGCTTC');
%threeprime_primers = (
    'P2' => 'CATATGAATATCCTCCTTAG',
    'P4' => 'ATTCCGGGGATCCGTCGACC',
);

#########
# FILES #
#########
# Parse -s (sequence file), -c (coordinates file) and -o (output file).
# getopts() returns false when it meets a switch it does not know, so stop
# with a usage line rather than continuing with a half-parsed command line.
getopts('s:c:o:')
    or die "Usage: $0 [-s sequence_file] [-c coordinates_file] [-o output_file]\n";

# A command-line value overrides the filename configured in the script.
# Test definedness and length rather than truth so that a file literally
# named "0" is accepted.
$seqfile   = $opt_s if defined $opt_s && length $opt_s;
$coordfile = $opt_c if defined $opt_c && length $opt_c;
$outfile   = $opt_o if defined $opt_o && length $opt_o;

# Open the three filehandles used by the rest of the script.  The helpers
# prompt on STDIN for any filename that is still empty.
open_readfile(*SEQFILE, 'sequence', $seqfile);
open_readfile(*COORDFILE, 'coordinate', $coordfile);
open_writefile(*PRIMERFILE, 'output', $outfile);

########
# MAIN #
########

# Read the single FASTA record from SEQFILE and wrap it in a Bio::Seq object
# stored in $seq, which the coordinate loop queries for subsequences.
# Dies if the file holds more than one record or no residues at all.
my $residues = '';
my $headers  = 0;
while (my $line = <SEQFILE>) {
    # A header line starts with '>'; only one record is allowed.
    if ($line =~ /^\s*>/) {
        $headers++;
        die "More than one sequence present in the input file\n" if $headers > 1;
        next;
    }
    # Drop the newline plus any stray CR or blanks so that only residues
    # are appended to the sequence.
    $line =~ s/\s+//g;
    $line =~ tr/gatc/GATC/;
    $residues .= $line;
}
close SEQFILE;

die "No sequence data found in the sequence file\n" unless length $residues;

$seq = Bio::Seq->new('-seq' => $residues);

# For every "ORF<TAB>START<TAB>STOP" line of the coordinates file, write the
# knockout primers and the PCR screening primers for that ORF to PRIMERFILE.
#
# Layout on the sequence held in $seq (coordinates are 1-based):
#
#   [left screen][left arm] gap | deleted region | gap [right arm][right screen]
#
# Each arm is $oligolength nt long and each screening primer is
# $screen_length nt long.  Primers taken from the right-hand side are
# reverse complemented so that both primers of a pair point inwards.
# When START < STOP the left side is the 5' ("up") side; when START > STOP
# the roles are swapped, so the 5' primers come from the right-hand side.
my $screen_length = 20;

while (my $line = <COORDFILE>) {
    $line =~ s/\r?\n\z//;      # accept both Unix and DOS line endings
    next if $line !~ /\S/;     # skip blank lines

    my ($name, $start, $stop) = split /\t/, $line;
    for my $coord ($start, $stop) {
        $coord = '' unless defined $coord;
        $coord =~ s/^\s+|\s+$//g;
    }
    # Both coordinates must be plain integers and must differ.
    die "Problem with start and stop coordinates\n"
        unless $start =~ /^\d+$/ && $stop =~ /^\d+$/ && $start != $stop;

    # Express the ORF as a left/right pair, each with the gap that belongs
    # to that side, so one set of arithmetic serves both orientations.
    my $forward = $start < $stop;
    my ($left, $left_gap, $right, $right_gap) =
        $forward ? ($start, $upgap,   $stop,  $downgap)
                 : ($stop,  $downgap, $start, $upgap);

    my $left_arm_start  = $left - $left_gap - $oligolength;
    my $right_arm_start = $right + $right_gap + 1;
    my $first = $left_arm_start - $screen_length;                     # outermost left base
    my $last  = $right_arm_start + $oligolength + $screen_length - 1; # outermost right base

    # Fail with a readable message instead of a BioPerl exception when the
    # ORF lies too close to either end of the sequence.
    if ($first < 1 || $last > $seq->length) {
        die "Primers for '$name' would fall outside the sequence "
          . "(positions $first-$last needed, sequence length "
          . $seq->length . ")\n";
    }

    my $left_arm  = $seq->subseq($left_arm_start, $left_arm_start + $oligolength - 1);
    my $left_pcr  = $seq->subseq($first, $left_arm_start - 1);
    my $right_arm = Bio::Seq->new(
        '-seq' => $seq->subseq($right_arm_start, $right_arm_start + $oligolength - 1)
    )->revcom()->seq();
    my $right_pcr = Bio::Seq->new(
        '-seq' => $seq->subseq($right_arm_start + $oligolength, $last)
    )->revcom()->seq();

    my ($upprimer, $uppcr, $downprimer, $downpcr) =
        $forward ? ($left_arm,  $left_pcr,  $right_arm, $right_pcr)
                 : ($right_arm, $right_pcr, $left_arm,  $left_pcr);

    # Distance spanned by the two screening primers on the unmodified
    # sequence; equals abs(START - STOP) + 121 with the default settings.
    my $wildtype_size = $last - $first + 1;

    print PRIMERFILE "$name\n";
    print PRIMERFILE "5'pcr\t$uppcr\n";
    print PRIMERFILE "3'pcr\t$downpcr\n";
    print PRIMERFILE "\tExpected wildtype product size: ", $wildtype_size, " bp\n";
    for my $entry (sort keys %fiveprime_primers) {
        print PRIMERFILE "5' $entry\t$upprimer$fiveprime_primers{$entry}\n";
    }
    for my $entry (sort keys %threeprime_primers) {
        print PRIMERFILE "3' $entry\t$downprimer$threeprime_primers{$entry}\n";
    }
    print PRIMERFILE "\n";
}
close COORDFILE;

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so check the result.
close PRIMERFILE or die "Could not finish writing the primer output file: $!\n";


###############
# SUBROUTINES #
###############

# open_readfile(GLOB, TYPE, FILENAME)
#
# Opens FILENAME for reading on the filehandle named by GLOB.
#   GLOB     - typeglob of the handle to open, e.g. *SEQFILE
#   TYPE     - short description of the file; used only in the prompt
#   FILENAME - path to open; if empty or undefined the user is asked for
#              one on STDIN
# Dies if no filename can be obtained, if the file does not exist, or if
# it cannot be opened.  Returns nothing useful.
sub open_readfile {
    # Plain list assignment: "my $x = ... if COND" has undefined behaviour.
    my ($filehandle, $filetype, $filename) = @_;
    $filetype = '' unless defined $filetype;

    unless (defined $filename && length $filename) {
        print "Enter $filetype filename: ";
        $filename = <STDIN>;
        # STDIN at end-of-file: there is nobody to ask.
        die "No $filetype filename given\n" unless defined $filename;
        chomp $filename;
    }

    unless (-e $filename) { die "$filename not found\n"; }
    open $filehandle, '<', $filename
        or die "Could not read file '$filename': $!\n";
    return;
}

# open_writefile(GLOB, TYPE, FILENAME)
#
# Opens FILENAME for writing (truncating it) on the filehandle named by GLOB.
#   GLOB     - typeglob of the handle to open, e.g. *PRIMERFILE
#   TYPE     - short description of the file; used only in the prompt
#   FILENAME - path to open; if empty or undefined the user is asked for
#              one on STDIN
# If the file already exists the user must confirm the overwrite with a
# single Y or N; any other answer is asked again.  Dies if no filename can
# be obtained, if the overwrite is refused (including end-of-file on STDIN),
# or if the file cannot be opened.  Returns nothing useful.
sub open_writefile {
    # Plain list assignment: "my $x = ... if COND" has undefined behaviour.
    my ($filehandle, $filetype, $filename) = @_;
    $filetype = '' unless defined $filetype;

    unless (defined $filename && length $filename) {
        print "Enter $filetype filename: ";
        $filename = <STDIN>;
        die "No $filetype filename given\n" unless defined $filename;
        chomp $filename;
    }

    if (-e $filename) {
        print "$filename already exists!  Overwrite (Y/N)? ";
        # A lexical is used so the caller's $_ is left alone.
        my $answer = <STDIN>;
        # Require an explicit Y or N: an empty line must not count as
        # permission to destroy an existing file.
        until (defined $answer && $answer =~ /^\s*[yn]\s*$/i) {
            die "$filename not overwritten.\n" unless defined $answer;
            print 'Y or N, please: ';
            $answer = <STDIN>;
        }
        die "$filename not overwritten.\n" if $answer =~ /n/i;
    }

    open $filehandle, '>', $filename
        or die "Could not write file '$filename': $!\n";
    return;
}