#! /usr/local/bin/perl # map_boot_frame.perl: pull out specific loci from mapmaker file, with only one marker per binned position # Author: Steve DiFazio # Date: 5-2003 use Getopt::Long; my ($len,@data,@lg,@pos,$lg,$pos,$header,$ind,$m,$o,$loci,$allloc,$loc,$i,@sarray,%printed,$index); GetOptions("a" => \$allloc, "l:i" => \$loci); if(!($loci)) { $loci = 200; } if(!$ARGV[0] ) { die " _______________________________________________________________\n Usage: perl map_boot_frame.perl -l= > out.txt \n where \t is the number of loci to pull out of file (default 200), \t is mapmaker input file with all genotype info. \t is the name of the file to save the output \tinput data must contain locus lg pos genotypes\n ** You can choose unique names for these input and output files ** _______________________________________________________________\n\n"; } while(<>) { if(/^\*/) { # input data must contain locus lg position $_ =~ tr/[a-z]/[A-Z]/; (my @dat) = split(/\t/); if($len) { if($len != $#dat) { die "Line ", $. - 1," has $len data points and line $. has ",$#dat," points\n"; } } else { $len = $#dat; } push(@data,$_); ($loc,$lg,$pos) = split(/\t/,$_); push(@lg,$lg); push(@pos,$pos); } elsif (/\S/) { $header.= $_; } } print $header; my $rsamparray = samp_array($#data+1,$loci,\@lg,\@pos); #sort array to preserve map order in output file @sarray=sort by_num (@$rsamparray); foreach $i (0 .. $#sarray) { print $data[$sarray[$i]]; } # subsample array without replacement; argument is length of array to # sample and number of elements to pull out. Returns array of array indices sub samp_array { my ($len,$num,@indices,@outarray,$routarray,%pulled,$rlg,$rpos,$pulled); ($len,$num,$rlg,$rpos) = @_; if($num > $len) { die "ERROR: Trying to sample $num data points but only $len exist\n"; } foreach $i (0 .. $len) { $indices[$i] = $i; } foreach $i (1 .. $num) { # pull only one locus per position $pulled=0; while(!($pulled)) { $index = int(rand()*$#indices); if(!($pulled{$rlg->[$indices[$index]].":".$rpos->[$indices[$index]]})) { if(!($allloc)) { $pulled{$rlg->[$indices[$index]].":".$rpos->[$indices[$index]]} = 1; } push(@outarray,splice(@indices,$index,1)); $pulled=1; } } } return \@outarray; } sub by_num { $a <=> $b; }