#! /usr/local/bin/perl # map_boot.perl: pull out specific loci from mapmaker file # Author: Steve DiFazio # Date: 5-2003 use Getopt::Long; GetOptions( "l:i" => \$loci, "r" => \$repuls); if(!($loci)) { $loci = 200; } if(!$ARGV[0] ) { die " _______________________________________________________________\n Usage: perl map_boot.perl -l= > out.txt \n where \t is the number of loci to pull out of file (default 200), \t-r allows processing of markers in coupling and repulsion phase, \t is mapmaker input file with all genotype info. \t is the name of the file to save the output\n ** You can choose unique names for these input and output files ** _______________________________________________________________\n\n"; } my ($len,@data,@header,$ind,$m,$o); while(<>) { if(/^\*/) { $_ =~ tr/[a-z]/[A-Z]/; push(@data,$_); (my @all) = split(); if($len) { if($len != $#all) { die "Line ", $. - 1,"has $len data points and line $. has ",$#all," points\n"; } } else { $len = $#all; } $data{$all[0]} = $_; } else { push(@header,$_); } } for $i (0 .. $#header) { if($header[$i] !~ /^[0-9]/) { print $header[$i]; } else { ($ind,$m,$o) = split(/ /,$header[$i]); print "$ind ",$loci* ($repuls ? 2 : 1)," $o\n"; } } my $rsamparray = samp_array($#data,$loci); #sort array to preserve map order in output file @sarray=sort by_num (@$rsamparray); foreach $i (0 .. $#sarray) { if($repuls) { (my @all) = split(/\s+/,$data[$sarray[$i]]); if($all[0] =~ s/(.*)R$/$1/) { } else { $all[0].="R"; } print $data{$all[0]}; } print $data[$sarray[$i]]; } # subsample array without replacement; argument is length of array to # sample and number of elements to pull out. Returns array of array indices sub samp_array { my ($len,$num,@indices,@outarray,$routarray); ($len,$num) = @_; if($num > $len) { die "ERROR: Trying to sample $num data points but only $len exist\n"; } foreach $i (0 .. $len) { $indices[$i] = $i; } foreach $i (1 .. $num) { $index = int(rand()*$#indices); push(@outarray,splice(@indices,$index,1)); } return \@outarray; } sub by_num { $a <=> $b; }