#!/usr/bin/perl
#/*WordMake 0.1: Dictionary Maker based on text files input*/
#/*Takes raw text files as input and parses them into dictionary files*/
#/*Features: 
#		Select as many raw texts as you like*/
#		Choose words whose length is within a specified range*/
#		Choose an output File		 	*/
#		Sorts all the words, makes them lowercase and unique*/
#		
#/*That's all I was looking for in order	*/
#/*to make a better Spanish Dictionary		*/
#/*If you find it useful, drop me a line, and if */
#/*you want, send me your dictionary		*/
#/*ToDo: Better cmd line parsing, more regular expression filters, etc*/
#/*Of course, if you make it better, let me know		     */
#Author: Linga<linga@mailandnews.com>
#Date: 25-05-00
#Usage: ./wordmake.pl TEXT_FILE_1 TEXT_FILE_2 ... TEXT_FILE_N [-o OUTPUT_FILE] [-min MIN_WORD_LENGTH] [-max MAX_WORD_LENGTH]

# Defaults... May be changed on the command line
use strict;
use warnings;

my $min_long = 4;                   # shortest run of word characters kept (-min)
my $max_long = 8;                   # longest run of word characters kept (-max)
my $fout     = "dict_clean.txt";    # output dictionary file (-o)
my @rawfiles;                       # input text files, in command-line order

# Parse the command line. Options and input files may appear in any order:
#   -o FILE   output file
#   -min N    minimum word length
#   -max N    maximum word length
# Anything else is taken as an input file name. The loop runs on the number
# of remaining arguments rather than on their truthiness, so an argument
# such as "0" no longer ends parsing early.
my @args = @ARGV;
while (@args) {
    my $arg = shift @args;

    if ($arg eq "-o" or $arg eq "-min" or $arg eq "-max") {
        @args or die "Option $arg requires a value\n";
        my $value = shift @args;

        if ($arg eq "-o") {
            $fout = $value;
        }
        else {
            # The lengths are interpolated into a regex quantifier below,
            # so only plain non-negative integers are accepted.
            $value =~ /\A[0-9]+\z/
                or die "Option $arg expects a non-negative integer, got '$value'\n";
            $value =~ s/\A0+(?=[0-9])//;    # "007" -> "7"

            # These must be the same variables the matching regex uses;
            # storing them under other names makes -min/-max no-ops.
            if ($arg eq "-min") {
                $min_long = $value;
            }
            else {
                $max_long = $value;
            }
        }
    }
    else {
        push @rawfiles, $arg;
    }
}

$min_long <= $max_long
    or die "Minimum word length ($min_long) is greater than maximum ($max_long)\n";

# Refuse to run without input instead of truncating the output file to nothing.
@rawfiles
    or die "Usage: $0 TEXT_FILE_1 TEXT_FILE_2 ... TEXT_FILE_N"
         . " [-o OUTPUT_FILE] [-min MIN_WORD_LENGTH] [-max MAX_WORD_LENGTH]\n";

# Collect the words from every input file before touching the output file,
# so an unreadable input does not destroy an existing dictionary.
#
# NOTE(review): as before, only the FIRST run of $min_long..$max_long word
# characters in each whitespace-separated token is kept, and the match is not
# anchored at word boundaries -- a longer word contributes its first
# $max_long characters, and "foo,barbaz" contributes only one entry.
# Confirm whether that is intended before tightening the pattern.
my $word_re = qr/(\w{$min_long,$max_long})/;
my %count;    # word => number of times it was seen (only the keys are written)

foreach my $raw_dict (@rawfiles) {
    # Three-argument open: the file name is never interpreted as a mode or pipe.
    open(my $raw, '<', $raw_dict)
        or die "Couldn't open input file $raw_dict: $!\n";

    while (my $line = <$raw>) {
        for my $token (split ' ', $line) {
            $token =~ tr/A-Z/a-z/;    # folds ASCII letters only
            my ($word) = $token =~ $word_re;

            # Skip tokens with no match and empty matches (possible when the
            # minimum is 0); unlike a plain truth test this keeps the word "0".
            next unless defined $word and length $word;
            $count{$word}++;
        }
    }

    close($raw);
}

# Write the unique words, one per line, in sorted order.
open(my $dict, '>', $fout) or die "Can't open output file $fout: $!\n";
foreach my $key (sort keys %count) {
    print {$dict} "$key\n";
}
# Buffered write errors (e.g. disk full) only surface at close time.
close($dict) or die "Can't finish writing output file $fout: $!\n";
