#!/usr/bin/perl -w

# Generate a script to run oggenc for a Compact Disc's worth of tracks
# [JNZ] Modified 29-Mar-2011
#
# Syntax:
#     make-oggenc-script num "artist" "album"
# where
#     num       is the number of tracks (1 to 99)
#     "artist"  is the artist
#     "album"   is the album title
#
# The program then reads num titles from standard input (or until EOF).
# The output is a bash script called oggenc--artist--album, written to the
# current directory, containing one oggenc command line per track.

use strict;                     # Enforce better programming habits
use warnings;
use locale;                     # Allow locale-specific sorting, etc.
use utf8;                       # This script uses UTF-8 character set
use open ":locale";             # Use locale for standard input/output
use charnames ":full";

use IO::Handle;
use Encode qw(decode_utf8);     # Standard Encode package
use Cwd qw(getcwd);
use List::Util qw(max);


# Encode an integer code point (used here for 0x80 to 0xFF) as a
# one-character string built with pack("U").
sub u {
    my ($code_point) = @_;
    return pack("U", $code_point);
}


# Transliteration table: non-ASCII (lowercase) character sequence => ASCII
# replacement.  Every key consists solely of non-ASCII characters and every
# value solely of ASCII characters; transform() relies on both properties.
my %ASCII_FOR = (
    # Latin-1 Supplement letters
    u(0xDF) => 'ss',            # Latin small letter sharp S
    u(0xE0) => 'a',             # Latin small letter A with grave
    u(0xE1) => 'a',             # Latin small letter A with acute
    u(0xE2) => 'a',             # Latin small letter A with circumflex
    u(0xE3) => 'a',             # Latin small letter A with tilde
    u(0xE4) => 'a',             # Latin small letter A with diaeresis
    u(0xE5) => 'a',             # Latin small letter A with ring above
    u(0xE6) => 'ae',            # Latin small letter AE
    u(0xE7) => 'c',             # Latin small letter C with cedilla
    u(0xE8) => 'e',             # Latin small letter E with grave
    u(0xE9) => 'e',             # Latin small letter E with acute
    u(0xEA) => 'e',             # Latin small letter E with circumflex
    u(0xEB) => 'e',             # Latin small letter E with diaeresis
    u(0xEC) => 'i',             # Latin small letter I with grave
    u(0xED) => 'i',             # Latin small letter I with acute
    u(0xEE) => 'i',             # Latin small letter I with circumflex
    u(0xEF) => 'i',             # Latin small letter I with diaeresis
    u(0xF0) => 'eth',           # Latin small letter ETH
    u(0xF1) => 'n',             # Latin small letter N with tilde
    u(0xF2) => 'o',             # Latin small letter O with grave
    u(0xF3) => 'o',             # Latin small letter O with acute
    u(0xF4) => 'o',             # Latin small letter O with circumflex
    u(0xF5) => 'o',             # Latin small letter O with tilde
    u(0xF6) => 'o',             # Latin small letter O with diaeresis
    u(0xF8) => 'o',             # Latin small letter O with stroke
    u(0xF9) => 'u',             # Latin small letter U with grave
    u(0xFA) => 'u',             # Latin small letter U with acute
    u(0xFB) => 'u',             # Latin small letter U with circumflex
    u(0xFC) => 'u',             # Latin small letter U with diaeresis
    u(0xFD) => 'y',             # Latin small letter Y with acute
    u(0xFE) => 'th',            # Latin small letter THORN
    u(0xFF) => 'y',             # Latin small letter Y with diaeresis

    # Latin Extended-A letters
    "\x{0101}" => 'a',          # Latin small letter A with macron
    "\x{0103}" => 'a',          # Latin small letter A with breve
    "\x{0105}" => 'a',          # Latin small letter A with ogonek
    "\x{0107}" => 'c',          # Latin small letter C with acute
    "\x{0109}" => 'c',          # Latin small letter C with circumflex
    "\x{010B}" => 'c',          # Latin small letter C with dot above
    "\x{010D}" => 'c',          # Latin small letter C with caron
    "\x{010F}" => 'd',          # Latin small letter D with caron
    "\x{0111}" => 'd',          # Latin small letter D with stroke
    "\x{0113}" => 'e',          # Latin small letter E with macron
    "\x{0115}" => 'e',          # Latin small letter E with breve
    "\x{0117}" => 'e',          # Latin small letter E with dot above
    "\x{0119}" => 'e',          # Latin small letter E with ogonek
    "\x{011B}" => 'e',          # Latin small letter E with caron
    "\x{011D}" => 'g',          # Latin small letter G with circumflex
    "\x{011F}" => 'g',          # Latin small letter G with breve
    "\x{0121}" => 'g',          # Latin small letter G with dot above
    "\x{0123}" => 'g',          # Latin small letter G with cedilla
    "\x{0125}" => 'h',          # Latin small letter H with circumflex
    "\x{0127}" => 'h',          # Latin small letter H with stroke
    "\x{0129}" => 'i',          # Latin small letter I with tilde
    "\x{012B}" => 'i',          # Latin small letter I with macron
    "\x{012D}" => 'i',          # Latin small letter I with breve
    "\x{012F}" => 'i',          # Latin small letter I with ogonek
    "\x{0131}" => 'i',          # Latin small letter dotless I
    "\x{0133}" => 'ij',         # Latin small ligature IJ
    "\x{0135}" => 'j',          # Latin small letter J with circumflex
    "\x{0137}" => 'k',          # Latin small letter K with cedilla
    "\x{0138}" => 'kr',         # Latin small letter KRA
    "\x{013A}" => 'l',          # Latin small letter L with acute
    "\x{013C}" => 'l',          # Latin small letter L with cedilla
    "\x{013E}" => 'l',          # Latin small letter L with caron
    "\x{0140}" => 'l',          # Latin small letter L with middle dot
    "\x{0142}" => 'l',          # Latin small letter L with stroke
    "\x{0144}" => 'n',          # Latin small letter N with acute
    "\x{0146}" => 'n',          # Latin small letter N with cedilla
    "\x{0148}" => 'n',          # Latin small letter N with caron
    "\x{0149}" => 'n',          # Latin small letter N preceded by apostrophe
    "\x{014B}" => 'ng',         # Latin small letter ENG
    "\x{014D}" => 'o',          # Latin small letter O with macron
    "\x{014F}" => 'o',          # Latin small letter O with breve
    "\x{0151}" => 'o',          # Latin small letter O with double acute
    "\x{0153}" => 'oe',         # Latin small ligature OE
    "\x{0155}" => 'r',          # Latin small letter R with acute
    "\x{0157}" => 'r',          # Latin small letter R with cedilla
    "\x{0159}" => 'r',          # Latin small letter R with caron
    "\x{015B}" => 's',          # Latin small letter S with acute
    "\x{015D}" => 's',          # Latin small letter S with circumflex
    "\x{015F}" => 's',          # Latin small letter S with cedilla
    "\x{0161}" => 's',          # Latin small letter S with caron
    "\x{0163}" => 't',          # Latin small letter T with cedilla
    "\x{0165}" => 't',          # Latin small letter T with caron
    "\x{0167}" => 't',          # Latin small letter T with stroke
    "\x{0169}" => 'u',          # Latin small letter U with tilde
    "\x{016B}" => 'u',          # Latin small letter U with macron
    "\x{016D}" => 'u',          # Latin small letter U with breve
    "\x{016F}" => 'u',          # Latin small letter U with ring above
    "\x{0171}" => 'u',          # Latin small letter U with double acute
    "\x{0173}" => 'u',          # Latin small letter U with ogonek
    "\x{0175}" => 'w',          # Latin small letter W with circumflex
    "\x{0177}" => 'y',          # Latin small letter Y with circumflex
    "\x{017A}" => 'z',          # Latin small letter Z with acute
    "\x{017C}" => 'z',          # Latin small letter Z with dot above
    "\x{017E}" => 'z',          # Latin small letter Z with caron
    "\x{017F}" => 's',          # Latin small letter long S

    # Cyrillic consonant + IE digraphs: after these consonants IE is
    # written "e" rather than "ye".  These two-character keys must win
    # over the single-character keys below (see $TRANSLIT_RE).
    "\x{0431}\x{0435}" => 'be',     # Cyrillic small letters BE + IE
    "\x{0432}\x{0435}" => 've',     # Cyrillic small letters VE + IE
    "\x{0433}\x{0435}" => 'ge',     # Cyrillic small letters GHE + IE
    "\x{0434}\x{0435}" => 'de',     # Cyrillic small letters DE + IE
    "\x{0436}\x{0435}" => 'zhe',    # Cyrillic small letters ZHE + IE
    "\x{0437}\x{0435}" => 'ze',     # Cyrillic small letters ZE + IE
    "\x{043A}\x{0435}" => 'ke',     # Cyrillic small letters KA + IE
    "\x{043F}\x{0435}" => 'pe',     # Cyrillic small letters PE + IE
    "\x{0441}\x{0435}" => 'se',     # Cyrillic small letters ES + IE
    "\x{0442}\x{0435}" => 'te',     # Cyrillic small letters TE + IE
    "\x{0446}\x{0435}" => 'tse',    # Cyrillic small letters TSE + IE
    "\x{0447}\x{0435}" => 'che',    # Cyrillic small letters CHE + IE
    "\x{0448}\x{0435}" => 'she',    # Cyrillic small letters SHA + IE
    "\x{0449}\x{0435}" => 'shche',  # Cyrillic small letters SHCHA + IE

    # Cyrillic single letters
    "\x{0430}" => 'a',          # Cyrillic small letter A
    "\x{0431}" => 'b',          # Cyrillic small letter BE
    "\x{0432}" => 'v',          # Cyrillic small letter VE
    "\x{0433}" => 'g',          # Cyrillic small letter GHE
    "\x{0434}" => 'd',          # Cyrillic small letter DE
    "\x{0435}" => 'ye',         # Cyrillic small letter IE
    "\x{0436}" => 'zh',         # Cyrillic small letter ZHE
    "\x{0437}" => 'z',          # Cyrillic small letter ZE
    "\x{0438}" => 'i',          # Cyrillic small letter I
    "\x{0439}" => 'y',          # Cyrillic small letter Short I
    "\x{043A}" => 'k',          # Cyrillic small letter KA
    "\x{043B}" => 'l',          # Cyrillic small letter EL
    "\x{043C}" => 'm',          # Cyrillic small letter EM
    "\x{043D}" => 'n',          # Cyrillic small letter EN
    "\x{043E}" => 'o',          # Cyrillic small letter O
    "\x{043F}" => 'p',          # Cyrillic small letter PE
    "\x{0440}" => 'r',          # Cyrillic small letter ER
    "\x{0441}" => 's',          # Cyrillic small letter ES
    "\x{0442}" => 't',          # Cyrillic small letter TE
    "\x{0443}" => 'u',          # Cyrillic small letter U
    "\x{0444}" => 'f',          # Cyrillic small letter EF
    "\x{0445}" => 'kh',         # Cyrillic small letter HA
    "\x{0446}" => 'ts',         # Cyrillic small letter TSE
    "\x{0447}" => 'ch',         # Cyrillic small letter CHE
    "\x{0448}" => 'sh',         # Cyrillic small letter SHA
    "\x{0449}" => 'shch',       # Cyrillic small letter SHCHA
    "\x{044A}" => '',           # Cyrillic small letter hard sign
    "\x{044B}" => 'y',          # Cyrillic small letter YERU
    "\x{044C}" => '',           # Cyrillic small letter soft sign
    "\x{044D}" => 'e',          # Cyrillic small letter E
    "\x{044E}" => 'yu',         # Cyrillic small letter YU
    "\x{044F}" => 'ya',         # Cyrillic small letter YA
    "\x{0451}" => 'yo',         # Cyrillic small letter IO
    "\x{0454}" => 'ie',         # Cyrillic small letter Ukrainian IE
    "\x{0456}" => 'i',          # Cyrillic small letter Ukrainian I
    "\x{0457}" => 'yi',         # Cyrillic small letter YI
    "\x{0491}" => 'g',          # Cyrillic small letter GHE with upturn

    # Hyphens and dashes
    u(0xAD)    => '-',          # Soft hyphen
    "\x{2010}" => '-',          # Hyphen
    "\x{2011}" => '-',          # Non-breaking hyphen
    "\x{2012}" => '-',          # Figure dash
    "\x{2013}" => '-',          # En dash
    "\x{2014}" => '-',          # Em dash
    "\x{2015}" => '-',          # Horizontal bar

    # Quotation marks and ellipsis (dropped)
    u(0xAB)    => '',           # Left-pointing double angle quotation mark
    u(0xBB)    => '',           # Right-pointing double angle quotation mark
    "\x{2018}" => '',           # Left single quotation mark
    "\x{2019}" => '',           # Right single quotation mark
    "\x{201C}" => '',           # Left double quotation mark
    "\x{201D}" => '',           # Right double quotation mark
    "\x{2026}" => '',           # Horizontal ellipsis
);

# One alternation over every key of %ASCII_FOR, longest keys first so that
# a Cyrillic consonant + IE digraph is preferred over its first letter.
# No quoting is needed: keys contain no ASCII, hence no regex metacharacters.
my $TRANSLIT_RE = do {
    my $alternatives = join '|',
        sort { length($b) <=> length($a) } keys %ASCII_FOR;
    qr/($alternatives)/;
};


# Transform a string into its lowercase ASCII filename equivalent.
#
# Parameters:
#     $in    the string to transform
#     $fn    name of the input file the string came from ("-" for standard
#            input), or undef to omit the location from warnings
#     $line  input line number, used together with $fn in warnings
#
# Returns the string reduced to the characters [-0-9a-z], with no two
# consecutive dashes.  A warning is printed for each non-ASCII character
# that has no entry in the transliteration table; such characters are
# then dropped.
sub transform {
    my ($in, $fn, $line) = @_;

    $in =~ tr/ _:/-/;                           # Spaces, underscores, colons
    $in =~ s/(\p{UppercaseLetter})/\l$1/g;      # Make lowercase

    # A single left-to-right pass gives the same result as applying each
    # table entry in turn: digraphs cannot overlap one another (IE never
    # starts one) and no replacement text can itself match a table key.
    $in =~ s/$TRANSLIT_RE/$ASCII_FOR{$1}/g;

    my $where = defined($fn)
        ? ($fn eq "-" ? ":$line: " : "$fn:$line: ")
        : "";
    while ($in =~ /(\P{IsASCII})/g) {
        # $0 is passed as an argument, not interpolated into the format,
        # so a "%" in the program path cannot corrupt the message.
        warn sprintf("%s: %sString contains non-ASCII character: "
                     . "\"%s\" (U+%04X)\n",
                     $0, $where, $1, ord($1));
    }

    $in =~ tr/-0-9a-z//dc;      # Remove everything except [-0-9a-z]
    $in =~ tr/-//s;             # Squeeze sequences of dashes into one

    return $in;
}


# Quote a string for use as one word in a bash command line: wrap it in
# double quotes and backslash-escape the four characters that remain
# special inside them (backslash, double quote, dollar sign, backtick).
# Strings without those characters come out as plain "string".
sub shell_dquote {
    my ($text) = @_;
    (my $escaped = $text) =~ s/([\\"\$`])/\\$1/g;
    return qq("$escaped");
}


# Main program: check the arguments, read the titles, write the script.
sub main {
    if (@ARGV != 3) {
        die "$0: insufficient or incorrect arguments\n\n"
          . "Usage:\n"
          . " $0 num \"artist\" \"album\"\n"
          . "where\n"
          . " num is the number of tracks\n"
          . " \"artist\" is the artist\n"
          . " \"album\" is the album title\n"
          . "The program then reads num titles from standard input (or until EOF)\n"
          . "The output is a script called oggenc--artist--album\n";
    }

    my $istty = (-t STDIN);
    STDOUT->autoflush(1);

    # Assume command line args are in UTF-8.  Removing them from @ARGV
    # makes <> below read the titles from standard input.
    my ($num, $artist, $album) = map { decode_utf8($_) } splice(@ARGV, 0, 3);

    # Require a plain decimal integer so that garbage such as "abc" or
    # "5.5" is rejected instead of being silently coerced to a number.
    if ($num !~ /\A[0-9]+\z/ or $num < 1 or $num > 99) {
        die "$0: Illegal number of tracks: $num\n";
    }

    my $dirname = transform($artist, undef, undef)
                . "--"
                . transform($album, undef, undef);
    my $outfilename = "oggenc--" . $dirname;

    print "Artist: $artist\n";
    print "Album: $album\n\n";
    print "Enter $num titles, one per line:\n\n" if $istty;

    my @titles;                 # Track titles as entered (index 0 = track 1)
    my @fns;                    # Matching ASCII filename stems

    for my $track (1 .. $num) {
        printf "%2d. ", $track;
        my $title = <>;
        if (!defined($title)) {
            die "$0: Insufficient titles (missing title $track)\n";
        }
        chomp $title;
        $title =~ s/^\s*\d*\.?\s+//;    # Remove leading spaces and numbers
        $title =~ s/\s+$//;             # Remove trailing spaces
        print "$title\n" if !$istty;    # Echo input that is not typed live

        push @titles, $title;
        push @fns, transform($title, $ARGV, $.);
    }

    # Pre-quote everything that goes onto the oggenc command lines, then
    # size the two padded columns to the widest entry so the lines align.
    my $album_arg   = shell_dquote($album);
    my $artist_arg  = shell_dquote($artist);
    my @file_args   = map { "-$_.ogg" } @fns;
    my @title_args  = map { "-t " . shell_dquote($_) } @titles;
    my $file_width  = max(map { length } @file_args);
    my $title_width = max(map { length } @title_args);

    print "\nWriting output to $outfilename\n";

    open(my $out, ">:encoding(UTF-8)", $outfilename)
        or die "$0: Could not open $outfilename: $!\n";

    print {$out} "#!/bin/bash\n\n";
    print {$out} "# Convert audio disc tracks into Ogg format files\n\n";
    print {$out} "# Album: $album\n";
    print {$out} "# Artist: $artist\n\n";
    print {$out} "export LANG=en_AU.UTF-8\n\n";

    for my $track (1 .. $num) {
        printf {$out} "oggenc -q 5 -o %02d%-*s -G Christian -l %s -N %2d"
                    . " -a %s %-*s track%02d.cdda.wav\n",
            $track, $file_width, $file_args[$track - 1],
            $album_arg, $track,
            $artist_arg, $title_width, $title_args[$track - 1],
            $track;
    }

    print {$out} "\nbeep -f 880 -l 500\n";

    # Buffered write errors only surface at close, so check it.
    close($out)
        or die "$0: Could not close $outfilename: $!\n";
    chmod(0755, $outfilename)
        or warn "$0: Could not make $outfilename executable: $!\n";

    print "\nTo encode the Compact Disc, run:\n\n"
        . " mkdir /data/music/$dirname\n"
        . " cd /data/music/$dirname\n"
        . " cdparanoia -B; beep -f 880 -l 1000\n"
        . " " . getcwd() . "/$outfilename\n";

    return;
}

# Run the program only when executed as a script; when the file is loaded
# with require/do (for example by a test), just define the subroutines.
main() unless caller;

1;