#!/usr/bin/perl
# generate_random_utf8_input.pl - Don Yang (uguu.org)
#
# Similar to generate_random_input.pl, but generates input text with
# mixed ASCII and UTF-8 sequences.
#
# Output (on STDOUT) is a stream of newline-terminated events:
#
#    Y<row>X<column>F<frame>T<timestamp>   cursor event
#    L<line>E<line count>=<text>           line content event
#
# Rows and line numbers are 1-based.  The stream is fully determined by
# the fixed random seed below.

use strict;
use warnings;

use constant MAX_TIMESTAMPS => 10000;
use constant MAX_EVENTS_PER_TIMESTAMP => 3;

# The UTF-8 sequences below are assembled by hand as raw bytes, so STDOUT
# must not re-encode them.  Clear the :utf8 flag in case it was turned on
# externally (perl -CS, PERL_UNICODE=S); otherwise every byte >= 0x80 would
# be encoded a second time.  Unlike a bare binmode(), ":bytes" leaves any
# newline translation layer untouched.
binmode(STDOUT, ":bytes");

# Fix the random seed so that we can reproduce the input in case of a crash
srand 42;

# Return a random string of 1 to 10 characters, where each character is
# encoded in 1 to 4 bytes of UTF-8.  The result is a byte string (every
# element is in the range 0x00..0xff), not a decoded character string.
#
# Takes no arguments.  Consumes values from the global rand() stream:
# one for the length, then two per generated character.
sub RandomString
{
   my $s = "";
   my $length = int(rand(10)) + 1;
   for(1 .. $length)
   {
      my $char_length = int(rand(4)) + 1;
      if( $char_length == 1 )
      {
         # 1 byte, use Latin capitals (U+0041..U+005A)
         $s .= chr(int(rand(26)) + ord('A'));
      }
      elsif( $char_length == 2 )
      {
         # 2 bytes, use Cyrillic capitals (U+0400..U+042F)
         $s .= chr(0xd0) . chr(0x80 + int(rand(48)));
      }
      elsif( $char_length == 3 )
      {
         # 3 bytes, use Hiragana (U+3041..U+306F)
         $s .= chr(0xe3) . chr(0x81) . chr(0x81 + int(rand(47)));
      }
      else
      {
         # 4 bytes, use Linear B (U+10000..U+1002F)
         $s .= chr(0xf0) . chr(0x90) . chr(0x80) . chr(0x80 + int(rand(48)));
      }
   }
   return $s;
}

# Format a single line content event.
#
#    $index - zero-based index of the line
#    $count - total number of lines at the time of the event
#    $text  - contents of the line, undef is written as an empty string
#
# Returns the event text including the trailing newline.
sub LineEvent
{
   my ($index, $count, $text) = @_;
   $text = "" unless defined $text;
   return "L" . ($index + 1) . "E" . $count . "=" . $text . "\n";
}

my @lines = ("");
my $timestamp = 0;
my $frame = 0;
for(1 .. MAX_TIMESTAMPS)
{
   # Output cursor event.  Row ranges from 1 to one past the last line.
   my $row = int(rand(scalar(@lines) + 1)) + 1;
   my $column = int(rand(8)) + 1;
   print "Y${row}X${column}F${frame}T$timestamp\n";

   my $events = int(rand(MAX_EVENTS_PER_TIMESTAMP));
   for(1 .. $events)
   {
      my $type = int(rand(5));

      # Pick a zero-based target line.  The index may land up to 9 slots
      # past the current end, in which case assigning to it below grows
      # @lines and leaves undefined entries in between.
      my $index = int(rand(scalar(@lines) + 9));

      if( $type == 0 )
      {
         # Append
         $lines[$index] = "" unless defined $lines[$index];
         $lines[$index] .= RandomString();
         print LineEvent($index, scalar(@lines), $lines[$index]);
         next;
      }
      if( $type == 1 && $index <= $#lines )
      {
         # Delete, then output every remaining line since the lines after
         # the deleted one have all shifted up.
         splice @lines, $index, 1;
         for my $k (0 .. $#lines)
         {
            print LineEvent($k, scalar(@lines), $lines[$k]);
         }
         next;
      }

      # Replace.  This is also where delete events that targeted a line
      # past the end of @lines end up.
      $lines[$index] = RandomString();
      print LineEvent($index, scalar(@lines), $lines[$index]);
   }

   # Update frame, occasionally update timestamp
   $frame++;
   $timestamp++ if rand() > 0.5;
}