JAVA SOFTWARE FOR EQUIDISTANT LETTER SEQUENCE (ELS) SEARCHES

 

Bruce David Wilner
January 2002

"Eliyahu Rips" "Doron Witztum" "Yoav Rosenberg" "Michael Drosnin" "Bible code" "Torah code"

As an accompaniment to my refutation of the Bible Codes phenomenon, I built some quick Java software to exercise the algorithm and gain a deeper appreciation of how it works. The following Java code implements a class, els, that marshals a text string into an internal data structure and then permits one to hunt for a given sequence within the string by calling the els.findSeq() method. The code is presented first, followed by a sample output run. Certain typographical conventions apply to enhance the readability of the software: calls to newly defined methods, as well as comment blocks that introduce methods, appear in red font; class and method declarations in purple; in-line comments in blue; and I/O function calls in green.

 

// --------------------------------------------- //
// manipulation of equidistant letter sequences  //
// (ELS), a la Drosnin's "The Bible Code" (1997) //
//                                               //
//		Bruce David Wilner               //
//		    April 2000                   //
// --------------------------------------------- //


import	java.io.*;


class els {
	String	text;		// letters-only, upper-cased text to search //
	int	textLen;	// its length, calculated once and cached   //


	// -------------------------------------------------------- //
	// Method:   els.els() - constructor                        //
	//                                                          //
	// Purpose:  Initialize an ELS object by storing the        //
	//           letters-only, upper-cased form of seq and      //
	//           caching its length                             //
	// -------------------------------------------------------- //
	public els( String  seq ) {
		text    = stripPunct( seq );
		textLen = text.length();
	}


	// -------------------------------------------------------- //
	// Method:   els.findSeq()                                  //
	//                                                          //
	// Purpose:  Report every occurrence of subj in the text    //
	//           as an ELS: letters at offset, offset + skip,   //
	//           offset + 2 * skip, ...; prints each match      //
	//           with a text window, then the match count       //
	//                                                          //
	// Note:     A one-letter pattern is reported once per      //
	//           occurrence, with skip 0; longer patterns       //
	//           use skips of 1 and up, since skip 0 would      //
	//           compare every letter against one position      //
	// -------------------------------------------------------- //
	public void  findSeq( String  subj ) {
		subj = stripPunct( subj );

		// ----------------------- //
		// validate search pattern //
		// ----------------------- //
		if( subj.length() == 0 ) {
			System.out.println( "ignoring zero-length pattern" );

			return;
		}

		int	subjLen    = subj.length();
		int	maxOffset  = textLen - subjLen + 1;
		int	numMatches = 0;

		for( int  offset = 0; offset < maxOffset; offset++ ) {
			int	minSkip = 0;
			int	maxSkip = 0;

			// ---------------------------------------------- //
			// last pattern letter sits at text position      //
			// offset + (subjLen - 1) * skip, which must stay //
			// within the text, i.e. at most textLen - 1      //
			// ---------------------------------------------- //
			if( subjLen > 1 ) {
				minSkip = 1;
				maxSkip =   ( textLen - 1 - offset )
					  / ( subjLen - 1 );
			}

			for( int  skip = minSkip; skip <= maxSkip; skip++ ) {
				if( ! matchesAt( subj, offset, skip ) ) {
					continue;
				}

				numMatches++;

				System.out.println(    "match at offset "
						     + offset
						     + ", skip "
						     + skip
						     + ":"
						  );

				showTextWindow( offset, skip, subjLen );
			}
		}

		String	predicate = ( numMatches == 1 ) ? " match found"
							: " matches found";

		System.out.println( "" + numMatches + predicate );
	}


	// -------------------------------------------------------- //
	// Method:   els.matchesAt()                                //
	//                                                          //
	// Purpose:  Tell whether subj occurs in the text with its  //
	//           first letter at offset and successive          //
	//           letters skip positions apart; the caller       //
	//           keeps every probed position inside the text    //
	// -------------------------------------------------------- //
	private boolean  matchesAt( String  subj, int  offset, int  skip ) {
		int	subjLen = subj.length();

		for( int  i = 0; i < subjLen; i++ ) {
			if(    subj.charAt( i )
			    != text.charAt( i * skip + offset ) ) {
				return false;
			}
		}

		return true;
	}


	// -------------------------------------------------------- //
	// Method:   els.showTextWindow()                           //
	//                                                          //
	// Purpose:  Display a match through a "window": one row    //
	//           per pattern letter, rows skip chars apart,     //
	//           matched column set off between '|' bars;       //
	//           positions outside the text print as blanks     //
	// -------------------------------------------------------- //
	public void  showTextWindow( int  offset,
				     int  skip,
				     int  subjLen ) {
		// -------------------------------------------- //
		// window reaches up to MAX_SIDEBAND chars on   //
		// either side of the match, and at least one   //
		// even when skip is 0 or 1                     //
		// -------------------------------------------- //
		final	int	MAX_SIDEBAND = 7;

			int	width = skip / 2;

		if( width > MAX_SIDEBAND ) {
			width = MAX_SIDEBAND;
		} else if( width == 0 ) {
			width++;
		}

		for( int  row = 0; row < subjLen; row++ ) {
			StringBuffer	line = new StringBuffer( "          " );

			for( int  col = - width; col <= width; col++ ) {
				int	textLoc =   offset + col
						  + row * skip;
				boolean	inText  =    textLoc >= 0
						  && textLoc < textLen;

				// -------------------------------------------- //
				// a position outside the text shows as a blank //
				// but keeps its '|' bars, so that the matched  //
				// column stays framed at the text's edges      //
				// -------------------------------------------- //
				line.append( inText ? text.charAt( textLoc )
						    : ' ' );

				if( col == -1 || col == 0 ) {
					line.append( " | " );
				} else {
					line.append( "   " );
				}
			}

			System.out.println( line.toString() );
		}

		// ------------------------------------------- //
		// blank line between successive match reports //
		// ------------------------------------------- //
		System.out.println();
	}


	// -------------------------------------------------------- //
	// Method:   els.stripPunct()                               //
	//                                                          //
	// Purpose:  Drop every character that is not a letter and  //
	//           map the survivors to upper case, so that       //
	//           comparison is case-insensitive                 //
	// -------------------------------------------------------- //
	public String  stripPunct( String  text ) {
		int		len  = text.length();
		StringBuffer	kept = new StringBuffer( len );

		for( int  i = 0; i < len; i++ ) {
			char	ch = text.charAt( i );

			if( Character.isLetter( ch ) ) {
				kept.append( Character.toUpperCase( ch ) );
			}
		}

		return kept.toString();
	}


	// -------------------------------------------------------- //
	// Method:   els.readScanPattern()                          //
	//                                                          //
	// Purpose:  Collect one line from the keyboard, without    //
	//           its line terminator; yields "" when input      //
	//           is exhausted or cannot be read                 //
	// -------------------------------------------------------- //
	public static String  readScanPattern() {
		String	line = readInputLine();

		if( line == null ) {
			return "";
		}

		return line;
	}


	// -------------------------------------------------------- //
	// Method:   els.readInputLine()                            //
	//                                                          //
	// Purpose:  Read bytes from System.in up to '\n' and       //
	//           return them as a String, minus a trailing      //
	//           '\r'; null means end of input (or an I/O       //
	//           error) before any byte arrived                 //
	// -------------------------------------------------------- //
	private static String  readInputLine() {
		final	int		LINE_LEN = 80;

			StringBuffer	inBuf  = new StringBuffer( LINE_LEN );
			boolean		sawAny = false;

		try {
			int	ch;

			while( ( ch = System.in.read() ) != -1 ) {
				sawAny = true;

				if( ch == '\n' ) {
					break;
				}

				inBuf.append( (char) ch );
			}

		} catch( IOException  ioe ) {
			// ------------------------------------------ //
			// treat keyboard I/O failure as end of input //
			// ------------------------------------------ //
		}

		if( ! sawAny ) {
			return null;
		}

		// ------------------------------------------ //
		// drop the '\r' of a Windows line terminator //
		// ------------------------------------------ //
		int	last = inBuf.length() - 1;

		if( last >= 0 && inBuf.charAt( last ) == '\r' ) {
			inBuf.setLength( last );
		}

		return inBuf.toString();
	}


	// -------------------------------------------------------- //
	// Method:   els.readFileContents()                         //
	//                                                          //
	// Purpose:  Read a whole file and return its contents as   //
	//           one String (platform default charset), or      //
	//           null when the file cannot be opened or read    //
	// -------------------------------------------------------- //
	public static String  readFileContents( String  path ) {
		FileInputStream	fis = null;

		try {
			fis = new FileInputStream( path );

			ByteArrayOutputStream	contents =
						new ByteArrayOutputStream();
			byte[]			chunk = new byte[ 8192 ];
			int			count;

			// ------------------------------------------- //
			// read() may deliver a file in several pieces //
			// ------------------------------------------- //
			while( ( count = fis.read( chunk ) ) != -1 ) {
				contents.write( chunk, 0, count );
			}

			return contents.toString();

		} catch( IOException  e ) {
			// ---------------------------------------------- //
			// covers a missing file as well as a failed read //
			// ---------------------------------------------- //
			return null;

		} finally {
			if( fis != null ) {
				try {
					fis.close();
				} catch( IOException  ignored ) {
					// ------------------------------------- //
					// nothing useful to do if close() fails //
					// ------------------------------------- //
				}
			}
		}
	}


	// -------------------------------------------------------- //
	// Method:   els.main()                                     //
	//                                                          //
	// Purpose:  Conduct ELS searches upon the text stored in   //
	//           the file named on the command line, reading    //
	//           patterns until '^' or end of input             //
	//                                                          //
	// Warning:  For a short text, simple patterns such as ROE  //
	//           and TOES are recommended                       //
	// -------------------------------------------------------- //
	public static void  main( String[]  argv ) {
		if( argv.length != 1 ) {
			System.out.println( "Usage: java els inputfile" );

			System.exit( 1 );
		}

		String	quotation = readFileContents( argv[ 0 ] );

		if( quotation == null ) {
			System.out.println( "cannot open file " + argv[ 0 ] );

			System.exit( 2 );
		}

		els	searcher = new els( quotation );

		if( searcher.textLen == 0 ) {
			System.out.println( "no searchable text in file" );

			System.exit( 3 );
		}

		System.out.println(   searcher.text + " = "
				    + searcher.textLen + " chars" );

		while( true ) {
			System.out.println( "" );
			System.out.print( "Enter scan pattern, ^ to exit: " );

			String	pattern = readInputLine();

			// ------------------------------------------- //
			// stop on the '^' sentinel or at end of input //
			// ------------------------------------------- //
			if( pattern == null || pattern.startsWith( "^" ) ) {
				break;
			}

			System.out.println();

			searcher.findSeq( pattern );
		}

		System.exit( 0 );
	}
}


//
Here is an example of the output of the program, where the text to be searched is a well-known quotation from William Shakespeare's Julius Caesar:

c:\> type shakquot.txt

"Friends, Romans, countrymen
lend me your ears;
I come not to bury Caesar, but to praise him.
The evil that men do lives after them;
the good is oft interr'd in their bones.
But Caesar is an honorable man,
so are they all, all honorable men.
When that the poor hath cried, Caesar hath wept:
ambition should be made of sterner stuff."

—William Shakespeare, Julius Caesar

c:\>

and the patterns sought are ROE, Q, TOES, and SHAKE. (I tried to keep the patterns short inasmuch as the likelihood of finding eschatologically interesting patterns such as JESUS IS COMING in this snippet of Shakespeare is presumably quite low.) The program displays the fixed text string before soliciting input patterns and reporting on the results of ELS searches upon those patterns:

c:\> java els shakquot.txt

FRIENDS ROMANS COUNTRYMEN LEND ME YOUR EARS I COME NOT TO BURY CAESAR BUT TO PRAISE HIM THE EVIL THAT MEN DO LIVES AFTER THEM THE GOOD IS OFT INTERRD IN THEIR BONES BUT CAESAR IS AN HONORABLE MAN SO ARE THEY ALL ALL HONORABLE MEN WHEN THAT THE POOR HATH CRIED CAESAR HATH WEPT AMBITION SHOULD BE MADE OF STERNER STUFF WILLIAM SHAKESPEARE JULIUS CAESAR = 286 chars

Enter scan pattern, ^ to exit: roe

match at offset 7, skip 7:
          N   D   S | R | O   M   A   
          N   S   C | O | U   N   T   
          R   Y   M | E | N   L   E   

match at offset 7, skip 23:
          F   R   I   E   N   D   S | R | O   M   A   N   S   C   O   
          L   E   N   D   M   E   Y | O | U   R   E   A   R   S   I   
          O   B   U   R   Y   C   A | E | S   A   R   B   U   T   T   

match at offset 18, skip 28:
          N   S   C   O   U   N   T | R | Y   M   E   N   L   E   N   
          O   M   E   N   O   T   T | O | B   U   R   Y   C   A   E   
          E   H   I   M   T   H   E | E | V   I   L   T   H   A   T   

match at offset 18, skip 88:
          N   S   C   O   U   N   T | R | Y   M   E   N   L   E   N   
          E   M   T   H   E   G   O | O | D   I   S   O   F   T   I   
          N   T   H   A   T   T   H | E | P   O   O   R   H   A   T   

match at offset 32, skip 29:
          N   D   M   E   Y   O   U | R | E   A   R   S   I   C   O   
          S   A   R   B   U   T   T | O | P   R   A   I   S   E   H   
          E   N   D   O   L   I   V | E | S   A   F   T   E   R   T   

match at offset 32, skip 74:
          N   D   M   E   Y   O   U | R | E   A   R   S   I   C   O   
          E   M   T   H   E   G   O | O | D   I   S   O   F   T   I   
          O   N   O   R   A   B   L | E | M   E   N   W   H   E   N   

match at offset 35, skip 51:
          E   Y   O   U   R   E   A | R | S   I   C   O   M   E   N   
          H   A   T   M   E   N   D | O | L   I   V   E   S   A   F   
          E   S   B   U   T   C   A | E | S   A   R   I   S   A   N   

match at offset 49, skip 12:
          O   T   T   O   B   U | R | Y   C   A   E   S   A   
          A   R   B   U   T   T | O | P   R   A   I   S   E   
          E   H   I   M   T   H | E | E   V   I   L   T   H   

match at offset 49, skip 56:
          N   O   T   T   O   B   U | R | Y   C   A   E   S   A   R   
          H   E   M   T   H   E   G | O | O   D   I   S   O   F   T   
          M   A   N   S   O   A   R | E | T   H   E   Y   A   L   L   

match at offset 56, skip 30:
          R   Y   C   A   E   S   A | R | B   U   T   T   O   P   R   
          H   A   T   M   E   N   D | O | L   I   V   E   S   A   F   
          S   O   F   T   I   N   T | E | R   R   D   I   N   T   H   

match at offset 56, skip 54:
          R   Y   C   A   E   S   A | R | B   U   T   T   O   P   R   
          E   G   O   O   D   I   S | O | F   T   I   N   T   E   R   
          S   O   A   R   E   T   H | E | Y   A   L   L   A   L   L   

match at offset 96, skip 10:
          S   A   F   T   E | R | T   H   E   M   T   
          T   H   E   G   O | O | D   I   S   O   F   
          F   T   I   N   T | E | R   R   D   I   N   

match at offset 96, skip 14:
          V   E   S   A   F   T   E | R | T   H   E   M   T   H   E   
          E   G   O   O   D   I   S | O | F   T   I   N   T   E   R   
          R   R   D   I   N   T   H | E | I   R   B   O   N   E   S   

match at offset 126, skip 2:
          I | R | B   
          B | O | N   
          N | E | S   

match at offset 140, skip 33:
          U   T   C   A   E   S   A | R | I   S   A   N   H   O   N   
          A   L   L   A   L   L   H | O | N   O   R   A   B   L   E   
          H   A   T   H   C   R   I | E | D   C   A   E   S   A   R   

match at offset 140, skip 35:
          U   T   C   A   E   S   A | R | I   S   A   N   H   O   N   
          L   A   L   L   H   O   N | O | R   A   B   L   E   M   E   
          C   R   I   E   D   C   A | E | S   A   R   H   A   T   H   

match at offset 160, skip 13:
          M   A   N   S   O   A | R | E   T   H   E   Y   A   
          L   L   A   L   L   H | O | N   O   R   A   B   L   
          E   M   E   N   W   H | E | N   T   H   A   T   T   

17 matches found

Enter scan pattern, ^ to exit: q

0 matches found

Enter scan pattern, ^ to exit: toes

match at offset 122, skip 36:
          T   E   R   R   D   I   N | T | H   E   I   R   B   O   N   
          B   L   E   M   A   N   S | O | A   R   E   T   H   E   Y   
          N   T   H   A   T   T   H | E | P   O   O   R   H   A   T   
          M   B   I   T   I   O   N | S | H   O   U   L   D   B   E   

1 match found

Enter scan pattern, ^ to exit: shake

match at offset 263, skip 1:
          M | S | H   
          S | H | A   
          H | A | K   
          A | K | E   
          K | E | S   

1 match found

Enter scan pattern, ^ to exit: ^
c:\>