package diskindex;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import org.apache.log4j.Logger;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

public class BuildAccession {

	/** Text file read line by line in {@link #main(String[])}; each line is one original file path. */
	private static final String source = "/home/cliffp/pages/filenames.txt";
	/** Path prefix that {@link #main(String[])} removes from every listed path before recreating it. */
	private static final String quantumRoot = "Quantum CY nonprop.001/Partition 1 [2014MB]/WINDOWS 95 [FAT32]/[root]";
	/** Shared logger; assigned at the start of {@link #main(String[])}, so it is null before that. */
	private static Logger log;

	/**
	 * Recreates every path listed in the {@code source} file underneath the
	 * output directory, filling each one with a stand-in file.
	 *
	 * For each listed path the normalised extension is determined; a matching
	 * example file is looked up through {@code SolrSearcher} (or one of the
	 * default files is used when the path has no usable extension) and copied
	 * to the same relative location below the output directory.
	 *
	 * Any failure is logged as fatal and stops the run; the listing reader is
	 * closed in every case.
	 *
	 * @param argv ignored
	 */
	public static void main(String[] argv) {
		log = Logger.getLogger("BCB");

		File odir = new File("/media/virtuous/accout/T1");

		// mkdirs() also returns false when the directory is already there, so
		// only complain when it is genuinely missing afterwards. We carry on
		// regardless: existing output is simply overwritten.
		if (!odir.mkdirs() && !odir.isDirectory()) {
			log.warn("Oh dear, couldn't make output dir!");
		}

		BufferedReader br = null;
		try {
			// NOTE(review): the listing is decoded with the platform default charset, as before.
			br = new BufferedReader(new InputStreamReader(new FileInputStream(source)));
			String filename;

			while ((filename = br.readLine()) != null) {
				// listing uses Windows separators; switch to forward slashes
				filename = filename.replace('\\', '/');
				filename = filename.replace(quantumRoot, "");

				if (filename.trim().length() == 0) {
					// nothing to recreate for an empty entry
					log.debug("Skipping blank line");
					continue;
				}

				log.debug("Working on: " + filename);

				String normExt = grabExt(filename);

				// the example file whose content stands in for the listed file
				File demoFile;
				if ("fa_none".equals(normExt)) {
					demoFile = grabRandomFile("");
				} else {
					// get file from index
					String query = "q=normext:" + solrEncode(normExt);
					SolrSearcher s = new SolrSearcher();
					demoFile = grabDemoFile(s.search(query), normExt);
				}

				// Resolve the listed path below the output directory. This also
				// copes with entries that have no directory part, where
				// getParent() on the bare entry would be null.
				File target = new File(odir, filename);
				File targetDir = target.getParentFile();
				if (targetDir != null && !targetDir.isDirectory() && !targetDir.mkdirs()) {
					log.warn("Couldn't create directory " + targetDir.getAbsolutePath());
				}

				String sourcePath = demoFile.getAbsolutePath();
				String targetPath = target.getAbsolutePath();

				// doing copy
				log.info("Copy from " + sourcePath + " to " + targetPath);
				copy(sourcePath, targetPath);
			}
		} catch (Exception e) {
			// pass the exception along so the stack trace ends up in the log
			log.fatal("Error: " + e.getMessage(), e);
		} finally {
			if (br != null) {
				try {
					br.close();
				} catch (IOException closeFailure) {
					log.warn("Couldn't close " + source + ": " + closeFailure.getMessage());
				}
			}
		}
	}

	/**
	 * Copies the complete content of one file to another, replacing whatever
	 * the target held before.
	 *
	 * @param source path of the file to read
	 * @param target path of the file to create or overwrite
	 * @throws IOException if either file cannot be opened or the transfer fails
	 */
	public static void copy(String source, String target) throws IOException {
		FileChannel srcChannel = new FileInputStream(source).getChannel();
		try {
			FileChannel dstChannel = new FileOutputStream(target).getChannel();
			try {
				long size = srcChannel.size();
				long position = 0;
				// transferFrom may move fewer bytes than requested, so keep
				// going until the whole source has been written.
				while (position < size) {
					long moved = dstChannel.transferFrom(srcChannel, position, size - position);
					if (moved <= 0) {
						// no progress (e.g. the source shrank meanwhile); avoid spinning forever
						break;
					}
					position += moved;
				}
			} finally {
				dstChannel.close();
			}
		} finally {
			// closing the channel also closes the stream it was obtained from
			srcChannel.close();
		}
	}

	/**
	 * Extracts the lower-cased extension of a path.
	 *
	 * @param in path using '/' as separator
	 * @return the text after the last '.', lower-cased; or {@code "fa_none"}
	 *         when the path has no '.', ends with '.', is empty, or the last
	 *         '.' sits in a directory name (a '/' follows it)
	 */
	private static String grabExt(String in) {
		int dot = in.lastIndexOf('.');

		// no dot at all, or nothing after the last dot (this also covers "")
		if (dot < 0 || dot + 1 == in.length()) {
			return "fa_none";
		}

		String ext = in.substring(dot + 1);

		// the dot belonged to a directory name, not to the file itself
		if (ext.indexOf('/') > -1) {
			return "fa_none";
		}

		// Lower-case with a fixed locale: the default-locale variant would turn
		// "GIF" into "gıf" under a Turkish locale and miss the index entry.
		return ext.toLowerCase(java.util.Locale.ENGLISH);
	}

	/**
	 * Picks an example file from an XML search response.
	 *
	 * The response is expected to carry a {@code /response/result} element
	 * with a {@code numFound} attribute and one element with
	 * {@code name="fullpath"} per hit. One of the first (at most ten) hits is
	 * chosen at random. When the response reports no hits, or contains no
	 * {@code fullpath} values, a default file is used instead.
	 *
	 * If the response cannot be parsed, or the default file cannot be
	 * prepared, the problem is logged and the JVM is terminated with a
	 * non-zero exit status.
	 *
	 * @param results the XML search response
	 * @param ext     the extension that was searched for; used for logging and
	 *                for the fallback file
	 * @return the chosen example file
	 */
	private static File grabDemoFile(String results, String ext) {
		SAXBuilder builder = new SAXBuilder();
		Exception failure;

		try {
			Document resultSet = builder.build(new ByteArrayInputStream(results.getBytes("UTF-8")));

			Element result = (Element) XPath.newInstance("/response/result").selectSingleNode(resultSet);
			Attribute numFound = (result == null) ? null : result.getAttribute("numFound");
			if (numFound == null) {
				throw new JDOMException("no /response/result/@numFound in search response");
			}

			int nf = numFound.getIntValue();
			if (nf == 0) {
				// no hits
				log.warn("ext. " + ext + " not found");
				return grabRandomFile(ext);
			}

			List<?> hits = XPath.newInstance("//*[@name='fullpath']").selectNodes(resultSet);

			// Choose among the first ten hits at most, but never beyond what
			// the response actually contains.
			int candidates = Math.min(Math.min(nf, 10), hits.size());
			if (candidates <= 0) {
				log.warn("ext. " + ext + ": " + nf + " hit(s) reported but no fullpath values returned");
				return grabRandomFile(ext);
			}

			Element hit = (Element) hits.get(new Random().nextInt(candidates));
			return new File(hit.getText());

		} catch (JDOMException e) {
			log.debug(results);
			log.debug("not well-formed");
			log.debug(e.getMessage());
			failure = e;
		} catch (IOException e) {
			log.debug("IO error: " + e.getMessage());
			failure = e;
		}

		log.fatal("Something bad happened...", failure);
		// non-zero status so that callers of the program can see the run failed
		System.exit(1);

		// not reached; keeps the compiler satisfied
		return new File("");
	}

	/**
	 * Returns one of the five default files, chosen at random, optionally
	 * under a name carrying the given extension.
	 *
	 * With an empty extension the chosen default file itself is returned.
	 * Otherwise the result is {@code <default>.<ext>}; if that file does not
	 * exist yet it is created as a copy of the same default file.
	 *
	 * @param ext extension without the leading dot, or "" for none
	 * @return the default file, or its copy carrying the extension
	 * @throws IOException if the copy with the extension cannot be created
	 */
	private static File grabRandomFile(String ext) throws IOException {
		String[] defaults = {
			"/media/virtuous/accout/default/random_1",
			"/media/virtuous/accout/default/random_2",
			"/media/virtuous/accout/default/random_3",
			"/media/virtuous/accout/default/random_4",
			"/media/virtuous/accout/default/random_5"
		};

		// Pick once, so that "random_N.ext" is always derived from "random_N".
		String base = defaults[new Random().nextInt(defaults.length)];

		if (ext.length() == 0) {
			// the default file is used as it is; nothing to derive
			return new File(base);
		}

		File withExt = new File(base + "." + ext);
		if (!withExt.exists()) {
			copy(base, withExt.getAbsolutePath());
		}

		return withExt;
	}

	/**
	 * Prepares a literal term for use in a query URL: characters that the
	 * query parser treats as syntax are prefixed with a backslash, then the
	 * whole term is URL-encoded with {@link URLUTF8Encoder#encode(String)}.
	 *
	 * @param in the raw term
	 * @return the escaped and URL-encoded term
	 */
	private static String solrEncode(String in) {
		// characters with a special meaning in the query syntax
		final String special = "\\+-!():^[]\"{}~*?|&;/";

		StringBuilder escaped = new StringBuilder(in.length() + 8);
		for (int i = 0; i < in.length(); i++) {
			char c = in.charAt(i);
			// whitespace would otherwise split the term in two
			if (special.indexOf(c) >= 0 || Character.isWhitespace(c)) {
				escaped.append('\\');
			}
			escaped.append(c);
		}

		return URLUTF8Encoder.encode(escaped.toString());
	}

	/**
	 * Provides a method to encode any string into a URL-safe
	 * form.
	 * Non-ASCII characters are first encoded as sequences of
	 * two to four bytes, using the UTF-8 algorithm, before being
	 * encoded as %HH escapes.
	 *
	 * Created: 17 April 1997
	 * Author: Bert Bos <bert@w3.org>
	 *
	 * Modified from the W3C original: characters outside the Basic
	 * Multilingual Plane (surrogate pairs) are encoded as a single four-byte
	 * UTF-8 sequence, and the escape table is generated rather than listed.
	 *
	 * URLUTF8Encoder: http://www.w3.org/International/URLUTF8Encoder.java
	 *
	 * Copyright © 1997 World Wide Web Consortium, (Massachusetts
	 * Institute of Technology, European Research Consortium for
	 * Informatics and Mathematics, Keio University). All Rights Reserved. 
	 * This work is distributed under the W3C® Software License [1] in the
	 * hope that it will be useful, but WITHOUT ANY WARRANTY; without even
	 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
	 * PURPOSE.
	 *
	 * [1] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
	 */

	public static class URLUTF8Encoder {

		/** Escape for every byte value: {@code hex[b]} is "%xy" with lower-case hex digits. */
		final static String[] hex = buildHexTable();

		/** Builds the 256-entry table "%00" .. "%ff". */
		private static String[] buildHexTable() {
			String[] table = new String[256];
			for (int b = 0; b < table.length; b++) {
				table[b] = "%" + Character.forDigit(b >> 4, 16) + Character.forDigit(b & 0x0f, 16);
			}
			return table;
		}

		/**
		 * Encode a string to the "x-www-form-urlencoded" form, enhanced
		 * with the UTF-8-in-URL proposal. This is what happens:
		 *
		 * <ul>
		 * <li><p>The ASCII characters 'a' through 'z', 'A' through 'Z',
		 *        and '0' through '9' remain the same.
		 *
		 * <li><p>The unreserved characters - _ . ! ~ * ' ( ) remain the same.
		 *
		 * <li><p>The space character ' ' is converted into a plus sign '+'.
		 *
		 * <li><p>All other ASCII characters are converted into the
		 *        3-character string "%xy", where xy is
		 *        the two-digit hexadecimal representation of the character
		 *        code
		 *
		 * <li><p>All non-ASCII characters are encoded in two steps: first
		 *        to a sequence of 2, 3 or 4 bytes, using the UTF-8 algorithm;
		 *        secondly each of these bytes is encoded as "%xx".
		 *        A surrogate pair is treated as the single character it
		 *        represents (4 bytes); an unpaired surrogate is still
		 *        written in the 3-byte form.
		 * </ul>
		 *
		 * @param s The string to be encoded
		 * @return The encoded string
		 */
		public static String encode(String s) {
			StringBuilder out = new StringBuilder();
			int len = s.length();
			int i = 0;
			while (i < len) {
				// read a whole code point so that surrogate pairs stay together
				int cp = s.codePointAt(i);
				i += Character.charCount(cp);

				boolean alphanumeric = ('A' <= cp && cp <= 'Z')
						|| ('a' <= cp && cp <= 'z')
						|| ('0' <= cp && cp <= '9');
				boolean unreserved = cp == '-' || cp == '_' || cp == '.'
						|| cp == '!' || cp == '~' || cp == '*'
						|| cp == '\'' || cp == '(' || cp == ')';

				if (alphanumeric || unreserved) {
					out.append((char) cp);
				} else if (cp == ' ') {			// space
					out.append('+');
				} else if (cp <= 0x007f) {		// other ASCII
					out.append(hex[cp]);
				} else if (cp <= 0x07ff) {		// two bytes
					out.append(hex[0xc0 | (cp >> 6)]);
					out.append(hex[0x80 | (cp & 0x3f)]);
				} else if (cp <= 0xffff) {		// three bytes
					out.append(hex[0xe0 | (cp >> 12)]);
					out.append(hex[0x80 | ((cp >> 6) & 0x3f)]);
					out.append(hex[0x80 | (cp & 0x3f)]);
				} else {					// four bytes, 0xFFFF < cp <= 0x10FFFF
					out.append(hex[0xf0 | (cp >> 18)]);
					out.append(hex[0x80 | ((cp >> 12) & 0x3f)]);
					out.append(hex[0x80 | ((cp >> 6) & 0x3f)]);
					out.append(hex[0x80 | (cp & 0x3f)]);
				}
			}
			return out.toString();
		}

	}
}
