#!/usr/bin/env python
#
# v2vnode implementation in python
# 2nd version after the bash/perl/python/c implementation
#
# all in one package:
#	- generates ed2k links, magnet links, sha1 hash, bittorrent file
#	- updates the .info file
#	- updates/generates the rdf v2vnode overview:
#			http://localhost/~v2vadmin/v2v.rdf, http://localhost/~v2vadmin/node_info/
#
# (c)2003 <j@thing.net>
#
# TODO:
#	- integrate publish_torrent (done)
#	- minor thing, reduce memory footprint for gen_ed2k.
#		(right now it reads whole chunks into memory, ~ 12MB)

from sys import *
from os import listdir,unlink,rename,walk,stat,rmdir
from os.path import abspath,exists,getsize,join,isdir,basename,split
from xml.dom.minidom import parse, parseString,getDOMImplementation
import urllib
import re
import time
import sha
# Import smtplib for the actual sending emails
import smtplib
from email import Encoders
from email.Message import Message
from email.MIMEBase import MIMEBase
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText

# needed for ed2k links, if you not have it run requre.py to get the link
import Crypto.Hash.MD4
# local imports
import base32


# BitTorrent imports using the local BitTorrent copy.
# `argv` and `path` are sys.argv and sys.path (pulled in by `from sys import *`).
# The directory of this script is derived from argv[0] and its sibling
# "../BitTorrent" is appended to the module search path before the two
# imports below are executed.
(bin_path,file) = split(abspath(argv[0]))
path.append(join(bin_path,'../BitTorrent'))
from BitTorrent.bencode import bdecode,bencode
from btmakemetafile	import make_meta_file


# Node configuration (module-level settings read by the functions below).

# The main folder, used like an install prefix.
# NOTE: v2v_dir is assigned twice; the second assignment is the one in effect.
v2v_dir="/home/v2vadmin/"
v2v_dir="/video/v2vadmin/"
#v2v_dir="/home/j/Projects/v2v/testbed/"

# These can be set independently of v2v_dir if needed.
# incoming_dir is scanned by check_incoming(); share_dir holds the published
# clip folders and their .torrent files.
incoming_dir=join(v2v_dir,"incoming")
share_dir=join(v2v_dir,"share")

# URL of the bittorrent tracker, passed to make_meta_file().
tracker_url="http://v2v.cc:6969/announce"
# ~10GB in bytes. If it is 0 or negative, nothing is ever deleted
# (see check_and_free_diskspace).
max_shared_space=10737418240  

# rdf_file is the output written by gen_rdf(); rdf_template is the RDF
# skeleton it parses. about_url is filled with the clip's base32 sha1.
rdf_file=join(v2v_dir,"htdocs/v2v.rdf")
rdf_template=join(v2v_dir,"var/v2vdata/rdf.xml")
about_url="http://v2v.cc/link?%s"
# node_email is the email of this node (used as the sender in send_torrent).
node_email="j@thing.net"
# Recipient of the torrent announcement mails.
# NOTE: assigned twice; the second assignment is the one in effect.
distribution_email="v2v-server@coyote1.kein.org"
distribution_email="j@thing.net"

# That is all there is to configure.



# Check for new clips in incoming_dir.
#
# Every sub-folder of incoming_dir that contains both a "*.info" file and a
# video clip (*.avi, *.mov, *.ogg) is published:
#	- disk space is freed in share_dir if needed
#	- the .info file is updated with the generated links
#	- the folder is moved to share_dir and a torrent is generated for it
#	- the rdf overview is regenerated and the torrent is mailed out
# If a folder of the same name already exists in share_dir, the incoming
# folder is renamed with a timestamp postfix instead, so nothing in
# share_dir gets overwritten.
#
# Each folder is handled exactly once: the scan over its files stops as soon
# as both files are known, so the folder is never touched again after it has
# been moved or renamed.
def check_incoming():
	for folder in listdir(incoming_dir):
		clipfolder = join(incoming_dir, folder)
		if not isdir(clipfolder):
			continue

		clipname = ""
		clipinfo = ""
		for name in listdir(clipfolder):
			# the dot is escaped and the match anchored, so that e.g.
			# "clip.avi.info" is not mistaken for a video clip
			if re.search(r"\.info$", name):
				clipinfo = join(clipfolder, name)
			if re.search(r"\.(avi|mov|ogg)$", name):
				clipname = join(clipfolder, name)
			if clipname and clipinfo:
				break

		if not (clipname and clipinfo):
			# incomplete upload, try again on the next run
			continue

		target = join(share_dir, folder)
		if not exists(target):
			# free some space if needed
			check_and_free_diskspace(du(clipfolder))
			# update the .info file, move the folder to share_dir and
			# generate the torrent file
			update_info(clipfolder, clipname, clipinfo)
			rename(clipfolder, target)
			gen_torrent_file(target)
			# update the rdf
			gen_rdf(rdf_file)
			# send the mail to the list
			publish_torrent("%s.torrent" % folder)
		else:
			# only the last path component gets the postfix; the parent
			# directories are left untouched
			rename(clipfolder, join(incoming_dir, "%s_%s" % (folder, time.time())))
					
# Generate the ed2k link, magnet link, torrent url and generic http link for
# a clip and put them into its .info file.
#
#	clipfolder	folder of the clip; its basename names the torrent
#	clipname	path of the video file the hashes are computed from
#	info_file	path of the .info file, rewritten in place
#
# The .info file is treated as plain text; these placeholders are replaced:
#	_link_			-> ed2k, magnet and torrent links (as v2v:link elements)
#	<link></link>		-> the http link
#	about=""		-> the http link
#	rdf:resource=""	-> the http link (should not be empty)
#
# Open questions kept from the original notes: use an rdf parser instead?
# The file size needs updating, and the length too once we switch to ogg.
def update_info(clipfolder,clipname,info_file):
	sha1base32 = gen_sha1base32(clipname)
	http_link = "http://v2v.cc/link?%s" % sha1base32
	torrent = "%s.torrent" % basename(clipfolder)
	all_v2v_links="%s</v2v:link>\n<v2v:link>%s</v2v:link>\n<v2v:link>%s" % (gen_ed2k(clipname),gen_magnet(clipname,sha1base32).replace("&","&amp;"),gen_torrent_link(torrent))

	info_f = open(info_file, 'r')
	try:
		info = info_f.readlines()
	finally:
		info_f.close()

	updated = []
	for line in info:
		line = line.replace("_link_", all_v2v_links)
		line = line.replace('<link></link>', "<link>%s</link>" % http_link)
		line = line.replace('about=""', "about=\"%s\"" % http_link)
		# should not be empty, so put the v2v link here if it is
		line = line.replace('rdf:resource=""', "rdf:resource=\"%s\"" % http_link)
		updated.append(line)

	# 'w' truncates the file, and close() is really called so the data is
	# flushed before the folder gets moved
	info_f = open(info_file, 'w')
	try:
		info_f.writelines(updated)
	finally:
		info_f.close()
	


# generates the torrent file
# calles the function make_meta_file from btmakemetafile
def gen_torrent_file(clipfolder):
	global tracker_url
	# had to change make_meta_file
	#	target=None -> target=''
	try:
		make_meta_file(clipfolder, tracker_url)
	except ValueError, e:
		print 'error: ' + str(e)
		print 'run with no args for parameter explanations'
	
# Return the public url of a torrent file.
# For now all torrents are linked on v2v.cc; this could change to the server
# the file was uploaded to.
def gen_torrent_link(torrent):
	return "http://v2v.cc/torrents/%s" % torrent

# Return the magnet:?... link for filename.
# The link carries the file's basename as display name (dn) and the base32
# sha1 as exact topic (xt); both are url-encoded.
def gen_magnet(filename,sha1base32):
	params = {'dn':basename(filename),'xt':"urn:sha1:%s" % sha1base32}
	query = urllib.urlencode(params)
	return "magnet:?%s" % query

# Hash up to `limit` bytes read from the open file `infile`.
# The data is fed to MD4 in small blocks so a whole ed2k chunk never has to
# be held in memory. Returns (md4 object, number of bytes hashed); the
# number is 0 at end of file.
def _ed2k_chunk_md4(infile, limit, blocksize=65536):
	md4 = Crypto.Hash.MD4.new()
	hashed = 0
	while hashed < limit:
		data = infile.read(min(blocksize, limit - hashed))
		if not data:
			break
		md4.update(data)
		hashed += len(data)
	return md4, hashed

# Generate the ed2k link for filename:
#	ed2k://|file|<basename>|<size in bytes>|<hex hash>|
#
# The file is hashed in chunks of 9728000 bytes:
#	- smaller than one chunk: the hash is the MD4 of the file
#	- otherwise: the hash is the MD4 of the concatenated MD4 digests of all
#	  chunks; if the size is an exact multiple of the chunk size, the
#	  digest of an empty chunk is appended at the END of that list
#
# based on donkeyhash.pl translated to python
# depends on Crypto.Hash, which needs to be installed
# source: http://www.amk.ca/python/code/crypto.html
# debian: apt-get install python-crypto
def gen_ed2k(filename):
	donkeychunk = 9728000
	filesize = getsize(filename)
	# binary mode: the hash must be computed over the raw bytes
	infile = open(filename, 'rb')
	try:
		if filesize < donkeychunk:
			md4, hashed = _ed2k_chunk_md4(infile, donkeychunk)
			donkeyhash = md4.hexdigest()
		else:
			hash_of_digests = Crypto.Hash.MD4.new()
			md4, hashed = _ed2k_chunk_md4(infile, donkeychunk)
			while hashed:
				hash_of_digests.update(md4.digest())
				md4, hashed = _ed2k_chunk_md4(infile, donkeychunk)
			if filesize % donkeychunk == 0:
				# trailing empty chunk for exact multiples
				hash_of_digests.update(Crypto.Hash.MD4.new().digest())
			donkeyhash = hash_of_digests.hexdigest()
	finally:
		infile.close()
	return "ed2k://|file|%s|%s|%s|" % (basename(filename), filesize, donkeyhash)

# Generate the sha1 hash of a file in base32 encoding.
# Used for magnet links and as uid for the clip.
# The file is read in 4096 byte blocks, in binary mode so the hash is
# computed over the raw bytes, and it is closed even if reading fails.
def gen_sha1base32(filename):
	sha1 = sha.new()
	infile = open(filename, 'rb')
	try:
		buffer = infile.read(4096)
		while buffer:
			sha1.update(buffer)
			buffer = infile.read(4096)
	finally:
		infile.close()
	return base32.b2a(sha1.digest())

# Return a human readable, multi-line summary of a .torrent file:
# metainfo file name, info hash, file or directory name, the contained
# files with their sizes, total size split into pieces, and announce url.
#
#	metainfo_name	path of the .torrent (bencoded metainfo) file
#
# Raises KeyError if the metainfo lacks one of the expected keys.
def get_metainfo(metainfo_name):
	metainfo_file = open(metainfo_name, 'rb')
	try:
		metainfo = bdecode(metainfo_file.read())
	finally:
		metainfo_file.close()
	announce = metainfo['announce']
	info = metainfo['info']
	info_hash = sha.sha(bencode(info))
	piece_length = info['piece length']

	metainfo_text = 'metainfo file.: %s' % basename(metainfo_name) + "\n"
	metainfo_text += 'info hash.....: %s' % info_hash.hexdigest() + "\n"
	if 'length' in info:
		# let's assume we just have a file
		metainfo_text += 'file name.....: %s' % info['name'] + "\n"
		file_length = info['length']
		name = 'file size.....:'
	else:
		# let's assume we have a directory structure
		metainfo_text += 'directory name: %s' % info['name'] + "\n"
		metainfo_text += 'files.........: ' + "\n"
		# set before the loop so it is defined for an empty file list too
		name = 'archive size..:'
		file_length = 0
		for entry in info['files']:
			entry_path = "/".join(entry['path'])
			metainfo_text += '   %s (%d)' % (entry_path, entry['length']) + "\n"
			file_length += entry['length']
	piece_number, last_piece_length = divmod(file_length, piece_length)
	metainfo_text += '%s %i (%i * %i + %i)' \
			% (name, file_length, piece_number, piece_length, last_piece_length) + "\n"
	metainfo_text += 'announce url..: %s' % announce + "\n"

	return metainfo_text

# Convert incoming mail to torrent files?
# The current solution seems to be ok, but who knows.
# Not implemented yet; always returns the marker string "missing".
def mail2torrent():
	placeholder = "missing"
	return placeholder


# Publish a torrent file.
# Right now that means sending an email to the v2vnode mailing list
# with the torrent file attached (see send_torrent).
# Might change to an alternative syndication solution in the future.
#
#	torrent		file name of the torrent, as expected by send_torrent
def publish_torrent(torrent):
	send_torrent(torrent)
	
# Mail a torrent file from share_dir to distribution_email.
# The mail is sent from node_email. Its text part is the summary produced
# by get_metainfo, and the torrent itself is attached base64 encoded as
# application/x-bittorrent.
#
#	torrent		file name of the torrent, relative to share_dir
#
# The torrent file and the SMTP connection are closed even when reading or
# sending fails; the error itself is passed on to the caller.
def send_torrent(torrent):
	torrent_absoulte = join(share_dir, torrent)
	message = get_metainfo(torrent_absoulte)

	# Create the container (outer) email message.
	outer = MIMEMultipart()
	outer['Subject'] = 'new torrent: %s' % torrent
	outer['From'] = node_email
	outer['To'] = distribution_email
	outer.preamble = message
	# Guarantees the message ends in a newline
	outer.epilogue = ''
	outer.attach(MIMEText(message))

	fp = open(torrent_absoulte, 'rb')
	try:
		payload = fp.read()
	finally:
		fp.close()
	attachment = MIMEBase('application', 'x-bittorrent')
	attachment.set_payload(payload)
	# Encode the payload using Base64
	Encoders.encode_base64(attachment)
	# Set the filename parameter
	attachment.add_header('Content-Disposition', 'attachment', filename=torrent)
	outer.attach(attachment)

	# Send the email via our own SMTP server.
	s = smtplib.SMTP()
	s.connect()
	try:
		s.sendmail(node_email, distribution_email, outer.as_string())
	finally:
		s.close()





# Send out nightly status information for admins.
# Only a reminder so far: not implemented, always returns the marker
# string "missing".
def nightly_status():
	placeholder = "missing"
	return placeholder

# Return the dc:date of the first item in an .info file as a timestamp.
# The text of the first <dc:date> inside the first <item> is converted with
# w3cdtf2timestamp. Returns -1 if the file cannot be parsed or does not
# contain such an element.
def get_dc_date(filename):
	try:
		first_item = parse(filename).getElementsByTagName("item")[0]
		date_node = first_item.getElementsByTagName("dc:date")[0]
		date = date_node.childNodes[0].data
	except:
		return -1
	return w3cdtf2timestamp(date)

# We need some sort of date conversion unix timestamp <-> w3cdtf.
# This direction formats a unix timestamp as a w3cdtf string in UTC with
# minute resolution, e.g. 0 -> "1970-01-01T00:00+00:00".
# That could be more elegant.
def timestamp2w3cdtf(unixtimestamp):
	utc_fields = time.gmtime(unixtimestamp)
	w3cdtf_format = "%Y-%m-%dT%H:%M+00:00"
	return time.strftime(w3cdtf_format, utc_fields)

# Convert a w3cdtf date string to a unix timestamp (seconds since the epoch,
# UTC), the inverse of timestamp2w3cdtf.
#
# Accepted form: YYYY-MM-DDThh:mm[:ss][Z|+hh:mm|-hh:mm]
# Seconds default to 0. A missing time zone is treated like "Z" (UTC).
#
# Returns the timestamp as a float, or -1 if date_str does not contain a
# date of that form. Raises ValueError for dates that match the pattern but
# are not valid calendar dates (e.g. month 13).
def w3cdtf2timestamp(date_str):
	import calendar

	# groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":ss", 7 seconds,
	#         8 zone sign, 9 zone hours, 10 zone minutes, 11 "Z"
	pat = r"(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?"
	m = re.search(pat, date_str)
	if not m:
		return -1

	year = int(m.group(1))
	month = int(m.group(2))
	day = int(m.group(3))
	hours = int(m.group(4))
	minutes = int(m.group(5))
	if m.group(7) is None:
		seconds = 0
	else:
		seconds = int(m.group(7))

	# offset in seconds to add to get from the given local time to UTC;
	# stays 0 for "Z" and for a missing time zone
	offset = 0
	if m.group(8) is not None:
		offset = ((int(m.group(9)) * 60) + int(m.group(10))) * 60
		# is the time zone ahead of GMT? then subtract the offset
		if m.group(8) == '+':
			offset = -offset

	# strptime validates the fields; timegm interprets them as UTC
	# (mktime would interpret them in the local time zone of the machine)
	timestring = "%04i-%02i-%02i %02i:%02i:%02i" % (year, month, day, hours, minutes, seconds)
	epoch = calendar.timegm(time.strptime(timestring, '%Y-%m-%d %H:%M:%S'))
	return float(epoch + offset)

# Return the base32 sha1 of a clip from its parsed .info document.
# The 32 characters following "sha1:" are taken from the url-unquoted text
# of the <v2v:link> elements; if several links carry one, the last one
# wins.
#
# Raises ValueError if no link carries a sha1 urn (previously this ended in
# an UnboundLocalError).
def get_sha1(dom):
	sha1 = None
	for node in dom.getElementsByTagName("v2v:link"):
		if node.childNodes:
			m = re.search("(?<=sha1:)(................................)", urllib.unquote(node.childNodes[0].data))
			if m:
				sha1 = m.group(0)
	if sha1 is None:
		raise ValueError("no v2v:link with a sha1 urn found")
	return sha1

# Return the file name of a clip from its parsed .info document.
# The name is taken from the first <v2v:link> whose url-unquoted text looks
# like an ed2k link ("file|<name>|...|...|"). Returns None if there is no
# such link.
def get_clipname(dom):
	ed2k_pattern = "file\|(.*)\|.*\|.*\|"
	for link in dom.getElementsByTagName("v2v:link"):
		if not link.childNodes:
			continue
		found = re.search(ed2k_pattern, urllib.unquote(link.childNodes[0].data))
		if found:
			return found.group(1)
	


# Return a new <rdf:li rdf:resource="..."/> element pointing to `about`.
# The element is built through the DOM api instead of parsing a string:
# a fragment with an undeclared "rdf:" prefix is rejected by a namespace
# aware parser, and `about` does not have to be escaped by hand this way.
def format_rdf_li(about):
	from xml.dom.minidom import Document

	li = Document().createElement("rdf:li")
	li.setAttribute("rdf:resource", about)
	return li

# Collect all "*.info" files below share_dir.
# Returns a dict mapping an integer timestamp (the dc:date of the file, see
# get_dc_date) to the path of the .info file. Files without a readable
# dc:date are left out.
#
# The keys are only meant for ordering. If two files share the same
# timestamp, the key of the later one is moved forward second by second
# until it is free, so no file is dropped from the result.
def get_info_files():
	info_files = {}
	for root, dirs, files in walk(share_dir):
		for name in files:
			# escaped dot and anchored, so only real .info files match
			if not re.search(r"\.info$", name):
				continue
			info_file = join(root, name)
			info_file_date = get_dc_date(info_file)
			if info_file_date == -1:
				continue
			key = int(info_file_date)
			while key in info_files:
				key += 1
			info_files[key] = info_file
	return info_files

# Generate the rdf overview of this node.
# Starts from rdf_template, sets the channel dc:date to now and adds every
# clip found by get_info_files, newest first: one rdf:li entry in rdf:Seq
# and the clip's <item> below rdf:RDF.
#
#	outfile		path to write the result to, or None to only return it
#
# Returns the pretty printed rdf document.
def gen_rdf(outfile=None):
	rdf = parse(rdf_template)
	# set the dc:date of the channel to now
	rdf.getElementsByTagName("dc:date")[0].firstChild.replaceWholeText(timestamp2w3cdtf(time.time()))

	# loop over all .info files in share, newest first
	info_files = get_info_files()
	timestamps = list(info_files.keys())
	timestamps.sort()
	timestamps.reverse()
	for timestamp in timestamps:
		info_dom = parse(info_files[timestamp])
		item = info_dom.getElementsByTagName("item")[0]

		# some fixes that need to go back to the .info files at some point
		# this replaces all about lines!
		sha1 = get_sha1(info_dom)
		about = about_url % sha1
		item.setAttribute("rdf:about", about)
		info_dom.getElementsByTagName('dc:identifier')[0].setAttribute("rdf:resource", about)

		link = info_dom.getElementsByTagName("link")[0]
		try:
			link.firstChild.replaceWholeText(about)
		except AttributeError:
			# <link> has no text node yet, so add one
			link.appendChild(info_dom.createTextNode(about))

		# regenerate the magnet links from the clip name and sha1
		clipname = get_clipname(info_dom)
		if clipname:
			for v2v_link in info_dom.getElementsByTagName("v2v:link"):
				if re.search("magnet", v2v_link.toxml()):
					v2v_link.firstChild.replaceWholeText(gen_magnet(clipname, sha1))
		# end of fixes

		rdf.getElementsByTagName("rdf:Seq")[0].appendChild(format_rdf_li(about))
		rdf.getElementsByTagName("rdf:RDF")[0].appendChild(item)

	rdf_pretty = rdf.toprettyxml()
	if outfile is not None:
		out_f = open(outfile, 'w')
		try:
			out_f.write(rdf_pretty)
		finally:
			# really close (and flush) the file
			out_f.close()
	return rdf_pretty



# checks the disk usage for the share_dir and removes the oldest
# clip if needed
def check_and_free_diskspace(new_v2v_folder_size):
	global share_dir, max_shared_space
	if max_shared_space>0:
		current_share_size=du(share_dir)
		while (new_v2v_folder_size + current_share_size) > max_shared_space:
			if(new_v2v_folder_size>max_shared_space):
				print "new file is bigger than share folder ",
				print new_v2v_folder_size
				break
			oldest_folder=get_oldest_v2v_folder()
			for file_name in listdir(oldest_folder):
				unlink(join(oldest_folder,file_name))
			rmdir(oldest_folder)
			unlink("%s.torrent" % oldest_folder)
			current_share_size=du(share_dir)
	#print "new: %s, he have: %s" % (new_v2v_folder_size/1024,current_share_size/1024)

# Return the path of the oldest folder in share_dir, judged by st_ctime.
# With equal ctimes the folder listed first wins.
# Raises ValueError if share_dir contains no folder at all.
#
# Is this the right way to check for the oldest folder?
# It could be better to read the .info file and extract the date there.
def get_oldest_v2v_folder():
	oldest_folder = None
	oldest_timestamp = None
	for folder in listdir(share_dir):
		candidate = join(share_dir, folder)
		if not isdir(candidate):
			continue
		changed = stat(candidate).st_ctime
		# compare against the folders seen so far, not against "now", so a
		# folder with a ctime in the future can still be picked
		if oldest_timestamp is None or changed < oldest_timestamp:
			oldest_timestamp = changed
			oldest_folder = candidate
	if oldest_folder is None:
		raise ValueError("no folder found in %s" % share_dir)
	return oldest_folder

# Simple du (disk usage) function.
# Returns the summed size in bytes of all files below folder, including
# those in sub-folders. A folder that does not exist counts as 0.
def du(folder):
	total = 0
	for dirpath, subdirs, filenames in walk(folder):
		for filename in filenames:
			total = total + getsize(join(dirpath, filename))
	return total


# Return the size in bytes needed for the content of a torrent.
# Used to check for disk space before starting the download.
#
#	torrent_file	path of the .torrent (bencoded metainfo) file
#
# For a single file torrent this is its 'length'; for a directory torrent
# it is the sum of the 'length' of all files, each counted exactly once no
# matter how deep its path is.
def get_torrent_filesize(torrent_file):
	metainfo_file = open(torrent_file, 'rb')
	try:
		metainfo = bdecode(metainfo_file.read())
	finally:
		metainfo_file.close()
	info = metainfo['info']
	if 'length' in info:
		# let's assume we just have a file
		return info['length']
	# let's assume we have a directory structure
	file_length = 0
	for entry in info['files']:
		file_length += entry['length']
	return file_length


# main
# Without arguments the incoming folder is checked for new clips.
# With exactly one argument (a torrent file; used for the procmail stuff)
# only the disk space is checked and freed if needed.
if __name__ == '__main__':
	argument_count = len(argv) - 1
	if argument_count == 0:
		check_incoming()
	elif argument_count == 1:
		check_and_free_diskspace(get_torrent_filesize(argv[1]))
