#!/usr/bin/env python
# ***********************************************************
#  File: vasp2cif[.py]
#  Description: a tool to make CIF format files out of
#               VASP POSCAR+POTCAR files.
#               Output files acquire a .cif extension
#               example: POSCAR --> POSCAR.cif
#  Author: Peter Larsson
#  Affiliation: Department of Physics & Materials Science,
#               Uppsala University, Sweden.
#  Revision history:
#      2012-12-15  Torbjorn Bjorkman
#        - Extracts geometries also from OUTCAR files to a
#          set of blocks in a single CIF file. Useful for
#          visualization of a relaxation or MD run.
#      2011-05-06  Torbjorn Bjorkman
#        - You can now give many input POSCAR files and they
#          will be generated as separate data blocks in the
#          output CIF. Useful for visualization.
#        - Support for cartesian coordinates also for non-
#          orthorhombic systems.
#      2010-01-08  Peter Larsson
#        - Support for VASP 5 style CONTCAR files
#      2009-09-29  Peter Larsson
#        - More descriptive help for command line options
#      2009-04-17  Peter Larsson
#        - More robust error handling and support
#          for Cartesian coordinates in orthorhombic cells
#      2008-10-13  Peter Larsson
#        - Ported to Python and added support for
#          "Selective Dynamics" format in POSCAR and
#          volume scaling
#      2006-10-24  Peter Larsson
#        - Original version in Ruby.
# ***********************************************************

import os
import sys
import commands
import math
import re
from optparse import OptionParser

class Cell:
	def __init__(self):
		label = ""
		a = None
		b = None
		c = None
		alpha = None
		beta = None
		gamma = None
		HMSymbol = "'P 1'"
		sites = [] # list of (elementname, x, y, z) tuples

def ciffilestring(cell):
	#Make CIF header with cell parameters and coord record info
	outstring = ""
	# this does not work if you have more than one word on the first line of the poscar...
	## cif_header.append("data_" + poscar[0])
	outstring += "data_" + cell.label.strip(" ")+"\n"
	outstring += "_audit_creation_method   'Generated by vasp2cif'\n"
	outstring += "_cell_length_a    " + str(cell.a)+"\n"
	outstring += "_cell_length_b    " + str(cell.b)+"\n"
	outstring += "_cell_length_c    " + str(cell.c)+"\n"
	outstring += "_cell_angle_alpha    " + str(cell.alpha)+"\n"
	outstring += "_cell_angle_beta    " + str(cell.beta)+"\n"
	outstring += "_cell_angle_gamma    " + str(cell.gamma)+"\n"
	outstring += "\n"
	outstring += "_symmetry_space_group_name_H-M    "+cell.HMSymb+"\n"
	outstring += "loop_\n"
	outstring += "_atom_site_label\n"
	outstring += "_atom_site_type_symbol\n"
	outstring += "_atom_site_fract_x\n"
	outstring += "_atom_site_fract_y\n"
	outstring += "_atom_site_fract_z\n"
	outstring += "_atom_site_occupancy\n"
	i = 1
	for a in cell.sites:
		 outstring += "%s%i   %s   %1.15f   %1.15f   %1.15f   1.0\n" % (a[0], i, a[0], a[1], a[2], a[3])
		 i += 1		 
	outstring += "\n\n"
	return outstring

def gstrip(s):
	#Strip all whitespace in string s
	return re.sub("\s+" , "", s)

# Return OUTCAR or POSCAR depending on file type.
# If not identified, return UNKNOWN
def filetype(f):
	filetype = "UNKNOWN"
	# Something that should recognize an OUTCAR file
	sym = re.compile("Analysis of symmetry for")
	# POSCAR identified if line 2-5 can be interpreted as
	# a length scale followed by three lattice vectors
	lines = []
	for i in range(6):
		lines.append(f.readline())
	try:
		t = float(lines[1].split()[0])
		for i in range(2,5):
			t = [float(s) for s in lines[i].split()[:3]]
		filetype = "POSCAR"
	except:
		for line in f:
			if sym.match(line):
				filetype = "OUTCAR"
				break
	f.seek(0) # Rewind file
	return filetype
	
def mvmult3(mat,vec):
	# matrix-vector multiplication
	w = []
	for i in range(3):
		t = 0
		for j in range(3):
			t += mat[j][i]*vec[j]
		w.append(t)
	return w

def det3(m):
	# Determinant of 3x3 dimensional matrix
	a = m[1][1]*m[2][2]-m[1][2]*m[2][1]
	b = m[1][2]*m[2][0]-m[1][0]*m[2][2]
	c = m[1][0]*m[2][1]-m[1][1]*m[2][0]
	return m[0][0]*a + m[0][1]*b + m[0][2]*c

def minv3(m):
	# Inverse of 3x3 dimensional matrix
	det = det3(m)
	w = [[(m[1][1]*m[2][2]-m[1][2]*m[2][1])/det, (m[0][2]*m[2][1]-m[0][1]*m[2][2])/det, (m[0][1]*m[1][2]-m[0][2]*m[1][1])/det],
	     [(m[1][2]*m[2][0]-m[1][0]*m[2][2])/det, (m[0][0]*m[2][2]-m[0][2]*m[2][0])/det, (m[0][2]*m[1][0]-m[0][0]*m[1][2])/det],
	     [(m[1][0]*m[2][1]-m[1][1]*m[2][0])/det, (m[0][1]*m[2][0]-m[0][0]*m[2][1])/det, (m[0][0]*m[1][1]-m[0][1]*m[1][0])/det]]
	return w

# Input parser
parser = OptionParser()
parser.add_option("-v","--verbose",dest="verbose",help="Print CIF to stdout",action="store_true")
parser.add_option("-o","--output",dest="output",help="Save CIF to named file",metavar="FILE")
parser.add_option("-e","--elements",dest="elements",help="""Supply elements if no POTCAR is present. Example: --elements="Fe,Co,Ni" """,metavar="list of elements")
(options,args) = parser.parse_args()

#Let's get started, read input files.
# Store in lists as (inputfile,filename) tuples.
if len(args) == 0:
	#Pipe mode, read and write to stdin and stdout
	input_files = [(sys.stdin,None)]
	cif_file = sys.stdout
elif len(args) == 1:
	#Write to input.cif
	input_files = [(file(arg,'r'),arg) for arg in args]
	if options.output:
		cif_file = file(options.output,'w')
	else:
		cif_file = file(args[0] + ".cif",'w')
else:
	#Write to input.cif
	input_files = [(file(arg,'r'),arg) for arg in args[0:]]
	if options.output:
		cif_file = file(options.output,'w')
	else:
		cif_file = file(args[0] + "_etc.cif",'w')

# Initialize Cell object.
cell = Cell()
cell.HMSymb = "'P 1'"
# loop over input files
inputfilenr = 1
cifblocknr = 1
for input_file,filename in input_files:
	if filetype(input_file) == "POSCAR":
		poscar = input_file.readlines()

		# CIF block number
		cell.label = str(cifblocknr)

		#We need to determine the data format, VASP 5 stores element names in line 5
		if gstrip(poscar[5]).isdigit():
			#Old school format
			vasp5 = False
			offset = 0
		elif gstrip(poscar[5]).isalpha() and gstrip(poscar[6]).isdigit():
			#Looks like vasp5 like format
			vasp5 = True
			offset = 1

		#First deal with potential POTCAR problems
		atoms = []
		if options.elements:
			#Read atoms from supplied string, eg "Li,Fe,Si,O"
			atoms = options.elements.split(",")
			assert(len(atoms) > 0)
		else:		
			if vasp5:
				#Read elements from line 5
				words = poscar[5].split()
				atoms = [w.strip() for w in words]
			else:
				#Try to read atoms from POTCAR
				if not os.path.exists("POTCAR"):
					sys.stderr.write("ERROR: Cannot find POTCAR. Please supply atom labels with the -e flag.\n")
					sys.exit(1)

				potcar_lines = commands.getoutput("grep TITEL POTCAR").split("\n")
				if len(potcar_lines) == 0:
					sys.stderr.write("ERROR: POTCAR file exists, but is empty? Supply atom labels with the -e flag.\n")
					sys.exit(1)

				for line in potcar_lines:
					words = line.split()
					assert(words[0] == 'TITEL')

					#Note, we need the split _ to deal with names like "Li_sv"
					atoms.append(words[3].split("_")[0])

		#Lattice scaling factor
		lattice_constant = float(poscar[1].strip())

		#Dealing with volume scaling in POSCAR
		final_volume = -lattice_constant
		scale_volume = False
		if lattice_constant < 0.0:
			lattice_constant = 1.0
			scale_volume = True

		#Read cell vectors
		a = []
		b = []
		c = []
		a.append(lattice_constant*float(poscar[2].split()[0].strip()))
		a.append(lattice_constant*float(poscar[2].split()[1].strip()))
		a.append(lattice_constant*float(poscar[2].split()[2].strip()))

		b.append(lattice_constant*float(poscar[3].split()[0].strip()))
		b.append(lattice_constant*float(poscar[3].split()[1].strip()))
		b.append(lattice_constant*float(poscar[3].split()[2].strip()))

		c.append(lattice_constant*float(poscar[4].split()[0].strip()))
		c.append(lattice_constant*float(poscar[4].split()[1].strip()))
		c.append(lattice_constant*float(poscar[4].split()[2].strip()))

		unscaled_volume = a[0]*b[1]*c[2]-a[0]*b[2]*c[1]+a[1]*b[2]*c[0]-a[1]*b[0]*c[2]+a[2]*b[0]*c[1]-a[2]*b[1]*c[0]

		if scale_volume:
			lattice_constant = (final_volume/unscaled_volume)**(1.0/3.0)
			a = map(lambda x: lattice_constant*x,a)
			b = map(lambda x: lattice_constant*x,b)
			c = map(lambda x: lattice_constant*x,c)
			volume = a[0]*b[1]*c[2]-a[0]*b[2]*c[1]+a[1]*b[2]*c[0]-a[1]*b[0]*c[2]+a[2]*b[0]*c[1]-a[2]*b[1]*c[0]

		cell.a = math.sqrt(a[0]*a[0]+a[1]*a[1]+a[2]*a[2])
		cell.b = math.sqrt(b[0]*b[0]+b[1]*b[1]+b[2]*b[2])
		cell.c = math.sqrt(c[0]*c[0]+c[1]*c[1]+c[2]*c[2])

		cell.alpha = math.acos((b[0]*c[0]+b[1]*c[1]+b[2]*c[2])/(cell.b*cell.c))*180/math.pi
		cell.beta = math.acos((a[0]*c[0]+a[1]*c[1]+a[2]*c[2])/(cell.a*cell.c))*180/math.pi
		cell.gamma = math.acos((b[0]*a[0]+b[1]*a[1]+b[2]*a[2])/(cell.a*cell.b))*180/math.pi

		#Read atoms counts and make label array
		atomlabels = []

		atomcounts = poscar[5+offset].split()

		if len(atomcounts) != len(atoms):
			sys.stderr.write("ERROR: Not the same number of atom species in POTCAR and POSCAR. Please check.\n")
			sys.exit(1)

		n_atoms = 0
		for i in range(0,len(atomcounts)):
			n = int(atomcounts[i].strip())
			n_atoms = n_atoms + n
			for j in range(0,n):
				atomlabels.append(atoms[i])

		#Check for selective dynamics
		if poscar[6+offset].upper()[0] == 'S':
			offset = offset + 7
		else:
			offset = offset + 6

		#Check for direct coordinates
		direct_coordinates = True
		if poscar[offset].upper()[0] == 'D':
			direct_coordinates = True
		if poscar[offset].upper()[0] == 'C':
			direct_coordinates = False
			lattice_vectors = [a,b,c]
			inverse_lattice_vectors = minv3(lattice_vectors)

		#Scan and print atomic positions from offset
		if len(atomlabels) > (len(poscar)-offset):
			sys.stderr.write(("WARNING: vasp2cif expected to find %d coordinates, but there are only %d coordinate lines in the file!\n") % (len(atomlabels),len(poscar)-offset))
			atomlabels = atomlabels[0:len(poscar)-offset-1]

		cell.sites = []
		for i in range(0,len(atomlabels)):
			#extract first three fields in POSCAR line
			coords = map(float,poscar[i+offset+1].split()[0:3])

			if not direct_coordinates:
				coords = mvmult3(inverse_lattice_vectors, coords)

			cell.sites.append((atomlabels[i],coords[0],coords[1],coords[2]))

		# Print cell to cif files
		cifstring = ciffilestring(cell)
		cif_file.write(cifstring)
		if options.verbose:
			sys.stdout.write(cifstring)
		# increment cif block counter
		cifblocknr += 1
	
	elif filetype(input_file) == "OUTCAR":
		# First find elements and how many of each.
		atoms = []
		titellines = commands.getoutput("grep TITEL "+filename).split("\n")
		if len(titellines) == 0:
			sys.stderr.write("ERROR: Cannot read elements. Damaged OUTCAR file?\n")
			sys.exit(1)
		for line in titellines:
			words = line.split()
			assert(words[0] == 'TITEL')
			#Note, we need the split _ to deal with names like "Li_sv"
			atoms.append(words[3].split("_")[0])
		# How many of each?
		natoms = [int(s) for s in commands.getoutput("grep 'ions per type =' "+filename).split()[4:]]
		# Set up initial position array
		i = 0
		cell.sites = []
		for a in atoms:
			for j in range(natoms[i]):
				cell.sites.append((a,0.0,0.0,0.0))
			i += 1
		# Precompiled regular expressions.
		re_iter = re.compile("aborting loop because EDIFF is reached")
		re_lattice = re.compile("direct lattice vectors")
		re_positions = re.compile("POSITION")
		latticevectors = []
		introread = False
		vectorsread = False
		positionsread = False
		vecline = 5
		posline = sum(natoms)+1
		linenr = 0
		for line in input_file:
			linenr += 1
			# Start for lattices and positions after the end of first iteration.
			if not introread:
				if re_iter.search(line):
					introread = True
				continue
			# Get lattice vectors.
			if re_lattice.search(line):
				vecline = 1
				latticevectors = []
				continue
			if vecline <= 3:
				latticevectors.append([float(s) for s in line.split()[0:3]])
			if vecline == 4:
				# Lattice vectors read, set parameters
				vectorsread = True
				cell.latticevectors = latticevectors
				inverse_lattice_vectors = minv3(latticevectors)
				a,b,c = latticevectors[0],latticevectors[1],latticevectors[2]
				# crystallographic parameters
				cell.a = math.sqrt(a[0]*a[0]+a[1]*a[1]+a[2]*a[2])
				cell.b = math.sqrt(b[0]*b[0]+b[1]*b[1]+b[2]*b[2])
				cell.c = math.sqrt(c[0]*c[0]+c[1]*c[1]+c[2]*c[2])

				cell.alpha = math.acos((b[0]*c[0]+b[1]*c[1]+b[2]*c[2])/(cell.b*cell.c))*180/math.pi
				cell.beta = math.acos((a[0]*c[0]+a[1]*c[1]+a[2]*c[2])/(cell.a*cell.c))*180/math.pi
				cell.gamma = math.acos((b[0]*a[0]+b[1]*a[1]+b[2]*a[2])/(cell.a*cell.b))*180/math.pi
				
			# Read positions
			if re_positions.search(line):
				posline = -2
			if 0 <= posline < sum(natoms):
				c = [float(p) for p in line.split()[0:3]]
				c = mvmult3(inverse_lattice_vectors, c)
				cell.sites[posline] = (cell.sites[posline][0],c[0],c[1],c[2])
			if posline == sum(natoms) and introread:
				# Positions read, now print cell.
				cell.label = str(cifblocknr)
				cifstring = ciffilestring(cell)
				cif_file.write(cifstring)
				if options.verbose:
					sys.stdout.write(cifstring)
				# increment cif block counter
				cifblocknr += 1
			posline += 1
			vecline += 1
	else:
		sys.stderr.write("ERROR: Format of file %i not recognized.\n"%inputfilenr)
		sys.exit(1)
	# increment input file counter
	inputfilenr += 1