I wrote a Java source code indenter. It could use some refactoring (and it will get it sometime soon), and it may have bugs that I haven't found yet. Here is the code:
- sample script
- source code for the indenting script
from indent import JavaIndenter
import sys
# Sample driver: re-indent the Java file named by the first command-line
# argument and print the result to standard output.
indenter = JavaIndenter()
# The context manager guarantees the file is closed even if reading fails.
with open(sys.argv[1], "r") as hnd:
    java_source = hnd.read()
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(indenter.indent(java_source))
from pygments.lexers import *
import pdb
from pygments.token import *
from StringIO import StringIO
import re
class JavaSourceProcessor(object):
    """Tokenise Java source text with the Pygments ``java`` lexer.

    Attributes:
        lexer: the lexer returned by ``get_lexer_by_name("java")``.
        input: the source text most recently passed to ``set_input``.
        tokens: the list built by the most recent ``get_tokens`` call.
    """

    def __init__(self):
        self.lexer = get_lexer_by_name("java")
        # Defined up front so that calling get_tokens() before set_input()
        # tokenises an empty string instead of raising AttributeError.
        self.input = ""
        self.tokens = []

    def set_input(self, input):
        """Store *input* as the text to be tokenised.

        The parameter name shadows the builtin ``input``; it is kept as-is
        because callers may pass it by keyword.
        """
        self.input = input

    def get_tokens(self):
        """Tokenise the stored input and return the tokens as a list.

        The list is also kept on ``self.tokens``.  Its elements are whatever
        the lexer's ``get_tokens`` yields; JavaIndenter unpacks each one as a
        ``(token_type, token_value)`` pair.
        """
        self.tokens = list(self.lexer.get_tokens(self.input))
        return self.tokens
class JavaIndenter(object):
    """Re-indent Java source code from its Pygments token stream.

    The layout rules applied by ``_indent`` are:

    * ``{`` is written as ``" {"`` plus a newline and raises the indent level;
    * ``}`` lowers the indent level and is written, indented, on its own line;
    * ``;`` is written followed by a newline;
    * newline tokens, single tab tokens and runs of two or more whitespace
      characters from the input are dropped;
    * a single space is kept unless it directly follows ``{``, a space,
      ``;`` or a newline token;
    * any other token is written as-is, preceded by the current indentation
      when it is the first token after a line break produced by the rules
      above (unless the last recorded sentence was an import statement).

    Attributes:
        sp: the JavaSourceProcessor used to tokenise the input.
        tabs: current indent level (never negative).
        tabchar: string repeated ``tabs`` times to build the indentation.
        last_sentence: text of the most recently completed "sentence", i.e.
            the tokens up to and including a ``;``, ``{``, ``}`` or newline.
        last_words: tokens seen since the last sentence terminator.
        tokens: token list of the most recent ``indent`` call.
    """

    # Whole-word match, so identifiers such as ``important`` are not mistaken
    # for an import statement (which would suppress indentation).
    _IMPORT_RE = re.compile(r"\bimport\b")
    # Tokens made of two or more whitespace characters.
    _WHITESPACE_RUN_RE = re.compile(r"^\s{2,}$")
    # Tokens that complete a sentence for ``last_sentence`` tracking.
    _SENTENCE_ENDS = (";", "}", "{", "\n")
    # A single-space token directly after one of these tokens is dropped.
    _SPACE_SUPPRESSORS = ("{", " ", ";", "\n")

    def __init__(self, tabchar=" "):
        """Create an indenter.

        Args:
            tabchar: string used for one level of indentation.  Defaults to
                a single space.
        """
        self.sp = JavaSourceProcessor()
        self.tabs = 0
        self.tabchar = tabchar
        self.last_sentence = ""
        self.last_words = []

    def indent(self, source):
        """Return *source* (Java code as a string) re-indented.

        All per-run state is reset first, so one indenter can be reused for
        several inputs even if an earlier input had unbalanced braces.
        """
        self.tabs = 0
        self.last_sentence = ""
        del self.last_words[:]
        self.sp.set_input(source)
        self.tokens = self.sp.get_tokens()
        return self._indent(self.tokens)

    def space(self):
        """Return the indentation string for the current indent level."""
        return self.tabchar * self.tabs

    def _write_token(self, source, token):
        """Write *token* to the file-like object *source*."""
        source.write(token)

    def _add_word(self, word, source):
        """Record *word* and update ``last_sentence`` at sentence ends.

        *source* is accepted for interface compatibility and is not used.
        """
        self.last_words.append(word)
        if word in self._SENTENCE_ENDS:
            # A terminator on its own (e.g. a blank line) is not a sentence,
            # so the previously recorded sentence is kept in that case.
            if len(self.last_words) != 1:
                self.last_sentence = "".join(self.last_words)
            del self.last_words[:]

    def _last_sentence_is_import(self):
        """Return True when the last recorded sentence contains ``import``."""
        return self._IMPORT_RE.search(self.last_sentence) is not None

    def _indent(self, tokens):
        """Format an iterable of ``(token_type, token_value)`` pairs.

        Returns the formatted source as a single string.
        """
        source = StringIO()
        last_tok = ""
        need_indent = False
        try:
            for token_type, token_val in tokens:
                self._add_word(token_val, source)
                # NOTE(review): Pygments lexers normally emit comment
                # subtypes (e.g. Comment.Single), and ``==`` only matches the
                # exact parent type; ``token_type in Comment`` is the
                # documented subtype test.  Left unchanged because enabling
                # this branch changes how comments are laid out - confirm the
                # intended behaviour before switching.
                if token_type == Comment:
                    if need_indent:
                        source.write(self.space())
                    self._write_token(source, token_val + "\n")
                    need_indent = True
                elif token_val == "\t":
                    pass
                elif token_val == "{":
                    source.write(" ")
                    self._write_token(source, token_val + "\n")
                    self.tabs += 1
                    need_indent = True
                elif token_val == " ":
                    if last_tok not in self._SPACE_SUPPRESSORS:
                        self._write_token(source, token_val)
                elif token_val == ";":
                    # _add_word has already folded this statement into
                    # last_sentence, so this tests the statement just ended.
                    need_indent = not self._last_sentence_is_import()
                    self._write_token(source, token_val + "\n")
                elif token_val == "}":
                    # Clamp at zero so an unbalanced "}" cannot push the
                    # level negative and under-indent everything after it.
                    self.tabs = max(self.tabs - 1, 0)
                    need_indent = True
                    source.write(self.space())
                    self._write_token(source, token_val + "\n")
                elif token_val == "\n":
                    pass
                elif self._WHITESPACE_RUN_RE.search(token_val):
                    pass
                else:
                    if need_indent:
                        if not self._last_sentence_is_import():
                            source.write(self.space())
                        need_indent = False
                    self._write_token(source, token_val)
                last_tok = token_val
            return source.getvalue()
        finally:
            source.close()
It has a small problem with some multi-line Java comments, but I don't think that's very important.
0 Responses to “java code indenter”