01
Jun
09

java code indenter



I wrote a Java source code indenter. It could use some refactoring (which I plan to do soon), and it may still have bugs that I haven't found. Here is the code:

  1. sample script
  2. 
    
    from indent import JavaIndenter
    import sys
    
    # Re-indent the Java file named by the first command-line argument and
    # print the result to stdout.
    indenter = JavaIndenter()
    # ``with`` closes the file even if reading it raises.
    with open(sys.argv[1], "r") as hnd:
        java_source = hnd.read()
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(indenter.indent(java_source))
    
  3. source code for the indenting script
  4. 
    
    from pygments.lexers import *
    import pdb
    from pygments.token import *
    from StringIO import StringIO
    import re
    
    class JavaSourceProcessor(object):
        """Tokenizes source text with the Pygments lexer registered as "java"."""

        def __init__(self):
            # The lexer is looked up once here and reused by get_tokens().
            self.lexer = get_lexer_by_name("java")

        def set_input(self, input):
            """Store the text that the next get_tokens() call will tokenize."""
            self.input = input

        def get_tokens(self):
            """Tokenize the stored input.

            The lexer output is materialised into a list, kept on
            ``self.tokens`` and returned.
            """
            token_list = list(self.lexer.get_tokens(self.input))
            self.tokens = token_list
            return token_list
    
    class JavaIndenter(object):
        """Re-indents Java source code from its Pygments token stream.

        Every ``{``, ``;``, ``}`` and comment ends an output line. The first
        token of the following line is prefixed with ``tabchar`` repeated once
        per currently open brace. Whitespace from the input is discarded,
        except that whitespace between two tokens on the same output line is
        kept as a single space.

        Known limitation: every ``;`` ends a line, including the ones inside
        a ``for (...; ...; ...)`` header.
        """

        # A sentence is an import statement only when one of its lines starts
        # with the ``import`` keyword; identifiers that merely contain
        # "import" (``importData``, ``important``) must not qualify.
        _IMPORT_SENTENCE = re.compile(r"^\s*import\b", re.MULTILINE)

        # Tokens that close the sentence collected by _add_word().
        _SENTENCE_ENDS = (";", "}", "{", "\n")

        def __init__(self):
            self.sp = JavaSourceProcessor()
            # Text written once per nesting level.
            self.tabchar = "  "
            self._reset_state()

        def _reset_state(self):
            """Forget the nesting depth and sentence history of an earlier run."""
            self.tabs = 0
            self.last_sentence = ""
            self.last_words = []

        def indent(self, source):
            """Tokenize ``source`` (Java source text) and return it re-indented."""
            self.sp.set_input(source)
            self.tokens = self.sp.get_tokens()
            return self._indent(self.tokens)

        def space(self):
            """Return the indentation string for the current nesting depth."""
            return self.tabchar * self.tabs

        def _write_token(self, source, token):
            """Append ``token`` to the output buffer ``source``."""
            source.write(token)

        def _add_word(self, word, source):
            """Record ``word`` and close the current sentence on a terminator.

            ``source`` is unused; the parameter is kept so the signature does
            not change. A terminator that is the only collected word closes
            nothing, so ``last_sentence`` survives e.g. the newline that
            directly follows a ``;``.
            """
            self.last_words.append(word)
            if word in self._SENTENCE_ENDS and len(self.last_words) != 1:
                self.last_sentence = "".join(self.last_words)
                del self.last_words[:]

        def _indent(self, tokens):
            """Render ``tokens`` as re-indented source text.

            ``tokens`` is an iterable of ``(token_type, token_value)`` pairs.
            Per-run state is reset first, so one indenter can format several
            sources in a row. Returns the formatted text.
            """
            self._reset_state()
            source = StringIO()
            # True once a line-ending token was written: indent what follows.
            need_indent = False
            # True while nothing has been written on the current output line.
            at_line_start = True
            # True when whitespace followed the last token written on this
            # line; it is only ever set while at_line_start is False.
            pending_space = False
            try:
                for token_type, token_val in tokens:
                    self._add_word(token_val, source)
                    if token_type in Comment:
                        # ``in`` also accepts comment subtypes such as
                        # Comment.Single / Comment.Multiline, which ``==``
                        # against the bare Comment type never matched.
                        if need_indent:
                            source.write(self.space())
                        elif pending_space:
                            source.write(" ")
                        # The token text may already end with a newline;
                        # normalise so exactly one is written.
                        self._write_token(source, token_val.rstrip("\n") + "\n")
                        need_indent = True
                        at_line_start = True
                        pending_space = False
                    elif token_val == "{":
                        # One space separates the brace from the text before
                        # it; a brace opening its own line is indented.
                        source.write(self.space() if at_line_start else " ")
                        self._write_token(source, token_val + "\n")
                        self.tabs += 1
                        need_indent = True
                        at_line_start = True
                        pending_space = False
                    elif token_val == ";":
                        # The line after an import statement is not indented.
                        need_indent = not self._IMPORT_SENTENCE.search(
                            self.last_sentence)
                        self._write_token(source, token_val + "\n")
                        at_line_start = True
                        pending_space = False
                    elif token_val == "}":
                        # Never drop below zero on an unbalanced brace.
                        self.tabs = max(self.tabs - 1, 0)
                        if not at_line_start:
                            # A closing brace always starts its own line.
                            source.write("\n")
                        source.write(self.space())
                        self._write_token(source, token_val + "\n")
                        need_indent = True
                        at_line_start = True
                        pending_space = False
                    elif token_val.isspace():
                        # Input layout is discarded; only remember that the
                        # neighbouring tokens were separated.
                        if not at_line_start:
                            pending_space = True
                    else:
                        if at_line_start:
                            if need_indent and not self._IMPORT_SENTENCE.search(
                                    self.last_sentence):
                                source.write(self.space())
                        elif pending_space:
                            source.write(" ")
                        self._write_token(source, token_val)
                        need_indent = False
                        at_line_start = False
                        pending_space = False
                return source.getvalue()
            finally:
                source.close()
    

It has a small problem with some multiline Java comments, but I don't think that's very important.

Advertisements

0 Responses to “java code indenter”



  1. Leave a Comment

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s


Blog Stats

  • 223,857 hits