mirror of
https://github.com/gryf/pythonhelper.git
synced 2025-12-18 12:00:27 +01:00
WIP: Rewrite parser
This commit is contained in:
@@ -1,20 +1,40 @@
|
||||
"""
|
||||
Simple analyzer for python source files. Collect and give info about file
|
||||
structure: classes, its methods and functions.
|
||||
|
||||
version: 0.2
|
||||
date: 2016-05-21
|
||||
author: Roman Dobosz <gryf@vimja.com>
|
||||
|
||||
TODO: - fix the corner case with applying a tag, where it shouldn't do. like:
|
||||
|
||||
1 def foo():
|
||||
2 pass
|
||||
3
|
||||
4 if True == False:
|
||||
5 foo()
|
||||
|
||||
where line 5 is reporting as a function foo() body, which is not true.
|
||||
|
||||
"""
|
||||
from collections import OrderedDict
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
import vim
|
||||
|
||||
|
||||
RE_TAG_TYPE = re.compile(r'\s*(def|class)[ \t]+([^(:]+).*')
|
||||
RE_INDENT = re.compile(r'([ \t]*).*')
|
||||
|
||||
|
||||
class PythonTag(object):
|
||||
"""A simple storage class representing a python tag."""
|
||||
CLASS = "class"
|
||||
METHOD = "method"
|
||||
FUNCTION = "function"
|
||||
|
||||
def __init__(self, tag_type, full_name, line_number, indent_level):
|
||||
def __init__(self, tag_type='', full_name='', line_number=0,
|
||||
indent_level=0):
|
||||
"""Initializes instances of Python tags.
|
||||
|
||||
:param tag_type: Tag type as string
|
||||
@@ -30,172 +50,62 @@ class PythonTag(object):
|
||||
|
||||
def __str__(self):
|
||||
"""Returns a string representation of the tag."""
|
||||
return "%s (%s) [%s, %u, %u]" % (self.name,
|
||||
self.tag_type,
|
||||
self.full_name,
|
||||
self.line_number,
|
||||
self.indent_level,)
|
||||
return "%0.2d [%d] %s %s" % (self.line_number,
|
||||
self.indent_level,
|
||||
self.tag_type,
|
||||
self.full_name)
|
||||
|
||||
__repr__ = __str__
|
||||
|
||||
|
||||
class SimplePythonTagsParser(object):
|
||||
"""Provides a simple Python tag parser."""
|
||||
# how many chars a single tab represents (visually)
|
||||
TABSIZE = 8
|
||||
# regexp used to extract indentation and strip comments
|
||||
COMMENTS_INDENT_RE = re.compile('([ \t]*)([^\n#]*).*')
|
||||
# regexp used to extract a class or function name
|
||||
TAG_TYPE_RE = re.compile('(def|class)[ \t]+([^(:]+).*')
|
||||
|
||||
def __init__(self, source):
|
||||
"""
|
||||
Initializes instances of SimplePythonTagsParser().
|
||||
|
||||
:param source: source for which the tags will be generated. It is
|
||||
simply vim buffer.
|
||||
"""
|
||||
self.source = source
|
||||
class EvenSimplerPythonTagsParser(object):
|
||||
"""Simplified version for Python source code tag parser."""
|
||||
|
||||
def get_tags(self):
|
||||
"""
|
||||
Determines all the tags for the buffer.
|
||||
|
||||
:returns: tuple of tags line numbers and tags
|
||||
"""
|
||||
tag_line_numbers = []
|
||||
tags = {}
|
||||
"""Return OrderedDict with all tags for current buffer"""
|
||||
tags_stack = []
|
||||
tags = OrderedDict()
|
||||
|
||||
# go through all the lines in the source and localize all Python tags
|
||||
# in it
|
||||
# for (line, line_number) in zip(self.source, itertools.count(1)):
|
||||
for line_number, line in enumerate(self.source, start=1):
|
||||
line = line + '\n'
|
||||
for line_no, line in enumerate(vim.current.buffer):
|
||||
|
||||
# extract the line's indentation characters and its content
|
||||
line_match = self.COMMENTS_INDENT_RE.match(line)
|
||||
line_content = line_match.group(2)
|
||||
tag_match = RE_TAG_TYPE.match(line)
|
||||
|
||||
# match for the class tag
|
||||
tag_match = self.TAG_TYPE_RE.match(line_content)
|
||||
|
||||
# if the class tag has been found, store some information on it
|
||||
if tag_match:
|
||||
current_tag = self.get_python_tag(tags_stack, line_number,
|
||||
line_match.group(1),
|
||||
tag_match.group(2),
|
||||
tag_match.group(1))
|
||||
indent_level = self._get_indent_level(line)
|
||||
|
||||
tag_line_numbers.append(line_number)
|
||||
tags[line_number] = current_tag
|
||||
for _ in range(len(tags_stack)):
|
||||
if tags_stack and \
|
||||
tags_stack[-1].indent_level >= indent_level:
|
||||
tags_stack.pop()
|
||||
|
||||
return tag_line_numbers, tags
|
||||
if not tags_stack:
|
||||
break
|
||||
|
||||
def get_parent_tag(self, tags_stack):
|
||||
"""
|
||||
Given a tag, returns its parent tag (instance of PythonTag()) from the
|
||||
specified tag list. If no such parent tag exists, returns None.
|
||||
tag = PythonTag(tag_match.group(1),
|
||||
self._get_full_name(tags_stack,
|
||||
tag_match.group(2)),
|
||||
line_no,
|
||||
indent_level)
|
||||
|
||||
:param tags_stack: list (stack) of currently open PythonTag() instances
|
||||
"""
|
||||
if len(tags_stack):
|
||||
parent_tag = tags_stack[-1]
|
||||
else:
|
||||
parent_tag = None
|
||||
tags[line_no] = tag
|
||||
tags_stack.append(tag)
|
||||
|
||||
return parent_tag
|
||||
return tags
|
||||
|
||||
@staticmethod
|
||||
def compute_indentation_level(indent_chars):
|
||||
"""
|
||||
Computes the indentation level from the specified string.
|
||||
def _get_full_name(self, tags_stack, name):
|
||||
"""Return full logical name dot separated starting from upper entity"""
|
||||
if tags_stack:
|
||||
return tags_stack[-1].full_name + "." + name
|
||||
|
||||
:param indent_chars: White space before any other character on line
|
||||
:returns: indent level as an int
|
||||
"""
|
||||
indent_level = 0
|
||||
return name
|
||||
|
||||
# compute the indentation level (expand tabs)
|
||||
for char in indent_chars:
|
||||
if char == '\t':
|
||||
indent_level += SimplePythonTagsParser.TABSIZE
|
||||
else:
|
||||
indent_level += 1
|
||||
|
||||
return indent_level
|
||||
|
||||
def get_python_tag(self, tags_stack, line_number, indent_chars, tag_name,
|
||||
obj_type):
|
||||
"""
|
||||
Returns instance of PythonTag based on the specified data.
|
||||
|
||||
:param tags_stack: list (stack) of tags currently active.
|
||||
Note: Modified in this method!
|
||||
:param line_number: current line number
|
||||
:param indent_chars: characters making up the indentation level of the
|
||||
current tag
|
||||
:param tag_name: short name of the current tag
|
||||
:param obj_type: one of 'class' or 'def'
|
||||
:returns: PythonTag object
|
||||
"""
|
||||
indent_level = self.compute_indentation_level(indent_chars)
|
||||
parent_tag = self.get_parent_tag(tags_stack)
|
||||
|
||||
if obj_type == 'class':
|
||||
obj_type = PythonTag.CLASS
|
||||
else:
|
||||
obj_type = PythonTag.FUNCTION
|
||||
|
||||
# handle enclosed tag
|
||||
while parent_tag:
|
||||
if parent_tag.tag_type == PythonTag.CLASS:
|
||||
obj_type = PythonTag.METHOD
|
||||
|
||||
# if the indent level of the parent tag is greater than of the
|
||||
# current tag, use parent tag of the parent tag
|
||||
if parent_tag.indent_level >= indent_level:
|
||||
del tags_stack[-1]
|
||||
|
||||
# otherwise we have all information on the current tag and can
|
||||
# return it
|
||||
else:
|
||||
tag = PythonTag(obj_type,
|
||||
"%s.%s" % (parent_tag.full_name, tag_name,),
|
||||
line_number, indent_level)
|
||||
break
|
||||
|
||||
# use the parent tag of the parent tag
|
||||
parent_tag = self.get_parent_tag(tags_stack)
|
||||
|
||||
# handle a top-indent level tag
|
||||
else:
|
||||
tag = PythonTag(obj_type, tag_name, line_number, indent_level)
|
||||
|
||||
# add the tag to the list of tags
|
||||
tags_stack.append(tag)
|
||||
|
||||
return tag
|
||||
|
||||
def tag_function_type_deciding_method(self, parent_tag_type):
|
||||
"""
|
||||
Returns tag type of the current tag based on its previous tag (super
|
||||
tag) for functions/methods.
|
||||
|
||||
Parameters
|
||||
|
||||
parent_tag_type -- type of the enclosing/parent tag
|
||||
"""
|
||||
if parent_tag_type == PythonTag.CLASS:
|
||||
return PythonTag.METHOD
|
||||
else:
|
||||
return PythonTag.FUNCTION
|
||||
def _get_indent_level(self, line):
|
||||
"""Return indentation level as a simple count of whitespaces"""
|
||||
return len(RE_INDENT.match(line).group(1))
|
||||
|
||||
|
||||
class PythonHelper(object):
|
||||
TAG_LINE_NUMBERS = {}
|
||||
TAGS = {}
|
||||
BUFFER_TICKS = {}
|
||||
|
||||
@classmethod
|
||||
def find_tag(cls, buffer_number, changed_tick):
|
||||
@@ -209,161 +119,54 @@ class PythonHelper(object):
|
||||
changed_tick -- always-increasing number used to indicate that the
|
||||
buffer has been modified since the last time
|
||||
"""
|
||||
# get the tag data for the current buffer
|
||||
tag_line_numbers, tags = get_tags(buffer_number, changed_tick)
|
||||
|
||||
# link to Vim's internal data
|
||||
current_buffer = vim.current.buffer
|
||||
current_window = vim.current.window
|
||||
row = current_window.cursor[0]
|
||||
|
||||
# get the index of the nearest line
|
||||
nearest_line_index = get_nearest_line_index(row, tag_line_numbers)
|
||||
|
||||
# if a line has been found, find out if the tag is correct {{{
|
||||
# E.g. the cursor might be below the last tag, but in code that has
|
||||
# nothing to do with the tag, which we know because the line is
|
||||
# indented differently. In such a case no applicable tag has been
|
||||
# found.
|
||||
while nearest_line_index > -1:
|
||||
# get the line number of the nearest tag
|
||||
nearest_line_number = tag_line_numbers[nearest_line_index]
|
||||
|
||||
# walk through all the lines in the range (nearestTagLine,
|
||||
# cursorRow)
|
||||
for line_number in xrange(nearest_line_number + 1, row):
|
||||
# get the current line
|
||||
line = current_buffer[line_number]
|
||||
|
||||
# count the indentation of the line, if it's lower than the
|
||||
# tag's, the tag is invalid
|
||||
if len(line):
|
||||
# initialize local auxiliary variables
|
||||
line_start = 0
|
||||
i = 0
|
||||
|
||||
# compute the indentation of the line
|
||||
while (i < len(line)) and (line[i].isspace()):
|
||||
# move the start of the line code
|
||||
if line[i] == '\t':
|
||||
line_start += SimplePythonTagsParser.TABSIZE
|
||||
else:
|
||||
line_start += 1
|
||||
|
||||
# go to the next character on the line
|
||||
i += 1
|
||||
|
||||
# if the line contains only spaces, skip it
|
||||
if i == len(line):
|
||||
continue
|
||||
|
||||
# if the next character is a '#' (python comment), skip
|
||||
# to the next line
|
||||
if line[i] == '#':
|
||||
continue
|
||||
|
||||
# if the line's indentation starts before or at the
|
||||
# nearest tag's, the tag is invalid
|
||||
if line_start <= tags[nearest_line_number].indent_level:
|
||||
nearest_line_index -= 1
|
||||
break
|
||||
|
||||
# the tag is correct, so use it
|
||||
else:
|
||||
break
|
||||
|
||||
# no applicable tag has been found
|
||||
if PythonHelper.TAGS.get(buffer_number) and \
|
||||
PythonHelper.TAGS[buffer_number]['changed_tick'] == changed_tick:
|
||||
tags = PythonHelper.TAGS[buffer_number]['tags']
|
||||
else:
|
||||
nearest_line_number = -1
|
||||
parser = EvenSimplerPythonTagsParser()
|
||||
tags = parser.get_tags()
|
||||
PythonHelper.TAGS['buffer_number'] = {'changed_tick': changed_tick,
|
||||
'tags': tags}
|
||||
|
||||
# describe the cursor position (what tag the cursor is on)
|
||||
# reset the description
|
||||
tag_description = ""
|
||||
tag_description_tag = ""
|
||||
tag_description_type = ""
|
||||
# get line number of current cursor position from Vim's internal data.
|
||||
# It is always a positive number, starts from 1. Let's decrease it by
|
||||
# one, so that it will not confuse us while operating vim interface by
|
||||
# python, where everything starts from 0.
|
||||
line_number = vim.current.window.cursor[0] - 1
|
||||
while True:
|
||||
line = vim.current.buffer[line_number]
|
||||
if line.strip():
|
||||
line_indent = len(RE_INDENT.match(line).group(1))
|
||||
break
|
||||
# line contains nothing but white characters, looking up to grab
|
||||
# some more context
|
||||
line_number -= 1
|
||||
|
||||
# if an applicable tag has been found, set the description
|
||||
# accordingly
|
||||
if nearest_line_number > -1:
|
||||
tag_info = tags[nearest_line_number]
|
||||
tag_description_tag = tag_info.full_name
|
||||
tag_description_type = tag_info.tag_type
|
||||
tag_description = "%s (%s)" % (tag_description_tag,
|
||||
tag_description_type)
|
||||
tag = tags.get(line_number)
|
||||
if not tag:
|
||||
key = None
|
||||
for key in reversed(tags.keys()):
|
||||
if line_number >= key and line_indent > tags[key].indent_level:
|
||||
tag = tags.get(key)
|
||||
break
|
||||
|
||||
# update the variable for the status line so it get updated with
|
||||
# the new description
|
||||
vim.command("let w:PHStatusLine=\"%s\"" % tag_description)
|
||||
vim.command("let w:PHStatusLineTag=\"%s\"" % tag_description_tag)
|
||||
vim.command("let w:PHStatusLineType=\"%s\"" % tag_description_type)
|
||||
update_vim_vars(tag)
|
||||
|
||||
@classmethod
|
||||
def delete_tags(cls, buffer_number):
|
||||
"""
|
||||
Removes tag data for the specified buffer number.
|
||||
|
||||
Parameters
|
||||
|
||||
buffer_number -- number of the buffer
|
||||
"""
|
||||
for item in (PythonHelper.TAGS, PythonHelper.TAG_LINE_NUMBERS,
|
||||
PythonHelper.BUFFER_TICKS):
|
||||
try:
|
||||
del item[buffer_number]
|
||||
except KeyError:
|
||||
pass
|
||||
"""Removes tag data for the specified buffer number."""
|
||||
del PythonHelper.TAGS[buffer_number]
|
||||
|
||||
|
||||
def get_nearest_line_index(row, tag_line_numbers):
|
||||
"""
|
||||
Returns the index of 'tag_line_numbers' that contains the line nearest to
|
||||
the specified cursor row.
|
||||
def update_vim_vars(tag):
|
||||
"""Update Vim variable usable with vimscript side of the plugin"""
|
||||
|
||||
Parameters
|
||||
|
||||
row -- current cursor row
|
||||
|
||||
tag_line_numbers -- list of tags' line numbers (ie. their position)
|
||||
"""
|
||||
nearest_line_number = -1
|
||||
nearest_line_index = -1
|
||||
|
||||
# go through all tag line numbers and find the one nearest to the
|
||||
# specified row
|
||||
for line_index, line_number in enumerate(tag_line_numbers):
|
||||
# if the current line is nearer the current cursor position, take it
|
||||
if nearest_line_number < line_number <= row:
|
||||
nearest_line_number = line_number
|
||||
nearest_line_index = line_index
|
||||
|
||||
# if we've come past the current cursor position, end the search
|
||||
if line_number >= row:
|
||||
break
|
||||
|
||||
return nearest_line_index
|
||||
|
||||
|
||||
def get_tags(buffer_number, changed_tick):
|
||||
"""
|
||||
Reads the tags for the buffer specified by the number..
|
||||
|
||||
:param buffer_number: Number of the current buffer
|
||||
:param changed_tick: Always-increasing number used to indicate that the
|
||||
buffer has been modified since the last time
|
||||
:returns: Tuple of the format (taglinenumber[buffer], tags[buffer])
|
||||
"""
|
||||
# return immediately if there's no need to update the tags
|
||||
if PythonHelper.BUFFER_TICKS.get(buffer_number, None) == changed_tick:
|
||||
return (PythonHelper.TAG_LINE_NUMBERS[buffer_number],
|
||||
PythonHelper.TAGS[buffer_number])
|
||||
|
||||
# get the tags
|
||||
simple_tags_parser = SimplePythonTagsParser(vim.current.buffer)
|
||||
tag_line_numbers, tags = simple_tags_parser.get_tags()
|
||||
|
||||
# update the global variables
|
||||
PythonHelper.TAGS[buffer_number] = tags
|
||||
PythonHelper.TAG_LINE_NUMBERS[buffer_number] = tag_line_numbers
|
||||
PythonHelper.BUFFER_TICKS[buffer_number] = changed_tick
|
||||
|
||||
return (tag_line_numbers, tags)
|
||||
if not tag:
|
||||
vim.command('let w:PHStatusLine=""')
|
||||
vim.command('let w:PHStatusLineTag=""')
|
||||
vim.command('let w:PHStatusLineType=""')
|
||||
else:
|
||||
vim.command('let w:PHStatusLine="%s (%s)"' % (tag.full_name,
|
||||
tag.tag_type))
|
||||
vim.command('let w:PHStatusLineTag="%s"' % tag.tag_type)
|
||||
vim.command('let w:PHStatusLineType="%s"' % tag.full_name)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
" Author: Michal Vitecek <fuf-at-mageo-dot-cz>
|
||||
" Author: Roman Dobosz <gryf@vimja.com>
|
||||
" Version: 0.84
|
||||
" Last Modified: 2016-05-18
|
||||
" Last Modified: 2016-05-21
|
||||
"
|
||||
" Overview
|
||||
" --------
|
||||
|
||||
Reference in New Issue
Block a user