Codebase Navigation Skill
When to Use This Skill
Use this skill when you need to explore or map a codebase: discovering files, understanding module hierarchies, locating symbols, and finding related files efficiently.
How to Use This Skill
- Review the patterns and examples below
- Apply the relevant patterns to your implementation
- Follow the best practices outlined in this skill
File discovery, structure understanding, intelligent search strategies, and efficient codebase exploration patterns.
Core Capabilities
- Smart File Discovery - Glob patterns, ignore rules, prioritized search
- Structure Understanding - Project topology, module hierarchies, package organization
- Semantic Search - Context-aware code search, symbol lookup, reference finding
- Entry Point Identification - Main modules, API endpoints, CLI commands
- Context-Aware Navigation - Related files, test counterparts, documentation
Smart File Discovery
# scripts/smart_file_finder.py
from pathlib import Path
from typing import List, Set, Optional, Dict
import fnmatch
import json
from dataclasses import dataclass
@dataclass
class SearchConfig:
    """Configuration for smart file search.

    Attributes:
        patterns: Glob patterns to search for (e.g. ['**/*.py']).
        exclude_patterns: Glob patterns to filter out of results.
        max_depth: Maximum directory depth below the root, or None for no limit.
        follow_symlinks: Whether symlinked directories should be traversed.
            NOTE(review): currently unused by SmartFileFinder — confirm intent.
        case_sensitive: Whether matching is case sensitive.
            NOTE(review): currently unused by SmartFileFinder — confirm intent.
        prioritize: Patterns whose matches are listed first in results.
    """
    patterns: List[str]
    exclude_patterns: List[str]
    max_depth: Optional[int] = None
    follow_symlinks: bool = False
    case_sensitive: bool = False
    # Fixed: was annotated `List[str] = None`, which is untrue for type checkers.
    prioritize: Optional[List[str]] = None


class SmartFileFinder:
    """Intelligent file discovery with gitignore support.

    Exclusion is applied in three layers: built-in defaults
    (DEFAULT_EXCLUDES), patterns loaded from the root .gitignore, and
    the user-supplied ``exclude_patterns`` from SearchConfig.
    """

    # Default exclusions, expressed as '**/'-style globs.  Matching is done
    # per path component (see _should_exclude) so nested occurrences — e.g.
    # a node_modules directory inside a subpackage — are caught as well.
    DEFAULT_EXCLUDES = [
        '**/node_modules/**',
        '**/.git/**',
        '**/__pycache__/**',
        '**/.venv/**',
        '**/.pytest_cache/**',
        '**/dist/**',
        '**/build/**',
        '**/*.pyc',
        '**/.DS_Store'
    ]

    def __init__(self, root_path: str, config: Optional[SearchConfig] = None):
        """Create a finder rooted at ``root_path`` with an optional config."""
        self.root = Path(root_path)
        self.config = config or SearchConfig(patterns=['**/*'], exclude_patterns=[])
        self.gitignore_patterns = self._load_gitignore()

    def find(self) -> List[Path]:
        """Find files matching the configured patterns, filtered and sorted."""
        all_files: List[Path] = []
        for pattern in self.config.patterns:
            all_files.extend(self._find_pattern(pattern))
        # dict.fromkeys removes duplicates while preserving insertion order.
        unique_files = list(dict.fromkeys(all_files))
        filtered = self._apply_filters(unique_files)
        return self._prioritize_results(filtered)

    def _find_pattern(self, pattern: str) -> List[Path]:
        """Expand one glob pattern beneath the root, returning files only.

        Path.glob understands '**' natively, so the pattern is passed
        through unmodified.  (The previous rewrite via
        ``rglob(pattern.replace('**/', ''))`` mangled patterns such as
        'src/**/*.py'.)  Directories are filtered out so callers always
        receive concrete files.
        """
        return [p for p in self.root.glob(pattern) if p.is_file()]

    def _apply_filters(self, files: List[Path]) -> List[Path]:
        """Drop files that are excluded, gitignored, or deeper than max_depth."""
        filtered = []
        for file_path in files:
            if self._should_exclude(file_path):
                continue
            if self._is_gitignored(file_path):
                continue
            if self.config.max_depth is not None:
                relative = file_path.relative_to(self.root)
                if len(relative.parts) > self.config.max_depth:
                    continue
            filtered.append(file_path)
        return filtered

    @staticmethod
    def _matches_component(parts, pattern: str) -> bool:
        """True if any single path component matches ``pattern``."""
        return any(fnmatch.fnmatch(part, pattern) for part in parts)

    def _should_exclude(self, path: Path) -> bool:
        """Check path against default and user-supplied exclusion patterns."""
        relative = path.relative_to(self.root)
        parts = relative.parts
        path_str = relative.as_posix()
        # Default excludes: strip the '**/' / '/**' wrappers and test every
        # component, so 'pkg/node_modules/x.js' is excluded even though
        # node_modules is not at the top level (plain fnmatch missed this).
        for pattern in self.DEFAULT_EXCLUDES:
            core = pattern.replace('**/', '').replace('/**', '')
            if self._matches_component(parts, core):
                return True
        for pattern in self.config.exclude_patterns:
            if fnmatch.fnmatch(path_str, pattern):
                return True
            # Also honour '**/dir/**'-style patterns at any depth, including
            # directly under the root, which fnmatch alone does not cover.
            core = pattern.replace('**/', '').replace('/**', '')
            if core != pattern and self._matches_component(parts, core):
                return True
        return False

    def _is_gitignored(self, path: Path) -> bool:
        """Best-effort .gitignore matching.

        Supports the common cases: bare names, globs, and directory
        patterns with a trailing '/'.  Full gitignore semantics (negation
        with '!', anchoring with a leading '/') are not implemented;
        negated patterns are skipped at load time.
        """
        relative = path.relative_to(self.root)
        path_str = relative.as_posix()
        for pattern in self.gitignore_patterns:
            clean = pattern.rstrip('/')
            if fnmatch.fnmatch(path_str, clean):
                return True
            # A bare name or glob in .gitignore applies at any depth.
            if self._matches_component(relative.parts, clean):
                return True
        return False

    def _load_gitignore(self) -> List[str]:
        """Load usable patterns from the root .gitignore, if present."""
        gitignore_path = self.root / '.gitignore'
        patterns = []
        if gitignore_path.exists():
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    # Skip blanks, comments, and (unsupported) negations.
                    if line and not line.startswith('#') and not line.startswith('!'):
                        patterns.append(line)
        return patterns

    def _prioritize_results(self, files: List[Path]) -> List[Path]:
        """Sort results, listing matches of priority patterns first."""
        if not self.config.prioritize:
            return sorted(files)
        priority_files = []
        normal_files = []
        for file_path in files:
            rel = file_path.relative_to(self.root)
            # Match the full relative path; fall back to the basename so a
            # pattern like '**/models.py' also matches a root-level models.py.
            is_priority = any(
                fnmatch.fnmatch(rel.as_posix(), pattern) or
                fnmatch.fnmatch(rel.name, pattern.replace('**/', ''))
                for pattern in self.config.prioritize
            )
            if is_priority:
                priority_files.append(file_path)
            else:
                normal_files.append(file_path)
        return sorted(priority_files) + sorted(normal_files)

    def find_by_type(self, file_type: str) -> List[Path]:
        """Find files by category name or extension.

        Known categories: python, javascript, typescript, config,
        markdown, tests.  Anything else is treated as a file extension.
        The configured patterns are restored afterwards (previously this
        method permanently overwrote ``config.patterns``).
        """
        type_patterns = {
            'python': ['**/*.py'],
            'javascript': ['**/*.js', '**/*.jsx'],
            'typescript': ['**/*.ts', '**/*.tsx'],
            'config': ['**/*.json', '**/*.yaml', '**/*.yml', '**/*.toml'],
            'markdown': ['**/*.md'],
            'tests': ['**/test_*.py', '**/*_test.py', '**/*.test.js', '**/*.spec.js']
        }
        saved_patterns = self.config.patterns
        self.config.patterns = type_patterns.get(file_type, [f'**/*.{file_type}'])
        try:
            return self.find()
        finally:
            self.config.patterns = saved_patterns
# Usage example
# Configure a finder over a project root (replace the path with a real one).
finder = SmartFileFinder(
    '/path/to/project',
    SearchConfig(
        patterns=['**/*.py'],                       # what to search for
        exclude_patterns=['**/migrations/**'],      # project-specific ignores
        max_depth=5,                                # don't descend more than 5 levels
        prioritize=['**/models.py', '**/views.py']  # list these matches first
    )
)
python_files = finder.find()
print(f"Found {len(python_files)} Python files")

# Find specific file types by category name (see find_by_type)
test_files = finder.find_by_type('tests')
config_files = finder.find_by_type('config')
Project Structure Analyzer
// tools/structure-analyzer.ts

/** High-level description of a project produced by StructureAnalyzer.analyze(). */
interface ProjectStructure {
  root: string;                 // root path that was analyzed
  type: ProjectType;            // detected language/layout (see ProjectType)
  entryPoints: string[];        // candidate executable entry files
  modules: ModuleInfo[];        // discovered packages and modules
  // NOTE(review): DependencyInfo is not declared in this excerpt — confirm
  // it is defined elsewhere in the module.
  dependencies: DependencyInfo;
  buildSystem: string;          // build-tool identifier from detectBuildSystem()
  testFramework?: string;       // optional: identifier from detectTestFramework()
}

/** Project categories the analyzer can detect. */
enum ProjectType {
  Python = 'python',
  JavaScript = 'javascript',
  TypeScript = 'typescript',
  Monorepo = 'monorepo',
  Unknown = 'unknown'
}

/** Metadata for a single discovered module or package. */
interface ModuleInfo {
  name: string;                           // module/package name
  path: string;                           // path to the module file or package dir
  type: 'package' | 'module' | 'script';  // kind of unit discovered
  exports: string[];                      // exported symbol names
  submodules: string[];                   // child modules (packages only)
}
class StructureAnalyzer {
  private rootPath: string;

  constructor(rootPath: string) {
    this.rootPath = rootPath;
  }

  /**
   * Analyze the project rooted at `rootPath` and return its structure.
   * Several helpers used below (hasAnyFile, findFiles, readPackageJson,
   * analyzeDependencies, detectBuildSystem, detectTestFramework,
   * discoverJSModules, getModuleName, extractPythonExports, findSubmodules,
   * findTypeDefinition, findDocumentation, findSimilarFiles, fileExists)
   * are defined elsewhere in this module.
   */
  async analyze(): Promise<ProjectStructure> {
    const projectType = await this.detectProjectType();
    return {
      root: this.rootPath,
      type: projectType,
      entryPoints: await this.findEntryPoints(projectType),
      modules: await this.discoverModules(projectType),
      dependencies: await this.analyzeDependencies(projectType),
      buildSystem: await this.detectBuildSystem(projectType),
      testFramework: await this.detectTestFramework(projectType)
    };
  }

  /**
   * Detect the project type from marker files.
   *
   * Checks are ordered most-specific first: monorepo markers, then
   * tsconfig.json, then Python markers, then package.json.  The previous
   * implementation iterated an object literal in insertion order, so any
   * TypeScript project (which also has a package.json) was classified as
   * JavaScript before its TypeScript indicators were ever checked.
   */
  private async detectProjectType(): Promise<ProjectType> {
    const ordered: Array<[ProjectType, string[]]> = [
      [ProjectType.Monorepo, ['lerna.json', 'pnpm-workspace.yaml', 'nx.json']],
      [ProjectType.TypeScript, ['tsconfig.json']],
      [ProjectType.Python, ['setup.py', 'pyproject.toml', 'requirements.txt']],
      [ProjectType.JavaScript, ['package.json', 'node_modules']]
    ];
    for (const [type, files] of ordered) {
      if (await this.hasAnyFile(files)) {
        return type;
      }
    }
    return ProjectType.Unknown;
  }

  /** Locate likely entry-point files for the detected project type. */
  private async findEntryPoints(type: ProjectType): Promise<string[]> {
    const entryPointPatterns: Record<ProjectType, string[]> = {
      [ProjectType.Python]: ['__main__.py', 'main.py', 'app.py', 'manage.py', 'cli.py'],
      [ProjectType.JavaScript]: ['index.js', 'main.js', 'app.js', 'server.js'],
      [ProjectType.TypeScript]: ['index.ts', 'main.ts', 'app.ts', 'server.ts'],
      [ProjectType.Monorepo]: [],
      [ProjectType.Unknown]: []
    };

    const entryPoints: string[] = [];
    for (const pattern of entryPointPatterns[type]) {
      entryPoints.push(...await this.findFiles(`**/${pattern}`));
    }

    // package.json "main" and "bin" entries are authoritative for JS/TS.
    if (type === ProjectType.JavaScript || type === ProjectType.TypeScript) {
      const packageJson = await this.readPackageJson();
      if (packageJson?.main) {
        entryPoints.push(packageJson.main);
      }
      if (packageJson?.bin) {
        if (typeof packageJson.bin === 'string') {
          entryPoints.push(packageJson.bin);
        } else {
          entryPoints.push(...Object.values(packageJson.bin));
        }
      }
    }
    return [...new Set(entryPoints)]; // de-duplicate, preserving order
  }

  /** Dispatch module discovery by project type. */
  private async discoverModules(type: ProjectType): Promise<ModuleInfo[]> {
    const modules: ModuleInfo[] = [];
    switch (type) {
      case ProjectType.Python:
        modules.push(...await this.discoverPythonModules());
        break;
      case ProjectType.JavaScript:
      case ProjectType.TypeScript:
        modules.push(...await this.discoverJSModules());
        break;
    }
    return modules;
  }

  /** Discover Python packages (dirs with __init__.py) and standalone modules. */
  private async discoverPythonModules(): Promise<ModuleInfo[]> {
    const modules: ModuleInfo[] = [];

    // Packages: every directory containing an __init__.py.
    const initFiles = await this.findFiles('**/__init__.py');
    for (const initPath of initFiles) {
      const packagePath = path.dirname(initPath);
      modules.push({
        name: this.getModuleName(packagePath),
        path: packagePath,
        type: 'package',
        exports: await this.extractPythonExports(initPath),
        submodules: await this.findSubmodules(packagePath, '*.py')
      });
    }

    // Standalone modules: every other .py file.
    const pyFiles = await this.findFiles('**/*.py');
    for (const pyFile of pyFiles) {
      if (pyFile.endsWith('__init__.py')) {
        continue;
      }
      modules.push({
        name: this.getModuleName(pyFile),
        path: pyFile,
        type: 'module',
        exports: await this.extractPythonExports(pyFile),
        submodules: []
      });
    }
    return modules;
  }

  /** Map a source file to its test, type-definition, and doc counterparts. */
  private async findRelatedFiles(filePath: string): Promise<RelatedFiles> {
    const ext = path.extname(filePath);
    const baseName = path.basename(filePath, ext);
    const dirName = path.dirname(filePath);
    return {
      source: filePath,
      test: await this.findTestFile(filePath),
      types: await this.findTypeDefinition(filePath),
      documentation: await this.findDocumentation(filePath),
      related: await this.findSimilarFiles(baseName, dirName)
    };
  }

  /**
   * Locate the test counterpart of a source file, trying common naming
   * conventions (test_x, x_test, x.test, x.spec) in the source's own
   * directory and in conventional test subdirectories.
   */
  private async findTestFile(sourcePath: string): Promise<string | null> {
    const ext = path.extname(sourcePath);
    const base = path.basename(sourcePath, ext);
    const candidates = [
      `test_${path.basename(sourcePath)}`,
      `${base}_test${ext}`,
      `${base}.test${ext}`,
      `${base}.spec${ext}`
    ];
    // '' checks the source file's own directory (e.g. foo.test.ts next to
    // foo.ts), which the previous version never looked at.
    const testDirs = ['', 'tests', 'test', '__tests__', 'specs'];
    for (const testDir of testDirs) {
      for (const candidate of candidates) {
        const testPath = path.join(path.dirname(sourcePath), testDir, candidate);
        if (await this.fileExists(testPath)) {
          return testPath;
        }
      }
    }
    return null;
  }
}
// Usage
// NOTE(review): top-level await requires an ES-module context — confirm this
// example runs as ESM, or wrap it in an async function.
const analyzer = new StructureAnalyzer('/path/to/project');
const structure = await analyzer.analyze();
console.log('Project Type:', structure.type);
console.log('Entry Points:', structure.entryPoints);
console.log('Modules:', structure.modules.length);
Semantic Code Search
# scripts/semantic_search.py
import ast
import difflib
import re
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Optional, Tuple
@dataclass
class SearchResult:
    """Single search result with context."""
    file_path: str     # path of the file containing the match
    line_number: int   # 1-based line of the matched symbol
    match_type: str    # 'function', 'class', 'variable', 'import', 'call'
    symbol: str        # symbol name that matched
    context: str       # stripped source line containing the match
    score: float       # relevance score; 1.0 means an exact match


class SemanticSearch:
    """Context-aware code search beyond simple grep."""

    def __init__(self, root_path: str):
        """Index all Python symbols under ``root_path`` up front."""
        self.root = Path(root_path)
        self.index = self._build_symbol_index()

    def search_symbol(self, symbol: str, context: Optional[str] = None) -> List[SearchResult]:
        """Search for a symbol, combining exact and fuzzy matches.

        Exact hits come from the index; fuzzy hits (similar names) are
        appended with down-weighted scores.  If ``context`` is given,
        results are filtered to those whose source line or path contains it.
        """
        results: List[SearchResult] = list(self.index.get(symbol, []))
        # _fuzzy_search skips the exact symbol, so no duplicates here.
        results.extend(self._fuzzy_search(symbol))
        if context:
            results = [r for r in results if self._matches_context(r, context)]
        results.sort(key=lambda r: r.score, reverse=True)
        return results

    def find_definition(self, symbol: str) -> Optional[SearchResult]:
        """Find where a symbol is defined, or None if it is unknown."""
        candidates = self.search_symbol(symbol)
        # Prioritize definitions over usages.
        definitions = [
            r for r in candidates
            if r.match_type in ('function', 'class', 'variable')
        ]
        return definitions[0] if definitions else None

    def find_references(self, symbol: str) -> List[SearchResult]:
        """Find all references (calls, imports, reads) to a symbol."""
        all_results = self.search_symbol(symbol)
        return [
            r for r in all_results
            if r.match_type in ('call', 'import', 'reference')
        ]

    def find_implementations(self, interface: str) -> List[SearchResult]:
        """Find classes that list ``interface`` among their base classes."""
        results = []
        for py_file in self.root.rglob('*.py'):
            try:
                with open(py_file, 'r', encoding='utf-8') as f:
                    tree = ast.parse(f.read())
            except (OSError, SyntaxError, UnicodeDecodeError):
                # Unreadable or unparsable files are skipped, not fatal.
                continue
            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef) and any(
                    (isinstance(base, ast.Name) and base.id == interface) or
                    (isinstance(base, ast.Attribute) and base.attr == interface)
                    for base in node.bases
                ):
                    results.append(SearchResult(
                        file_path=str(py_file),
                        line_number=node.lineno,
                        match_type='class',
                        symbol=node.name,
                        context=f'class {node.name}({interface})',
                        score=1.0
                    ))
        return results

    def _build_symbol_index(self) -> Dict[str, List[SearchResult]]:
        """Build the symbol-name -> results index for the whole codebase.

        Relies on SymbolExtractor (defined later in this module) to walk
        each file's AST.
        """
        index: Dict[str, List[SearchResult]] = {}
        for py_file in self.root.rglob('*.py'):
            try:
                with open(py_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                tree = ast.parse(content)
            except (OSError, SyntaxError, UnicodeDecodeError):
                continue
            visitor = SymbolExtractor(str(py_file), content)
            visitor.visit(tree)
            for symbol, results in visitor.symbols.items():
                index.setdefault(symbol, []).extend(results)
        return index

    def _matches_context(self, result: SearchResult, context: str) -> bool:
        """True if ``context`` appears in the result's source line or path.

        Added because search_symbol called this method but it was never
        defined, so any context-filtered search raised AttributeError.
        """
        needle = context.lower()
        return needle in result.context.lower() or needle in result.file_path.lower()

    def _fuzzy_search(self, symbol: str) -> List[SearchResult]:
        """Find similarly-named symbols (similarity > 0.7).

        Returns scored *copies* of the indexed results: the previous
        version multiplied ``result.score`` in place, permanently
        corrupting scores in the shared index across searches.
        """
        results = []
        symbol_lower = symbol.lower()
        for indexed_symbol, search_results in self.index.items():
            if indexed_symbol == symbol:
                continue  # exact hits are already returned by search_symbol
            similarity = self._calculate_similarity(symbol_lower, indexed_symbol.lower())
            if similarity > 0.7:  # 70% threshold
                for result in search_results:
                    results.append(SearchResult(
                        file_path=result.file_path,
                        line_number=result.line_number,
                        match_type=result.match_type,
                        symbol=result.symbol,
                        context=result.context,
                        score=result.score * similarity
                    ))
        return results

    def _calculate_similarity(self, s1: str, s2: str) -> float:
        """Return a similarity ratio in [0, 1] between two strings.

        Implemented with difflib.SequenceMatcher; replaces a stub that
        always returned the placeholder value 0.8.
        """
        if s1 == s2:
            return 1.0
        if not s1 or not s2:
            return 0.0
        return difflib.SequenceMatcher(None, s1, s2).ratio()
class SymbolExtractor(ast.NodeVisitor):
    """Collect symbol definitions (functions, classes, variables) from an AST.

    Populates ``self.symbols``, mapping each symbol name to the list of
    SearchResult entries where it is defined in the file.
    """

    def __init__(self, file_path: str, content: str):
        self.file_path = file_path
        self.content = content
        self.lines = content.splitlines()
        self.symbols: Dict[str, List[SearchResult]] = {}

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._add_symbol(node.name, node.lineno, 'function')
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        # Async defs are a distinct AST node type; without this handler
        # they were missing from the index entirely.
        self._add_symbol(node.name, node.lineno, 'function')
        self.generic_visit(node)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        self._add_symbol(node.name, node.lineno, 'class')
        self.generic_visit(node)

    def visit_Assign(self, node: ast.Assign) -> None:
        # Walk each target so tuple/starred unpacking ('a, b = ...') is
        # indexed too; only Store-context names are definitions.
        for target in node.targets:
            for child in ast.walk(target):
                if isinstance(child, ast.Name) and isinstance(child.ctx, ast.Store):
                    self._add_symbol(child.id, node.lineno, 'variable')
        self.generic_visit(node)

    def visit_AnnAssign(self, node: ast.AnnAssign) -> None:
        # Annotated assignments ('x: int = 1') were previously skipped.
        if isinstance(node.target, ast.Name):
            self._add_symbol(node.target.id, node.lineno, 'variable')
        self.generic_visit(node)

    def _add_symbol(self, name: str, line_no: int, match_type: str) -> None:
        """Record one symbol occurrence with its source-line context."""
        context = self.lines[line_no - 1] if line_no <= len(self.lines) else ''
        result = SearchResult(
            file_path=self.file_path,
            line_number=line_no,
            match_type=match_type,
            symbol=name,
            context=context.strip(),
            score=1.0
        )
        self.symbols.setdefault(name, []).append(result)
# Usage
search = SemanticSearch('/path/to/project')  # builds the symbol index up front

# Find definition
# NOTE(review): find_definition returns None for unknown symbols; this example
# would raise AttributeError in that case — guard with a None check in real code.
definition = search.find_definition('MyClass')
print(f"Defined at: {definition.file_path}:{definition.line_number}")

# Find all usages
references = search.find_references('my_function')
print(f"Found {len(references)} references")

# Find implementations (classes inheriting from the given base)
implementations = search.find_implementations('BaseHandler')
for impl in implementations:
    print(f"{impl.symbol} in {impl.file_path}")
Usage Examples
Discover Project Structure
Apply codebase-navigation skill to analyze project structure, identify entry points, and map module hierarchy
Find Related Files
Apply codebase-navigation skill to find test file, type definitions, and documentation for src/models/user.py
Semantic Symbol Search
Apply codebase-navigation skill to find definition and all references of UserAuthHandler class
Smart File Discovery
Apply codebase-navigation skill to find all TypeScript files excluding tests and node_modules, prioritizing API routes
Integration Points
- codebase-analysis-patterns - Architecture analysis and dependency mapping
- pattern-finding - Pattern matching across files
- code-review-patterns - Review workflow and file selection
Success Output
When successful, this skill MUST output:
✅ SKILL COMPLETE: codebase-navigation
Completed:
- [x] Project structure analyzed and documented
- [x] Entry points identified (main.py, app.js, index.ts, etc.)
- [x] Module hierarchy mapped (packages, modules, submodules)
- [x] Smart file discovery executed with gitignore support
- [x] Semantic symbol search index built
Outputs:
- Project structure report (type, entry points, modules, dependencies)
- File discovery results with prioritization
- Symbol index with definitions, references, implementations
- Related files mapping (source ↔ test ↔ types ↔ docs)
Completion Checklist
Before marking this skill as complete, verify:
- Project type detected correctly (Python, JavaScript, TypeScript, Monorepo)
- Entry points identified and validated as executable
- Module/package hierarchy discovered and mapped
- Smart file finder excludes node_modules, .git, __pycache__, .venv
- Gitignore patterns loaded and applied
- Semantic symbol index built (functions, classes, variables)
- Symbol search tested (find definitions, references, implementations)
- Related files discovered (test counterparts, type definitions, docs)
- File prioritization working (patterns matched correctly)
Failure Indicators
This skill has FAILED if:
- ❌ Project type detection returns "unknown" for standard projects
- ❌ Entry points not found despite existence (package.json main, main.py)
- ❌ Module discovery includes excluded directories (node_modules, .venv)
- ❌ Gitignore patterns not loaded or not applied
- ❌ Symbol index build fails for valid Python/JavaScript files
- ❌ Symbol search returns zero results for known symbols
- ❌ Related file mapping misses obvious test counterparts
- ❌ File prioritization doesn't work (priority files not listed first)
When NOT to Use
Do NOT use this skill when:
- Codebase is single-file script (no navigation needed)
- Project structure is trivial (flat directory with few files)
- Exploring unfamiliar codebase for first time (use codebase-analysis-patterns instead)
- Need deep dependency analysis (use dependency-analysis skill)
- Searching for text content not code symbols (use Grep tool)
- Building comprehensive documentation (use documentation-generator)
- Analyzing performance or security (use specialized analysis skills)
- Codebase uses non-standard structure without clear patterns
Anti-Patterns (Avoid)
| Anti-Pattern | Problem | Solution |
|---|---|---|
| Ignoring .gitignore | Searches node_modules, .venv | Always load and apply gitignore patterns |
| Hardcoded exclusions only | Misses project-specific ignores | Combine default + gitignore + custom patterns |
| Case-sensitive search only | Misses files on Windows | Support case-insensitive mode |
| No file prioritization | Important files buried in results | Prioritize by patterns (models, views, routes) |
| Full path requirement | Fails on partial paths | Use fuzzy matching (basename, partial path) |
| No depth limit | Searches too deep in monorepos | Allow configurable max_depth |
| Missing symbol types | Only finds functions | Index classes, variables, imports |
| No related file mapping | Can't find tests for sources | Map source ↔ test ↔ types ↔ docs |
Principles
This skill embodies:
- #1 First Principles - Understand project structure before navigating
- #3 Separation of Concerns - Separate file discovery, symbol search, structure analysis
- #5 Eliminate Ambiguity - Fuzzy matching handles path variations
- #6 Clear, Understandable, Explainable - Structure reports with clear hierarchy
- #8 No Assumptions - Detect project type and structure, don't hardcode
- #9 Keep It Simple - Efficient search without over-indexing
Full Standard: CODITECT-STANDARD-AUTOMATION.md