Skip to main content

Agent Skills Framework Extension

QA Review Methodology Skill

When to Use This Skill

Use this skill when applying QA review methodology patterns — quality scoring, test-strategy evaluation, and improvement planning — to your codebase.

How to Use This Skill

  1. Review the patterns and examples below
  2. Apply the relevant patterns to your implementation
  3. Follow the best practices outlined in this skill

QA scoring methodology, improvement recommendations, testing strategies, and comprehensive quality assessment frameworks.

Core Capabilities

  1. Quality Scoring - Multi-dimensional quality metrics and scoring
  2. Test Strategy - Coverage analysis, test pyramid validation, test quality
  3. Defect Analysis - Defect density, severity distribution, root cause
  4. Improvement Roadmaps - Prioritized quality improvements, action plans
  5. Quality Gates - Entry/exit criteria, release readiness, acceptance criteria

QA Scoring Framework

# scripts/qa_scorer.py
from typing import List, Dict, Optional
from dataclasses import dataclass
from enum import Enum

class QualityDimension(Enum):
    """Quality assessment dimensions scored by the QA framework.

    Each member's value is the lowercase dimension name used in reports.
    """
    FUNCTIONALITY = "functionality"
    RELIABILITY = "reliability"
    PERFORMANCE = "performance"
    SECURITY = "security"
    MAINTAINABILITY = "maintainability"
    TESTABILITY = "testability"

@dataclass
class QualityScore:
    """Quality score for a single dimension."""
    dimension: QualityDimension  # the dimension this score describes
    score: float                 # overall score on a 0-100 scale
    metrics: Dict[str, float]    # raw metric values that produced the score
    findings: List[Dict]         # individual findings/issues (empty if none)
    grade: str                   # letter grade: A, B, C, D, F

class QAScorer:
    """Comprehensive quality scoring system.

    Computes one ``QualityScore`` per ``QualityDimension`` from a codebase
    path plus a dict of aggregated test results.

    NOTE(review): the metric helpers referenced below
    (``_calculate_feature_coverage``, ``_calculate_defect_density``,
    ``_calculate_crash_rate``, ``_calculate_mtbf``,
    ``_check_error_handling``) and the remaining dimension scorers
    (``_score_performance``, ``_score_security``,
    ``_score_maintainability``, ``_score_testability``) are not shown in
    this excerpt and must be defined elsewhere for ``score`` to run.
    """

    # Minimum score required for each letter grade. _calculate_grade
    # iterates this dict in declaration order (highest grade first), so
    # the descending ordering here is load-bearing.
    GRADE_THRESHOLDS = {
        'A': 90,
        'B': 80,
        'C': 70,
        'D': 60,
        'F': 0
    }

    def score(self, codebase_path: str, test_results: Dict) -> Dict[QualityDimension, QualityScore]:
        """Calculate quality scores across all dimensions.

        Args:
            codebase_path: Filesystem path of the project under review.
            test_results: Aggregated test metrics (pass rate, counts, etc.).

        Returns:
            Mapping of every QualityDimension to its QualityScore.
        """
        scores = {}

        scores[QualityDimension.FUNCTIONALITY] = self._score_functionality(
            codebase_path, test_results
        )
        scores[QualityDimension.RELIABILITY] = self._score_reliability(
            codebase_path, test_results
        )
        scores[QualityDimension.PERFORMANCE] = self._score_performance(
            codebase_path, test_results
        )
        scores[QualityDimension.SECURITY] = self._score_security(
            codebase_path, test_results
        )
        # Maintainability is derived from the code alone, not test results.
        scores[QualityDimension.MAINTAINABILITY] = self._score_maintainability(
            codebase_path
        )
        scores[QualityDimension.TESTABILITY] = self._score_testability(
            codebase_path, test_results
        )

        return scores

    def _score_functionality(self, path: str, test_results: Dict) -> QualityScore:
        """Score functional correctness from pass rate, coverage and defects."""
        metrics = {
            # pass_rate arrives as a 0-1 fraction; convert to percent.
            'test_pass_rate': test_results.get('pass_rate', 0) * 100,
            'feature_coverage': self._calculate_feature_coverage(path, test_results),
            'defect_density': self._calculate_defect_density(path)
        }

        # Weighted score. NOTE(review): assumes defect_density is on a
        # 0-100 scale — a density above 100 would push its term negative.
        score = (
            metrics['test_pass_rate'] * 0.4 +
            metrics['feature_coverage'] * 0.4 +
            (100 - metrics['defect_density']) * 0.2
        )

        return QualityScore(
            dimension=QualityDimension.FUNCTIONALITY,
            score=score,
            metrics=metrics,
            findings=[],
            grade=self._calculate_grade(score)
        )

    def _score_reliability(self, path: str, test_results: Dict) -> QualityScore:
        """Score reliability and stability."""
        metrics = {
            'crash_rate': self._calculate_crash_rate(test_results),
            'mean_time_between_failures': self._calculate_mtbf(test_results),
            'error_handling_coverage': self._check_error_handling(path)
        }

        # MTBF is rescaled (/10) and capped at 100 so a very large MTBF
        # cannot dominate the weighted sum.
        score = (
            (100 - metrics['crash_rate']) * 0.4 +
            min(100, metrics['mean_time_between_failures'] / 10) * 0.3 +
            metrics['error_handling_coverage'] * 0.3
        )

        return QualityScore(
            dimension=QualityDimension.RELIABILITY,
            score=score,
            metrics=metrics,
            findings=[],
            grade=self._calculate_grade(score)
        )

    def _calculate_grade(self, score: float) -> str:
        """Convert a 0-100 score to a letter grade (A-F).

        Relies on GRADE_THRESHOLDS being declared highest-first; the first
        threshold the score meets wins. The final return is a safety net
        for scores below every threshold (e.g. negative values).
        """
        for grade, threshold in self.GRADE_THRESHOLDS.items():
            if score >= threshold:
                return grade
        return 'F'


# Usage (illustrative): `test_results` is an aggregated test-metrics dict
# supplied by the caller — it is not defined in this snippet.
scorer = QAScorer()
scores = scorer.score('/path/to/project', test_results)

print("Quality Assessment:")
for dimension, score in scores.items():
    print(f" {dimension.value}: {score.score:.1f}/100 (Grade: {score.grade})")

Test Strategy Evaluator

# scripts/test_strategy_evaluator.py
from typing import Dict, List
from dataclasses import dataclass

@dataclass
class TestPyramid:
    """Test pyramid distribution.

    Values are integer percentages of the total test count
    (as produced by TestStrategyEvaluator._analyze_pyramid).
    """
    unit: int         # % of tests that are unit tests
    integration: int  # % of tests that are integration tests
    e2e: int          # % of tests that are end-to-end tests

class TestStrategyEvaluator:
    """Evaluate test strategy effectiveness.

    NOTE(review): ``_analyze_coverage`` and ``_analyze_test_quality`` are
    called in ``evaluate`` but are not defined in this excerpt; they must
    exist elsewhere for this class to run.
    """

    # Target distribution: 70% unit / 20% integration / 10% e2e.
    IDEAL_PYRAMID = TestPyramid(unit=70, integration=20, e2e=10)

    def evaluate(self, test_results: Dict) -> Dict:
        """Evaluate test strategy.

        Returns a dict with the pyramid health score, coverage score,
        test quality score, and a list of textual recommendations.
        """
        pyramid = self._analyze_pyramid(test_results)
        coverage = self._analyze_coverage(test_results)
        quality = self._analyze_test_quality(test_results)

        return {
            'pyramid_health': self._score_pyramid(pyramid),
            'coverage_score': coverage,
            'test_quality': quality,
            'recommendations': self._generate_recommendations(pyramid, coverage, quality)
        }

    def _analyze_pyramid(self, results: Dict) -> TestPyramid:
        """Analyze test distribution as integer percentages of the total."""
        total = results.get('total_tests', 0)
        if total == 0:
            # No tests at all: return a degenerate all-zero pyramid
            # (also avoids division by zero below).
            return TestPyramid(0, 0, 0)

        return TestPyramid(
            unit=int((results.get('unit_tests', 0) / total) * 100),
            integration=int((results.get('integration_tests', 0) / total) * 100),
            e2e=int((results.get('e2e_tests', 0) / total) * 100)
        )

    def _score_pyramid(self, pyramid: TestPyramid) -> float:
        """Score how close pyramid is to ideal (100 = exact match, floor 0)."""
        unit_diff = abs(pyramid.unit - self.IDEAL_PYRAMID.unit)
        integration_diff = abs(pyramid.integration - self.IDEAL_PYRAMID.integration)
        e2e_diff = abs(pyramid.e2e - self.IDEAL_PYRAMID.e2e)

        # Total absolute deviation across all three tiers, subtracted
        # from a perfect 100 and clamped at zero.
        total_diff = unit_diff + integration_diff + e2e_diff
        return max(0, 100 - total_diff)

    def _generate_recommendations(
        self,
        pyramid: TestPyramid,
        coverage: float,
        quality: float
    ) -> List[str]:
        """Generate testing improvement recommendations.

        Each threshold below emits one recommendation when breached;
        an empty list means no issues were detected.
        """
        recommendations = []

        if pyramid.unit < 60:
            recommendations.append('Increase unit test coverage to at least 60% of total tests')

        if pyramid.e2e > 20:
            recommendations.append('Reduce E2E test proportion - move tests down the pyramid')

        if coverage < 80:
            recommendations.append('Increase code coverage to at least 80%')

        if quality < 70:
            recommendations.append('Improve test quality - add assertions, reduce flakiness')

        return recommendations

Quality Improvement Roadmap

// tools/quality-roadmap-generator.ts
// A phased plan from the current quality state to a target state.
// NOTE(review): QualitySnapshot, ImprovementPhase and Metric are declared
// elsewhere in the project — not in this excerpt.
interface QualityRoadmap {
  current_state: QualitySnapshot;  // snapshot of today's quality scores
  target_state: QualitySnapshot;   // desired end-state scores
  phases: ImprovementPhase[];      // ordered improvement phases
  estimated_duration: string;      // duration string produced by calculateDuration
  success_metrics: Metric[];       // metrics used to verify the roadmap succeeded
}

// One phase of the quality-improvement roadmap.
// NOTE(review): Initiative is declared elsewhere in the project.
interface ImprovementPhase {
  name: string;                // human-readable phase name
  duration: string;            // planned duration of this phase
  initiatives: Initiative[];   // improvement work items in this phase
  success_criteria: string[];  // conditions that mark the phase complete
  dependencies: string[];      // phases/work this phase depends on
}

class QualityRoadmapGenerator {
  /**
   * Build a phased quality-improvement roadmap from current scores
   * toward a numeric target score.
   *
   * NOTE(review): identifyGaps, createPhases, gapToInitiative,
   * captureCurrentState, defineTargetState, calculateDuration and
   * defineMetrics are referenced below but not defined in this
   * excerpt; they must exist elsewhere in the class/project.
   */
  generate(current_scores: QualityScore[], target: number): QualityRoadmap {
    // Identify gaps between each current score and the target.
    const gaps = this.identifyGaps(current_scores, target);

    // Prioritize improvements (see prioritizeImprovements below).
    const prioritized = this.prioritizeImprovements(gaps);

    // Group the prioritized initiatives into sequential phases.
    const phases = this.createPhases(prioritized);

    return {
      current_state: this.captureCurrentState(current_scores),
      target_state: this.defineTargetState(target),
      phases,
      estimated_duration: this.calculateDuration(phases),
      success_metrics: this.defineMetrics(phases)
    };
  }

  private prioritizeImprovements(gaps: Gap[]): Initiative[] {
    // Prioritize by:
    // 1. Blocking issues first
    // 2. Security issues
    // 3. High ROI improvements
    // 4. Quick wins
    // NOTE(review): the sort assumes each Gap's numeric `priority`
    // field already encodes the ordering above.
    return gaps
      .sort((a, b) => b.priority - a.priority) // descending priority
      .map(gap => this.gapToInitiative(gap));
  }
}

Usage Examples

Calculate Quality Scores

Apply qa-review-methodology skill to calculate comprehensive quality scores across all dimensions

Evaluate Test Strategy

Apply qa-review-methodology skill to analyze test pyramid distribution and generate improvement recommendations

Generate Improvement Roadmap

Apply qa-review-methodology skill to create phased quality improvement roadmap from current state to target

Success Output

When successful, this skill MUST output:

✅ SKILL COMPLETE: qa-review-methodology

Completed:
- [x] Quality scores calculated across all 6 dimensions
- [x] Test strategy evaluated (test pyramid, coverage, quality)
- [x] Defect analysis completed (density, severity, root cause)
- [x] Quality improvement roadmap generated
- [x] Recommendations prioritized by ROI

Outputs:
- results/quality-scorecard.json (scores for all 6 dimensions with grades A-F)
- results/test-strategy-analysis.json (pyramid health, coverage, recommendations)
- results/defect-analysis.json (density, severity distribution, trends)
- docs/quality-improvement-roadmap.md (phased plan with success criteria)
- docs/qa-review-report.md (executive summary, findings, action items)

Scores:
- Functionality: X/100 (Grade: A/B/C/D/F)
- Reliability: X/100 (Grade: A/B/C/D/F)
- Performance: X/100 (Grade: A/B/C/D/F)
- Security: X/100 (Grade: A/B/C/D/F)
- Maintainability: X/100 (Grade: A/B/C/D/F)
- Testability: X/100 (Grade: A/B/C/D/F)

Completion Checklist

Before marking this skill as complete, verify:

  • All 6 quality dimensions scored (Functionality, Reliability, Performance, Security, Maintainability, Testability)
  • Test results analyzed (pass rate, coverage, defect density)
  • Test pyramid distribution calculated (unit %, integration %, e2e %)
  • Test pyramid health scored (compared to ideal 70/20/10)
  • Defect density calculated (defects per 1000 LOC)
  • Severity distribution analyzed (Critical, High, Medium, Low)
  • Quality grades assigned (A-F) for each dimension
  • Improvement recommendations generated with priority and ROI
  • Roadmap phases defined with success criteria
  • Estimated duration calculated for quality improvements

Failure Indicators

This skill has FAILED if:

  • ❌ No test results available (cannot score functionality or testability)
  • ❌ Scores calculated without metrics (arbitrary grades assigned)
  • ❌ Test pyramid distribution missing (cannot evaluate test strategy)
  • ❌ No recommendations generated (analysis without actionable output)
  • ❌ Roadmap lacks phases or success criteria (not actionable)
  • ❌ All dimensions score same grade (likely averaging, not dimension-specific analysis)
  • ❌ Security dimension not scored (OWASP/SAST results missing)

When NOT to Use

Do NOT use this skill when:

  • Just need test coverage report (use coverage tools directly: pytest-cov, nyc)
  • Only need defect tracking (use issue tracker: Jira, GitHub Issues)
  • Simple code review without quality metrics (use code-review-patterns skill)
  • Pre-development planning (no code/tests to analyze yet)
  • Real-time quality monitoring (use continuous integration dashboards)
  • Single dimension focus (use specialized skills: security-audit, performance-profiler)

Use alternative skills:

  • code-review-patterns - When need code-level review without quality scoring
  • security-audit - When security is only concern (OWASP Top 10, SAST)
  • load-testing - When performance is only concern (latency, throughput)
  • test-automation-patterns - When building test suite, not reviewing quality

Anti-Patterns (Avoid)

| Anti-Pattern | Problem | Solution |
| --- | --- | --- |
| Scoring without metrics | Subjective, not reproducible | Use test pass rate, coverage %, defect density |
| Averaging all dimensions | Hides critical issues | Report each dimension separately |
| Ignoring test pyramid | Over-reliance on E2E tests | Calculate pyramid distribution, recommend rebalancing |
| No prioritization | Team overwhelmed | Prioritize by: 1) Blocking issues 2) Security 3) ROI 4) Quick wins |
| Improvement roadmap without phases | No incremental progress | Break into 2-4 phases with checkpoints |
| Generic recommendations | Not actionable | Specific: "Increase unit test coverage from 60% to 80%" |
| No success criteria | Can't measure improvement | Define metrics: "Functionality score 85+ (Grade B)" |

Principles

This skill embodies:

  • #5 Eliminate Ambiguity - Numeric scores and grades remove subjective quality assessment
  • #6 Clear, Understandable, Explainable - Multi-dimensional scorecard makes quality explicit
  • #8 No Assumptions - Measure actual quality metrics, don't assume quality
  • #9 Progress Reporting - Phased roadmap with checkpoints enables tracking
  • Quality First - Comprehensive assessment across all 6 dimensions

Full Standard: CODITECT-STANDARD-AUTOMATION.md

Integration Points

  • qa-validation-patterns - Automated compliance checking
  • comprehensive-review-patterns - Multi-dimensional quality analysis
  • code-review-patterns - Review methodology integration