validation · forms · developer-guide · api · python

Input Validation for Body Measurements: Beyond Simple Range Checks

· 6 min read · Martin Hejda

Range validation — “height must be between 100 and 250cm” — is necessary but not sufficient for body measurement inputs. Common user errors produce values that are within range but still wrong: height entered in inches (70 inches ≈ 178cm) when the field expected cm; weight entered as 150 (meaning 150 lbs) when the field expected kg; a child’s age and an adult’s weight on the same form.

Thorough validation catches these cases before they produce nonsensical predictions.


Unit detection: when users enter the wrong unit

The most frequent input error is unit mismatch. Users who think in feet and inches will enter 5.9 (meaning 5 ft 9 in) or 511 (meaning 5 ft 11 in) when the field expects centimeters. Users from countries that use imperial units will enter 145 when the field expects kg (they mean 145 lbs ≈ 65.8 kg).

from dataclasses import dataclass

@dataclass
class ParsedMeasurement:
    """Result of interpreting one user-entered height or weight value.

    Holds the number as entered, the unit it was interpreted in, the value
    expressed in the canonical unit, and how sure the parser is about that
    interpretation.
    """
    value: float  # number as entered by the user, before any unit conversion
    unit: str  # unit the value was interpreted in, e.g. "cm", "ft_inferred", "lbs", "unknown"
    converted_cm_or_kg: float  # value in centimeters (heights) or kilograms (weights)
    confidence: str  # "certain", "inferred", "ambiguous"
    note: str | None = None  # human-readable explanation when the unit was guessed

def parse_height_input(raw_input: str | float, declared_unit: str = "cm") -> ParsedMeasurement:
    """
    Parse a height input, detecting likely unit errors.

    raw_input: what the user typed (a decimal comma is accepted)
    declared_unit: what the UI field claimed to expect ("cm" or "ft");
        any value other than "cm" is handled as "ft"

    For a "cm" field the value is tested, in order, against these shapes:
      * a whole number 3-8            -> whole feet
      * 5.11 / 5.9 / 6.2              -> feet.inches
      * 55-90                         -> inches
      * 300-811 such as 511           -> feet and inches typed without a separator
    Anything else is accepted as centimeters.

    A single decimal digit is ambiguous: 5.1 is read as 5ft 10in because a
    float cannot distinguish "5.1" from "5.10". The returned note spells out
    the interpretation so the user can correct it.

    Returns a ParsedMeasurement whose converted_cm_or_kg is the height in cm
    under the chosen interpretation.
    Raises ValueError if raw_input is not a finite number.
    """
    import math

    value = float(str(raw_input).replace(",", ".").strip())
    if not math.isfinite(value):
        # Without this, nan/inf blow up later inside int() with an unrelated
        # ValueError/OverflowError (or slip through entirely on the "ft" path).
        raise ValueError(f"Height must be a finite number, got {raw_input!r}")

    if declared_unit == "cm":
        # Check if this looks like feet (whole number 3–8)
        if 3.0 <= value <= 8.0 and value == int(value):
            height_cm = value * 30.48
            return ParsedMeasurement(
                value=value, unit="ft_inferred",
                converted_cm_or_kg=height_cm,
                confidence="ambiguous",
                note=f"{value}cm looks like {int(value)}ft ({height_cm:.0f}cm). Please confirm units."
            )

        # Check if this looks like feet.inches (e.g., 5.11 or 6.2)
        whole = int(value)
        hundredths = round((value - whole) * 100)  # 5.11 → 11, 5.9 → 90
        if 0 <= hundredths <= 11:
            fraction = hundredths  # two-digit form: 5.11 → 11 inches, 5.04 → 4 inches
        elif 20 <= hundredths <= 90 and hundredths % 10 == 0:
            fraction = hundredths // 10  # one-digit form: 5.9 → 9 inches, 6.2 → 2 inches
        else:
            fraction = None
        if 3 <= whole <= 8 and fraction is not None:
            height_cm = (whole * 12 + fraction) * 2.54
            return ParsedMeasurement(
                value=value, unit="ft.in_inferred",
                converted_cm_or_kg=height_cm,
                confidence="ambiguous",
                note=f"{value} interpreted as {whole}ft {fraction}in ({height_cm:.0f}cm). "
                     f"If you meant {value}cm, that's {value/2.54:.1f} inches — please check."
            )

        # Check if this looks like inches (55–90 range for typical adults)
        if 55 <= value <= 90:
            height_cm = value * 2.54
            return ParsedMeasurement(
                value=value, unit="in_inferred",
                converted_cm_or_kg=height_cm,
                confidence="ambiguous",
                note=f"{value} might be in inches ({height_cm:.0f}cm). Field expects cm."
            )

        # Check for feet and inches typed without a separator (511 → 5ft 11in).
        # Every candidate is at least 300, far above any real height in cm,
        # so this cannot shadow a legitimate centimeter value.
        if 300 <= value <= 811 and value == int(value):
            feet, inches = divmod(int(value), 100)
            if 3 <= feet <= 8 and inches <= 11:
                height_cm = (feet * 12 + inches) * 2.54
                return ParsedMeasurement(
                    value=value, unit="ft.in_inferred",
                    converted_cm_or_kg=height_cm,
                    confidence="ambiguous",
                    note=f"{value} interpreted as {feet}ft {inches}in ({height_cm:.0f}cm). Field expects cm."
                )

        # Looks like a normal cm value
        return ParsedMeasurement(value=value, unit="cm", converted_cm_or_kg=value, confidence="certain")

    # declared_unit == "ft" — the value is taken as (possibly decimal) feet
    if 4 <= value <= 8:
        return ParsedMeasurement(
            value=value, unit="ft",
            converted_cm_or_kg=value * 30.48,
            confidence="certain"
        )

    # Might be cm entered when ft expected
    if 140 <= value <= 220:
        return ParsedMeasurement(
            value=value, unit="cm_inferred",
            converted_cm_or_kg=value,
            confidence="ambiguous",
            note=f"{value} might be cm, not ft. Converting as cm."
        )

    # Nothing matched: hand the number back unconverted and flag it.
    return ParsedMeasurement(value=value, unit="unknown", converted_cm_or_kg=value, confidence="ambiguous")

def parse_weight_input(raw_input: str | float, declared_unit: str = "kg") -> ParsedMeasurement:
    """
    Interpret a weight entry, flagging values that look like pounds typed into a kg field.

    raw_input: what the user typed (a decimal comma is accepted)
    declared_unit: unit the UI field expects; "kg" enables pound detection,
        any other value is treated as pounds and converted to kilograms
    """
    kg_per_lb = 0.453592
    value = float(str(raw_input).replace(",", ".").strip())

    if declared_unit != "kg":
        # Pounds field: plain conversion, nothing to infer.
        return ParsedMeasurement(
            value=value,
            unit="lbs",
            converted_cm_or_kg=value * kg_per_lb,
            confidence="certain",
        )

    # Only suspiciously high kg values are candidates for "really pounds",
    # and only when the converted figure is itself a believable body weight.
    weight_from_lbs = value * kg_per_lb
    looks_like_pounds = value > 150 and 40 <= weight_from_lbs <= 200
    if looks_like_pounds:
        return ParsedMeasurement(
            value=value,
            unit="lbs_inferred",
            converted_cm_or_kg=weight_from_lbs,
            confidence="ambiguous",
            note=f"{value}kg is very high. Did you mean {value}lbs ({weight_from_lbs:.1f}kg)?",
        )

    return ParsedMeasurement(value=value, unit="kg", converted_cm_or_kg=value, confidence="certain")

Feet and inches string parsing

Users who think in imperial units often type height as “5’11"”, “5ft 11in”, “5 11”, or “5-11”:

import re

def parse_feet_inches_string(s: str) -> float | None:
    """
    Parse feet-and-inches strings into centimeters.
    Handles: 5'11", 5ft11in, 5 11, 5-11, 5'11, 511 (ambiguous)
    Returns cm, or None if unparseable.
    """
    s = s.strip()
    
    # Pattern: 5'11" or 5'11 or 5' 11"
    m = re.match(r"(\d)'?\s*(\d{1,2})\"?$", s)
    if m:
        feet, inches = int(m.group(1)), int(m.group(2))
        if 0 <= inches <= 11 and 3 <= feet <= 8:
            return (feet * 12 + inches) * 2.54
    
    # Pattern: 5ft11in or 5 ft 11 in
    m = re.match(r"(\d)\s*ft\.?\s*(\d{1,2})\s*in\.?$", s, re.IGNORECASE)
    if m:
        feet, inches = int(m.group(1)), int(m.group(2))
        if 0 <= inches <= 11 and 3 <= feet <= 8:
            return (feet * 12 + inches) * 2.54
    
    # Pattern: 5 11 (space separated)
    m = re.match(r"(\d)\s+(\d{1,2})$", s)
    if m:
        feet, inches = int(m.group(1)), int(m.group(2))
        if 0 <= inches <= 11 and 3 <= feet <= 8:
            return (feet * 12 + inches) * 2.54
    
    # Pattern: plain number in feet
    m = re.match(r"^(\d+\.?\d*)$", s)
    if m:
        value = float(m.group(1))
        if 4 <= value <= 8:
            return value * 30.48  # Assume feet if in plausible range
    
    return None

Physiological plausibility checks

Some combinations of valid inputs are physiologically impossible:

def check_physiological_plausibility(
    height_cm: float,
    weight_kg: float,
    age_category: str = "ADULT"
) -> list[dict]:
    """
    Flag height/weight pairs that do not describe a believable body.

    Each finding is a dict with "field", "severity" ("error" or "warning")
    and "message". An empty list means nothing looked wrong.
    """
    def finding(field: str, severity: str, message: str) -> dict:
        return {"field": field, "severity": severity, "message": message}

    # Non-positive inputs make BMI meaningless, so report and stop here.
    if height_cm <= 0 or weight_kg <= 0:
        return [finding("both", "error", "Height and weight must be positive.")]

    issues = []
    bmi = weight_kg / ((height_cm / 100) ** 2)

    # BMI bands apply to every age category: the outer bands are hard
    # errors, the inner ones only warn about reduced accuracy.
    if bmi < 14:
        if bmi < 10:
            issues.append(finding(
                "both", "error",
                f"BMI of {bmi:.1f} is not physiologically possible. "
                f"Height {height_cm}cm and weight {weight_kg}kg cannot both be correct.",
            ))
        else:
            issues.append(finding(
                "weight", "warning",
                f"BMI of {bmi:.1f} is in the severe underweight range. Predictions will be less accurate.",
            ))
    elif bmi > 50:
        if bmi > 70:
            issues.append(finding(
                "both", "error",
                f"BMI of {bmi:.1f} exceeds any recorded human value. Check height and weight values.",
            ))
        else:
            issues.append(finding(
                "weight", "warning",
                f"BMI of {bmi:.1f} is in a range where prediction accuracy decreases.",
            ))

    # One extra sanity check per age category.
    if age_category == "INFANT":
        if height_cm < 40 or height_cm > 100:
            issues.append(finding(
                "height", "warning",
                f"Height {height_cm}cm is unusual for an infant. Expected range: 40–100cm.",
            ))
    elif age_category == "ADULT":
        if height_cm < 130:
            issues.append(finding(
                "height", "warning",
                f"Height {height_cm}cm is very short for an adult. Predictions may be less accurate.",
            ))
    elif age_category == "CHILD":
        if weight_kg > 80:
            issues.append(finding(
                "weight", "warning",
                f"Weight {weight_kg}kg is unusual for the CHILD age category.",
            ))

    return issues

Sanitization and type coercion

Input sanitization prevents type errors and injection issues:

def sanitize_measurement_input(raw: any) -> float | None:
    """
    Safely convert any user input to a float for body measurement use.
    Returns None if the input cannot be safely converted.
    """
    if raw is None:
        return None
    
    # Handle numeric types directly
    if isinstance(raw, (int, float)):
        return float(raw)
    
    # Handle strings
    if isinstance(raw, str):
        # Remove common separators and whitespace
        cleaned = raw.strip().replace(",", ".").replace(" ", "")
        
        # Remove unit suffixes
        for suffix in ["cm", "kg", "lbs", "lb", "m", "in", "inches", "ft"]:
            if cleaned.lower().endswith(suffix):
                cleaned = cleaned[:-len(suffix)].strip()
                break
        
        # Remove non-numeric characters except . and -
        cleaned = re.sub(r"[^\d.\-]", "", cleaned)
        
        if not cleaned:
            return None
        
        try:
            return float(cleaned)
        except ValueError:
            return None
    
    return None

def prepare_api_inputs(
    height_raw: object,
    weight_raw: object,
    gender_raw: object,
    region_raw: object = "GLOBAL"
) -> dict:
    """
    Full input preparation pipeline: sanitize → parse → validate → format for API.

    height_raw / weight_raw: raw user values; interpreted as cm and kg, with
        unit detection applied to catch feet, inches or pounds.
    gender_raw: free-form gender label, normalized to the API's values.
    region_raw: region code; unknown or empty values fall back to "GLOBAL"
        with a warning.

    Returns a dict with either 'inputs' (ready for API, plus 'warnings' and
    'unit_corrections') or 'errors' (validation failures, plus 'warnings'
    when validation got far enough to produce any).
    """
    errors: list[str] = []
    warnings: list[str] = []

    # Sanitize
    height_val = sanitize_measurement_input(height_raw)
    weight_val = sanitize_measurement_input(weight_raw)

    # Report both unparseable fields together so the user can fix them in one pass.
    if height_val is None:
        errors.append(f"Could not parse height value: {height_raw!r}")
    if weight_val is None:
        errors.append(f"Could not parse weight value: {weight_raw!r}")
    if errors:
        return {"errors": errors}

    # Unit detection. The parsers call float()/int() internally, which can
    # raise on values such as nan or inf; turn that into a validation error
    # instead of letting it escape as a crash.
    try:
        height_parsed = parse_height_input(height_val, "cm")
    except (ValueError, OverflowError):
        errors.append(f"Could not interpret height value: {height_raw!r}")
    try:
        weight_parsed = parse_weight_input(weight_val, "kg")
    except (ValueError, OverflowError):
        errors.append(f"Could not interpret weight value: {weight_raw!r}")
    if errors:
        return {"errors": errors}

    if height_parsed.note:
        warnings.append(height_parsed.note)
    if weight_parsed.note:
        warnings.append(weight_parsed.note)

    height_cm = height_parsed.converted_cm_or_kg
    weight_kg = weight_parsed.converted_cm_or_kg

    # Normalize gender
    try:
        gender = normalize_gender(str(gender_raw))
    except ValueError as e:
        errors.append(str(e))
        gender = None

    # Normalize region
    valid_regions = {"GLOBAL", "EUROPE", "ASIA_PACIFIC", "AFRICA", "LATAM", "INDIA", "MIDDLE_EAST"}
    region = str(region_raw).strip().upper() if region_raw else "GLOBAL"
    if region not in valid_regions:
        warnings.append(f"Unknown region {region!r} — defaulting to GLOBAL.")
        region = "GLOBAL"

    # Physiological check. It does not depend on gender, so run it even when
    # gender was rejected; the caller then sees every problem at once.
    for issue in check_physiological_plausibility(height_cm, weight_kg):
        if issue["severity"] == "error":
            errors.append(issue["message"])
        else:
            warnings.append(issue["message"])

    if errors:
        return {"errors": errors, "warnings": warnings}

    return {
        "inputs": {
            "gender": gender,
            # API requires mm. Round rather than truncate so that float
            # noise or a fractional millimeter cannot drop a whole unit.
            "height_mm": round(height_cm * 10),
            "weight_kg": round(weight_kg, 1),
            "region": region
        },
        "warnings": warnings,
        "unit_corrections": {
            "height": height_parsed.note,
            "weight": weight_parsed.note
        }
    }

def normalize_gender(raw: str) -> str:
    """Map a free-form gender label onto the API's "male" / "female" values.

    Matching ignores case and surrounding whitespace. Raises ValueError for
    anything outside the known aliases.
    """
    aliases = {
        "male": ("male", "m", "man", "masculine"),
        "female": ("female", "f", "woman", "feminine"),
    }
    normalized = raw.strip().lower()
    for canonical, accepted in aliases.items():
        if normalized in accepted:
            return canonical
    raise ValueError(f"Unrecognized gender: {raw!r}")

Validation in the API route

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# FastAPI application instance that the sizing route below is registered on.
app = FastAPI()

class SizingRequest(BaseModel):
    """Request body for the sizing recommendation endpoint.

    Values are taken as the client sent them; the route handler passes them
    to prepare_api_inputs for cleaning and validation.
    """
    height: str | float  # raw height, as a number or free-form string
    weight: str | float  # raw weight, as a number or free-form string
    gender: str  # free-form gender label
    region: str = "GLOBAL"  # region code; "GLOBAL" when the client omits it

@app.post("/api/sizing/recommend")
async def sizing_recommend(req: SizingRequest):
    """Validate the request, call the prediction API, and return its result.

    Responds with HTTP 400 carrying the collected errors and warnings when
    input preparation fails. Otherwise the prediction result is returned
    with the input warnings attached under "input_warnings".
    """
    prepared = prepare_api_inputs(req.height, req.weight, req.gender, req.region)
    warnings = prepared.get("warnings", [])

    if "errors" in prepared:
        failure_detail = {"errors": prepared["errors"], "warnings": warnings}
        raise HTTPException(status_code=400, detail=failure_detail)

    # Call prediction API with validated, corrected inputs
    inputs = prepared["inputs"]
    result = call_prediction_api(
        gender=inputs["gender"],
        height_mm=inputs["height_mm"],
        weight_kg=inputs["weight_kg"],
        region=inputs["region"],
    )

    # Attach warnings to response so client can show them
    result["input_warnings"] = warnings
    return result

Thorough input validation is boring engineering that prevents a significant fraction of production failures. The patterns above catch the most common real-world errors — unit mismatches, ambiguous inputs, physiologically impossible combinations — before they reach the API and produce confusing errors or silently wrong predictions. Build validation as a pipeline that sanitizes, detects, warns, and only blocks on genuine errors.

Try DimensionsPot

Free tier — 100 requests/month, no credit card required.

Get API on RapidAPI