Range validation — “height must be between 100 and 250cm” — is necessary but not sufficient for body measurement inputs. Common user errors produce values that are within range but still wrong: height entered in inches (70 inches ≈ 178cm) when the field expected cm; weight entered as 150 (meaning 150 lbs) when the field expected kg; a child’s age and an adult’s weight on the same form.
Thorough validation catches these cases before they produce nonsensical predictions.
Unit detection: when users enter the wrong unit
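The most frequent input error is unit mismatch. Users who think in feet and inches will enter 5.9 or 511 when the field expects centimeters. Users in countries that use imperial units will enter 145 when the field expects kg (they mean 145 lbs = 65.8 kg). Note that 145 is also a plausible weight in kg, so a single-field check cannot flag it on its own; the weight parser below only questions values above 150, and anything lower has to be caught by cross-checking weight against height.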
from dataclasses import dataclass
@dataclass
class ParsedMeasurement:
    """Result of interpreting one raw height or weight entry."""

    # The number that was entered, after conversion to float.
    value: float
    # Unit the value was interpreted in. Guessed units carry an "_inferred"
    # suffix (e.g. "ft_inferred", "lbs_inferred"); "unknown" means no
    # interpretation matched.
    unit: str
    # The value in centimetres (heights) or kilograms (weights) under that
    # interpretation. For unit == "unknown" this is the raw value unchanged.
    converted_cm_or_kg: float
    confidence: str  # "certain", "inferred", "ambiguous"
    # Human-readable explanation of a unit guess, or None when there is none.
    note: str | None = None
def parse_height_input(raw_input: str | float, declared_unit: str = "cm") -> ParsedMeasurement:
    """
    Parse a height input, detecting likely unit errors.

    raw_input: what the user typed (a decimal comma is accepted)
    declared_unit: what the UI field claimed to expect ("cm" or "ft");
        matched case-insensitively, and anything other than "cm" is
        handled as "ft"

    For a "cm" field the value is tested, in order, as whole feet (3-8),
    feet.inches (5.11, 6.2), compact feet+inches (511) and inches (55-90).
    A match is returned with confidence "ambiguous" and a note for the user.
    Anything else is returned unchanged as cm with confidence "certain";
    no range check is applied here.

    Raises ValueError if raw_input is not a finite number.
    """
    import math

    value = float(str(raw_input).replace(",", ".").strip())
    # NaN would crash int() below and inf would raise OverflowError; reject
    # both up front with the same exception type float() uses for bad text.
    if not math.isfinite(value):
        raise ValueError(f"Height must be a finite number, got {raw_input!r}")
    # Normalise so that "CM" or " cm " is not silently handled as feet.
    unit = str(declared_unit).strip().lower()

    if unit == "cm":
        whole = int(value)

        # Check if this looks like feet (whole number 3–8)
        if 3.0 <= value <= 8.0 and value == whole:
            height_cm = value * 30.48
            return ParsedMeasurement(
                value=value, unit="ft_inferred",
                converted_cm_or_kg=height_cm,
                confidence="ambiguous",
                note=f"{value}cm looks like {int(value)}ft ({height_cm:.0f}cm). Please confirm units."
            )

        # Check if this looks like feet.inches (e.g., 5.11 or 6.2)
        hundredths = round((value - whole) * 100)  # 5.11 → 11 inches
        if 0 <= hundredths <= 11:
            # A float cannot tell 5.1 from 5.10, so a lone ".1" keeps the
            # two-digit reading (10 inches).
            fraction = hundredths
        elif math.isclose(value * 10, round(value * 10), rel_tol=0.0, abs_tol=1e-9):
            # Exactly one decimal digit (.2 to .9): that digit is the inches,
            # e.g. 5.9 → 9 inches. Multiplying by 100 would give 90.
            fraction = round(value * 10) - whole * 10
        else:
            fraction = -1  # e.g. 5.25: not a feet.inches shape
        if 3 <= whole <= 8 and 0 <= fraction <= 11:
            height_cm = (whole * 12 + fraction) * 2.54
            return ParsedMeasurement(
                value=value, unit="ft.in_inferred",
                converted_cm_or_kg=height_cm,
                confidence="ambiguous",
                note=f"{value} interpreted as {whole}ft {fraction}in ({height_cm:.0f}cm). "
                     f"If you meant {value}cm, that's {value/2.54:.1f} inches — please check."
            )

        # Check for feet and inches typed without a separator (511 for 5'11").
        # 300–811 is far above any plausible cm height, so reading the last
        # two digits as inches cannot shadow a real cm entry.
        if value == whole and 300 <= whole <= 811 and whole % 100 <= 11:
            feet, inches = divmod(whole, 100)
            height_cm = (feet * 12 + inches) * 2.54
            return ParsedMeasurement(
                value=value, unit="ft.in_inferred",
                converted_cm_or_kg=height_cm,
                confidence="ambiguous",
                note=f"{value} interpreted as {feet}ft {inches}in ({height_cm:.0f}cm). Field expects cm."
            )

        # Check if this looks like inches (55–90 range for typical adults)
        if 55 <= value <= 90:
            height_cm = value * 2.54
            return ParsedMeasurement(
                value=value, unit="in_inferred",
                converted_cm_or_kg=height_cm,
                confidence="ambiguous",
                note=f"{value} might be in inches ({height_cm:.0f}cm). Field expects cm."
            )

        # Looks like a normal cm value
        return ParsedMeasurement(value=value, unit="cm", converted_cm_or_kg=value, confidence="certain")

    # Declared unit is "ft" (or anything unrecognised): 4–8 is read as
    # decimal feet.
    if 4 <= value <= 8:
        return ParsedMeasurement(
            value=value, unit="ft",
            converted_cm_or_kg=value * 30.48,
            confidence="certain"
        )

    # Might be cm entered when ft expected
    if 140 <= value <= 220:
        return ParsedMeasurement(
            value=value, unit="cm_inferred",
            converted_cm_or_kg=value,
            confidence="ambiguous",
            note=f"{value} might be cm, not ft. Converting as cm."
        )

    # No interpretation fits; hand the raw number back flagged as ambiguous.
    return ParsedMeasurement(value=value, unit="unknown", converted_cm_or_kg=value, confidence="ambiguous")
def parse_weight_input(raw_input: str | float, declared_unit: str = "kg") -> ParsedMeasurement:
    """
    Parse a weight input, detecting likely unit errors.

    raw_input: what the user typed (a decimal comma is accepted)
    declared_unit: what the UI field claimed to expect ("kg" or "lbs");
        matched case-insensitively, and anything other than "kg" is
        handled as pounds

    For a "kg" field, a value above 150 whose pound-to-kg conversion lands in
    40–200 kg is returned as inferred pounds with confidence "ambiguous" and a
    note. Every other value is returned in the declared unit as "certain".

    Raises ValueError if raw_input is not a finite number.
    """
    import math

    kg_per_lb = 0.453592

    value = float(str(raw_input).replace(",", ".").strip())
    # NaN fails every comparison below and would come back as a "certain"
    # weight; reject it (and inf) like any other unparseable input.
    if not math.isfinite(value):
        raise ValueError(f"Weight must be a finite number, got {raw_input!r}")
    # Normalise so that "KG" or " kg " is not silently converted as pounds.
    unit = str(declared_unit).strip().lower()

    if unit == "kg":
        # Check if this looks like pounds
        if value > 150:
            weight_from_lbs = value * kg_per_lb
            if 40 <= weight_from_lbs <= 200:
                return ParsedMeasurement(
                    value=value, unit="lbs_inferred",
                    converted_cm_or_kg=weight_from_lbs,
                    confidence="ambiguous",
                    note=f"{value}kg is very high. Did you mean {value}lbs ({weight_from_lbs:.1f}kg)?"
                )
        return ParsedMeasurement(value=value, unit="kg", converted_cm_or_kg=value, confidence="certain")

    # Declared unit is pounds (or anything unrecognised): convert to kg.
    weight_kg = value * kg_per_lb
    return ParsedMeasurement(value=value, unit="lbs", converted_cm_or_kg=weight_kg, confidence="certain")
Feet and inches string parsing
Users who think in imperial units often type height as `5'11"`, `5ft 11in`, `5 11`, or `5-11`:
import re
def parse_feet_inches_string(s: str) -> float | None:
"""
Parse feet-and-inches strings into centimeters.
Handles: 5'11", 5ft11in, 5 11, 5-11, 5'11, 511 (ambiguous)
Returns cm, or None if unparseable.
"""
s = s.strip()
# Pattern: 5'11" or 5'11 or 5' 11"
m = re.match(r"(\d)'?\s*(\d{1,2})\"?$", s)
if m:
feet, inches = int(m.group(1)), int(m.group(2))
if 0 <= inches <= 11 and 3 <= feet <= 8:
return (feet * 12 + inches) * 2.54
# Pattern: 5ft11in or 5 ft 11 in
m = re.match(r"(\d)\s*ft\.?\s*(\d{1,2})\s*in\.?$", s, re.IGNORECASE)
if m:
feet, inches = int(m.group(1)), int(m.group(2))
if 0 <= inches <= 11 and 3 <= feet <= 8:
return (feet * 12 + inches) * 2.54
# Pattern: 5 11 (space separated)
m = re.match(r"(\d)\s+(\d{1,2})$", s)
if m:
feet, inches = int(m.group(1)), int(m.group(2))
if 0 <= inches <= 11 and 3 <= feet <= 8:
return (feet * 12 + inches) * 2.54
# Pattern: plain number in feet
m = re.match(r"^(\d+\.?\d*)$", s)
if m:
value = float(m.group(1))
if 4 <= value <= 8:
return value * 30.48 # Assume feet if in plausible range
return None
Physiological plausibility checks
Some combinations of valid inputs are physiologically impossible:
def check_physiological_plausibility(
    height_cm: float,
    weight_kg: float,
    age_category: str = "ADULT"
) -> list[dict]:
    """
    Check height/weight combinations for physiological plausibility.

    height_cm: height in centimetres
    weight_kg: weight in kilograms
    age_category: "INFANT", "CHILD" or "ADULT" (case-insensitive); any other
        value runs only the BMI checks

    Returns a list of issues (empty = plausible). Each issue is a dict with
    "field" ("height", "weight" or "both"), "severity" ("error" or
    "warning") and "message".
    """
    import math

    issues = []

    # NaN fails every comparison below, so it would otherwise be reported as
    # plausible; inf would yield a meaningless BMI.
    if not (math.isfinite(height_cm) and math.isfinite(weight_kg)):
        issues.append({"field": "both", "severity": "error", "message": "Height and weight must be finite numbers."})
        return issues

    if height_cm <= 0 or weight_kg <= 0:
        issues.append({"field": "both", "severity": "error", "message": "Height and weight must be positive."})
        return issues

    height_m = height_cm / 100
    bmi = weight_kg / (height_m ** 2)

    # BMI-based plausibility (all age groups). At most one BMI issue is
    # reported: errors for the extremes, warnings for the bands inside them.
    if bmi < 10:
        issues.append({
            "field": "both", "severity": "error",
            "message": f"BMI of {bmi:.1f} is not physiologically possible. "
                       f"Height {height_cm}cm and weight {weight_kg}kg cannot both be correct."
        })
    elif bmi < 14:
        issues.append({
            "field": "weight", "severity": "warning",
            "message": f"BMI of {bmi:.1f} is in the severe underweight range. Predictions will be less accurate."
        })
    elif bmi > 70:
        issues.append({
            "field": "both", "severity": "error",
            "message": f"BMI of {bmi:.1f} exceeds any recorded human value. Check height and weight values."
        })
    elif bmi > 50:
        issues.append({
            "field": "weight", "severity": "warning",
            "message": f"BMI of {bmi:.1f} is in a range where prediction accuracy decreases."
        })

    # Age-category specific checks. Normalise case so that "adult" is not
    # silently treated as an unknown category.
    category = str(age_category).strip().upper()
    if category == "INFANT" and (height_cm < 40 or height_cm > 100):
        issues.append({
            "field": "height", "severity": "warning",
            "message": f"Height {height_cm}cm is unusual for an infant. Expected range: 40–100cm."
        })
    if category == "ADULT" and height_cm < 130:
        issues.append({
            "field": "height", "severity": "warning",
            "message": f"Height {height_cm}cm is very short for an adult. Predictions may be less accurate."
        })
    if category == "CHILD" and weight_kg > 80:
        issues.append({
            "field": "weight", "severity": "warning",
            "message": f"Weight {weight_kg}kg is unusual for the CHILD age category."
        })

    return issues
Sanitization and type coercion
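Input sanitization coerces whatever the client sends into a float, or rejects it, so that type errors and stray non-numeric text never reach the parsing and prediction steps: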
def sanitize_measurement_input(raw: any) -> float | None:
"""
Safely convert any user input to a float for body measurement use.
Returns None if the input cannot be safely converted.
"""
if raw is None:
return None
# Handle numeric types directly
if isinstance(raw, (int, float)):
return float(raw)
# Handle strings
if isinstance(raw, str):
# Remove common separators and whitespace
cleaned = raw.strip().replace(",", ".").replace(" ", "")
# Remove unit suffixes
for suffix in ["cm", "kg", "lbs", "lb", "m", "in", "inches", "ft"]:
if cleaned.lower().endswith(suffix):
cleaned = cleaned[:-len(suffix)].strip()
break
# Remove non-numeric characters except . and -
cleaned = re.sub(r"[^\d.\-]", "", cleaned)
if not cleaned:
return None
try:
return float(cleaned)
except ValueError:
return None
return None
# Substrings that mark a height string as explicitly written in feet.
_FEET_MARKERS = ("'", "\u2019", "\u2032", "ft")


def _height_from_feet_inches(height_raw: object) -> float | None:
    """Return cm when height_raw is an explicit feet/inches string, else None."""
    if not isinstance(height_raw, str):
        return None
    lowered = height_raw.lower()
    if not any(marker in lowered for marker in _FEET_MARKERS):
        return None
    return parse_feet_inches_string(height_raw)


def _parse_measurement(raw, label, parser, unit, errors):
    """Sanitize and parse one field; on failure record an error and return None."""
    number = sanitize_measurement_input(raw)
    if number is None:
        errors.append(f"Could not parse {label} value: {raw!r}")
        return None
    try:
        return parser(number, unit)
    except (ValueError, OverflowError):
        # The parsers reject or choke on NaN/inf; report it, don't crash.
        errors.append(f"Could not parse {label} value: {raw!r}")
        return None


def prepare_api_inputs(
    height_raw: object,
    weight_raw: object,
    gender_raw: object,
    region_raw: object = "GLOBAL"
) -> dict:
    """
    Full input preparation pipeline: sanitize → parse → validate → format for API.

    height_raw / weight_raw: number or free-form text; interpreted as cm / kg
        unless unit detection suggests otherwise. A height string carrying an
        explicit feet marker (', ft) is parsed as feet and inches first.
    gender_raw: free-form gender, normalized by normalize_gender
    region_raw: region code; unknown or empty values fall back to "GLOBAL"
        with a warning

    Returns a dict with either 'inputs' (ready for API, plus 'warnings' and
    'unit_corrections') or 'errors' (validation failures, plus 'warnings').
    All detectable errors are reported together.
    """
    errors = []
    warnings = []

    # Height: explicit feet/inches strings bypass the numeric sanitizer,
    # which cannot represent two numbers.
    height_note = None
    height_cm = _height_from_feet_inches(height_raw)
    if height_cm is not None:
        height_note = f"Height {height_raw!r} interpreted as feet and inches ({height_cm:.0f}cm)."
    else:
        height_parsed = _parse_measurement(height_raw, "height", parse_height_input, "cm", errors)
        if height_parsed is not None:
            height_cm = height_parsed.converted_cm_or_kg
            height_note = height_parsed.note

    # Weight
    weight_kg = None
    weight_note = None
    weight_parsed = _parse_measurement(weight_raw, "weight", parse_weight_input, "kg", errors)
    if weight_parsed is not None:
        weight_kg = weight_parsed.converted_cm_or_kg
        weight_note = weight_parsed.note

    if height_note:
        warnings.append(height_note)
    if weight_note:
        warnings.append(weight_note)

    # Normalize gender
    try:
        gender = normalize_gender(str(gender_raw))
    except ValueError as e:
        errors.append(str(e))
        gender = None

    # Normalize region
    valid_regions = {"GLOBAL", "EUROPE", "ASIA_PACIFIC", "AFRICA", "LATAM", "INDIA", "MIDDLE_EAST"}
    region = str(region_raw).strip().upper() if region_raw else "GLOBAL"
    if region not in valid_regions:
        warnings.append(f"Unknown region {region!r} — defaulting to GLOBAL.")
        region = "GLOBAL"

    # Physiological check. It needs both measurements but not the gender,
    # so it runs even when the gender was rejected.
    if height_cm is not None and weight_kg is not None:
        for issue in check_physiological_plausibility(height_cm, weight_kg):
            if issue["severity"] == "error":
                errors.append(issue["message"])
            else:
                warnings.append(issue["message"])

    if errors:
        return {"errors": errors, "warnings": warnings}

    return {
        "inputs": {
            "gender": gender,
            # API requires mm. round(), not int(): truncation turns
            # 175.26cm into 1752mm and amplifies float error.
            "height_mm": round(height_cm * 10),
            "weight_kg": round(weight_kg, 1),
            "region": region
        },
        "warnings": warnings,
        "unit_corrections": {
            "height": height_note,
            "weight": weight_note
        }
    }
def normalize_gender(raw: str) -> str:
    """
    Map a free-form gender string onto the API's lowercase "male"/"female".

    Matching ignores case and surrounding whitespace.
    Raises ValueError when the value is not a recognised alias.
    """
    aliases_by_gender = {
        "male": ("male", "m", "man", "masculine"),
        "female": ("female", "f", "woman", "feminine"),
    }
    candidate = raw.strip().lower()
    for canonical, aliases in aliases_by_gender.items():
        if candidate in aliases:
            return canonical
    raise ValueError(f"Unrecognized gender: {raw!r}")
Validation in the API route
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# FastAPI application instance; the sizing route below registers itself on it
# through the @app.post decorator.
app = FastAPI()
# Request body for the sizing route. Height and weight are accepted as either
# text or numbers and handed to prepare_api_inputs unmodified.
class SizingRequest(BaseModel):
    # Raw height as sent by the client (number or free-form text).
    height: str | float
    # Raw weight as sent by the client (number or free-form text).
    weight: str | float
    # Free-form gender string; prepare_api_inputs normalizes it.
    gender: str
    # Region code; prepare_api_inputs falls back to "GLOBAL" for unknown values.
    region: str = "GLOBAL"
@app.post("/api/sizing/recommend")
async def sizing_recommend(req: SizingRequest):
    # Validate and normalize the raw request fields, reject the request with
    # HTTP 400 when validation reports errors, otherwise forward the prepared
    # inputs to the prediction API and return its result with the input
    # warnings attached.
    prepared = prepare_api_inputs(req.height, req.weight, req.gender, req.region)
    input_warnings = prepared.get("warnings", [])

    if "errors" in prepared:
        raise HTTPException(
            status_code=400,
            detail={"errors": prepared["errors"], "warnings": input_warnings},
        )

    # Call prediction API with validated, corrected inputs
    api_args = prepared["inputs"]
    result = call_prediction_api(
        **{key: api_args[key] for key in ("gender", "height_mm", "weight_kg", "region")}
    )

    # Attach warnings to response so client can show them
    result["input_warnings"] = input_warnings
    return result
Thorough input validation is boring engineering that prevents a significant fraction of production failures. The patterns above catch the most common real-world errors — unit mismatches, ambiguous inputs, physiologically impossible combinations — before they reach the API and produce confusing errors or silently wrong predictions. Build validation as a pipeline that sanitizes, detects, warns, and only blocks on genuine errors.