import os
os.environ["SAS_OPENCL"] = "cuda"   # use CUDA GPU backend for sasmodels

import escape as esc
import numpy as np
esc.require("0.9.8")

Loading material database from C:\dev\escape-core\python\src\escape\scattering\..\data\mdb\materials.db

# ── Variables ──────────────────────────────────────────────────────────────
Q     = esc.var("Q")
phi   = esc.var("phi")
theta = esc.var("theta")
x     = esc.var("x")
y     = esc.var("y")

# ── Parameters ─────────────────────────────────────────────────────────────
scale      = esc.par("Scale",      1.0,  scale=1e8, fixed=True)
length_a   = esc.par("Length a",  50.0, units=esc.angstr)
p          = esc.par("P",          2.5,  userlim=[0.5, 50.0])
sigma_pd   = esc.par("Sigma pd",   0.1,  userlim=[0.0, 0.5])
contrast   = esc.par("Contrast",   3.0,  scale=1e-6, units=f"{esc.angstr}^-2")
background = esc.par("Background", 0.001, userlim=[0.0, 0.03])

# ── Geometry ───────────────────────────────────────────────────────────────
R = 0.5 * length_a  # superball radius = a/2

# Superball volume: V = a^3/(12p^2) * Gamma(1/2p)^3 / Gamma(3/2p)
V = (esc.pow(length_a, 3) / (12.0 * esc.pow(p, 2))
     * esc.pow(esc.tgamma(1.0 / (2.0*p)), 3)
     / esc.tgamma(3.0 / (2.0*p)))

# ── Oriented form factor ────────────────────────────────────────────────────
Qx = Q * esc.cos(phi) * esc.sin(theta)
Qy = Q * esc.sin(phi) * esc.sin(theta)
Qz = Q * esc.cos(theta)

inv_2p = 1.0 / (2.0 * p)
G   = esc.pow(1.0 - esc.pow(x, 2.0*p), inv_2p)                       # gamma(x)
Ksi = esc.pow(1.0 - esc.pow(x, 2.0*p) - esc.pow(y, 2.0*p), inv_2p)  # zeta(x,y)

# Inner y-integral: sin(R*Qz*zeta)/Qz = R*zeta*sinc(R*Qz*zeta)
F_inner = esc.cos(R * Qy * y) * R * Ksi * esc.sinc(R * Qz * Ksi)
Fy = esc.integral(F_inner, y, 0.0, G, numpoints=21, maxiter=0)

# f = oriented amplitude (O(1..100)).  Do NOT include the (2*a^2) prefactor here.
# Keeping f small avoids float32 catastrophic cancellation in the orientation
# average: Pori = (2*a^2*f)^2 varies by ~5 orders of magnitude across orientations,
# causing the phi/theta accumulator to lose precision in float32.
f = esc.integral(esc.cos(R * Qx * x) * Fy, x, 0.0, 1.0, numpoints=21, maxiter=0)

# ── Powder average over orientations ───────────────────────────────────────
# Average f^2 (O(1..1e4)) over orientation, then apply (2*a^2)^2 outside.
# Factor 2/pi aligns with SasView's angular-average convention.
f2ori  = esc.pow(f, 2)
Pphi   = esc.integral(f2ori, phi, 0.0, np.pi/2.0, numpoints=21, maxiter=0)
Ptheta = esc.integral(Pphi * esc.sin(theta), theta, 0.0, np.pi/2.0, numpoints=21, maxiter=0)
f2_avg_ori  = Ptheta * (2.0 / np.pi)
Ptheta_norm = esc.pow(2.0 * esc.pow(length_a, 2), 2) * f2_avg_ori  # = <F^2>

# ── Polydispersity over length_a (lognormal) ───────────────────────────────
# SasView convention: I = scale * contrast^2 * <F^2(q)> / <V> + background
# where averages are over the lognormal distribution of length_a.
#
# Float32 issue: Ptheta_norm is O(1e10) — too large for float32 lognormal
# accumulation.  Fix: divide by a fixed constant C_norm before averaging, then
# multiply back.  The constant cancels in the ratio <F^2>/<V>.
C_norm = 1e6
F2_avg_pd = esc.average_lognorm(Ptheta_norm / C_norm, sigma_pd, length_a, numpoints=61, maxiter=0)
V_avg_pd  = esc.average_lognorm(V,                    sigma_pd, length_a, numpoints=61, maxiter=0)

F2_norm = esc.conditional(
    sigma_pd > 0.0*Q,
    (C_norm * F2_avg_pd) / V_avg_pd,
    Ptheta_norm / V)

i1d = scale * esc.pow(contrast, 2) * F2_norm + background

# GPU JIT: JIT-compile the full functor tree to a single CUDA kernel.
# On first call ESCAPE runs nvcc (~2-5 min compile, cached to disk afterwards).
# Float32 kernel: 64 registers/thread, ~89 ms for 300 q-points (vs 72 ms SasView CUDA).
i1d.device = "gpu"

qs = np.linspace(0.001, 1.0, 300)
i1d.show(coordinates=qs).config(
    title="Superball - powder average with polydispersity (1D)",
    xlog=True, ylog=True,
    xlabel=f"Q [{esc.angstr}^-1]", ylabel="I(q) [cm^-1]")

qx_v = esc.var("qx")
qy_v = esc.var("qy")

theta_2d = esc.par("Theta", 45.0, userlim=[0.0, 90.0], units="deg")
phi_2d   = esc.par("Phi",   45.0, userlim=[0.0, 90.0], units="deg")

deg = np.pi / 180.0
sin_t = esc.sin(theta_2d * deg); cos_t = esc.cos(theta_2d * deg)
sin_p = esc.sin(phi_2d   * deg); cos_p = esc.cos(phi_2d   * deg)

# Map detector (qx, qy) to particle-frame (Qa, Qb, Qc)
Qa =  qx_v * cos_p * cos_t + qy_v * sin_p * cos_t
Qb = -qx_v * sin_p          + qy_v * cos_p
Qc = -qx_v * cos_p * sin_t - qy_v * sin_p * sin_t

G_2d   = esc.pow(1.0 - esc.pow(x, 2.0*p), inv_2p)
Ksi_2d = esc.pow(1.0 - esc.pow(x, 2.0*p) - esc.pow(y, 2.0*p), inv_2p)

F_inner_2d = esc.cos(R * Qb * y) * R * Ksi_2d * esc.sinc(R * Qc * Ksi_2d)
Fy_2d = esc.integral(F_inner_2d, y, 0.0, G_2d, numpoints=21, maxiter=0)
F_2d  = esc.integral(esc.cos(R * Qa * x) * Fy_2d, x, 0.0, 1.0, numpoints=21, maxiter=0)

Pori_2d = esc.pow(2.0 * esc.pow(length_a, 2) * F_2d, 2)

# Polydispersity: <F^2(q)> / <V> (same SasView convention as 1D)
F2_2d_avg_pd = esc.average_lognorm(Pori_2d,  sigma_pd, length_a, numpoints=21, maxiter=0)
Pnorm_2d = esc.conditional(sigma_pd > 0.0*qx_v, F2_2d_avg_pd / V_avg_pd, Pori_2d / V)

i2d = scale * esc.pow(contrast, 2) * Pnorm_2d + background

i2d.device = "gpu"

xs = np.linspace(-0.5, 0.5, 500); ys = np.linspace(-0.5, 0.5, 500)
xv, yv = np.meshgrid(xs, ys)
coords_2d = np.column_stack([xv.flatten(), yv.flatten()]).flatten()
i2d.show(coordinates=coords_2d).config(
    title="Superball - oriented 2D SAXS with polydispersity",
    xlabel=f"qx [{esc.angstr}^-1]", ylabel=f"qy [{esc.angstr}^-1]", cblog=True, colorscale="jet")

import time
import numpy as np
import matplotlib.pyplot as plt
from sasmodels.core import load_model
from sasmodels.data import empty_data1D
from sasmodels.direct_model import DirectModel

# ── Q array (300 pts — ESCAPE parallel regime) ───────────────────────────────
qs = np.linspace(0.001, 1.0, 300)

# ── SasView / CUDA model ────────────────────────────────────────────────────
kernel = load_model("superball")          # SAS_OPENCL=cuda set in cell 0
f_sas      = DirectModel(empty_data1D(qs), kernel)

base_pars = dict(
    scale=1.0, background=0.001,
    sld=4.0, sld_solvent=1.0,
    length_a=50.0, exponent_p=2.5,
)
sas_pars = dict(
    **base_pars,
    length_a_pd=0.1,
    length_a_pd_type="lognormal",
    length_a_pd_n=35,
)

# ── Warm-up ──────────────────────────────────────────────────────────────────
f_sas(**sas_pars)
i1d(qs[:5])   # GPU JIT warm-up (small array)

# ── Timing helper ────────────────────────────────────────────────────────────
def timeit(fn, n=5):
    t0 = time.perf_counter()
    for _ in range(n): result = fn()
    return (time.perf_counter() - t0) / n * 1e3, result

t_sas, Iq_sas = timeit(lambda: f_sas(**sas_pars))

i1d.device = "gpu"
t_gpu, Iq_gpu = timeit(lambda: i1d(qs), n=3)
i1d.device = "cpu"
t_cpu, Iq_cpu = timeit(lambda: i1d(qs))
i1d.device = "gpu"

print(f"SASView GPU : {t_sas:.0f} ms")
print(f"ESCAPE GPU  : {t_gpu:.0f} ms")
print(f"ESCAPE CPU  : {t_cpu:.0f} ms  ({len(qs)} q-pts)")
rel = np.max(np.abs((Iq_gpu - Iq_sas) / Iq_sas)) * 100
print(f"Max relative diff vs SasView: {rel:.2f}%")

esc.overlay(Iq_sas, Iq_gpu, Iq_cpu, coordinates=qs).config(
    xlabel="Q (1/A)", ylabel="I(q) (1/cm)",
    xlog=True, ylog=True,
    fig_title=f"Superball I(q) — {len(qs)} pts",
    labels=["SASView", "ESCAPE GPU", "ESCAPE CPU"],
    line_styles=["solid", "dash", "dot"],
    line_widths=[2, 3, 3]
)

C:\Users\User\AppData\Local\Temp\ipykernel_35164\501573743.py:29: UserWarning:

Input array does not own its data (e.g. it is a view or slice); data will be copied

C:\Users\User\AppData\Local\Temp\ipykernel_35164\501573743.py:40: UserWarning:

Input array does not own its data (e.g. it is a view or slice); data will be copied

C:\Users\User\AppData\Local\Temp\ipykernel_35164\501573743.py:42: UserWarning:

Input array does not own its data (e.g. it is a view or slice); data will be copied

SASView GPU : 2488 ms
ESCAPE GPU  : 906 ms
ESCAPE CPU  : 59880 ms  (300 q-pts)
Max relative diff vs SasView: 4.35%

Parameter	Variable	Value
Scale	`scale`	1
Background (cm⁻¹)	`background`	0.001
Edge length (Å)	`length_a`	50
Shape exponent	`p`	2.5
Polydispersity (relative σ)	`sigma_pd`	0
Contrast Δρ (10⁻⁶ Å⁻²)	`contrast`	3 (= sld 4 − sld_solvent 1)

ESCAPE parameter	SasView parameter	Notes
`length_a`	`length_a`	edge length a (Ang)
`p`	`exponent_p`	shape exponent
`contrast * 1e-6`	`sld - sld_solvent`	contrast in Ang^-2
`sigma_pd`	`length_a_pd`	lognormal relative width

SAXS. Form-factors. Superball (SasView-aligned)¶

Parameters (SasView defaults)¶

Form-factor (SasView superball.c / superball.py, Dresen 2021)¶

2D oriented scattering (qx, qy)¶

SasView reference model & speed comparison¶