import zeroeval as ze
# Set up the ZeroEval SDK before any other `ze` call is made.
ze.init()

# Two hand-written arithmetic rows: an id, the question text, and the
# expected answer stored as a string.
demo_rows = [
    {"row_id": "q1", "question": "6 * 7", "answer": "42"},
    {"row_id": "q2", "question": "10 + 7", "answer": "17"},
]
dataset = ze.Dataset("math-demo", data=demo_rows)
# NOTE(review): presumably registers/uploads the dataset with the ZeroEval
# backend -- confirm against the SDK docs.
dataset.push()


@ze.task(outputs=["prediction"])
def solve(row):
    """Produce the prediction for a single dataset row.

    Placeholder implementation: it hands back the row's own ``answer``
    field as the prediction. Swap the lookup below for a real model call.

    Returns:
        A dict with the single key ``"prediction"``, matching the output
        name declared in the decorator.
    """
    # TODO: replace this echo of the reference answer with a model call.
    predicted = row.answer
    return {"prediction": predicted}


@ze.evaluation(mode="row", outputs=["exact_match"])
def exact_match(row, answer_col, prediction_col):
    """Score one row: 1 when the prediction equals the answer, else 0.

    Args:
        row: Accepted as part of the signature but not read here.
        answer_col: Reference value to compare against.
        prediction_col: Predicted value to compare.
            (Presumably both are bound to column values by the
            ``column_map`` given at scoring time -- confirm in SDK docs.)

    Returns:
        A dict with the single key ``"exact_match"`` holding the equality
        result coerced to ``int``.
    """
    is_equal = answer_col == prediction_col
    return {"exact_match": int(is_equal)}


@ze.evaluation(mode="column", outputs=["accuracy"])
def accuracy(exact_match_col):
    """Aggregate per-row match scores into a single accuracy value.

    Args:
        exact_match_col: Sized collection of numeric scores; it is passed
            to both ``len`` and ``sum``.

    Returns:
        A dict with the single key ``"accuracy"``: the sum of the scores
        divided by their count, or ``0.0`` when the collection is empty.
    """
    total = len(exact_match_col)
    # Guard the division: an empty column yields 0.0 rather than raising.
    if not total:
        return {"accuracy": 0.0}
    return {"accuracy": sum(exact_match_col) / total}


# Run the task over the dataset. `workers=8` is forwarded to `eval`
# (presumably the degree of parallelism -- confirm against the SDK docs).
run = dataset.eval(solve, workers=8)

# Per-evaluator bindings from parameter names to column names
# (assumed semantics of `column_map` -- confirm against the SDK docs).
score_columns = {
    "exact_match": {
        "answer_col": "answer",
        "prediction_col": "prediction",
    },
    "accuracy": {"exact_match_col": "exact_match"},
}
# The row-level evaluator is listed before the column-level one that
# consumes its "exact_match" output.
run = run.score([exact_match, accuracy], column_map=score_columns)

# Report the scored run.
print("run_id:", run.run_id)
print("metrics:", run.metrics)
print("health:", run.health)