-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnative_mode.py
More file actions
107 lines (86 loc) · 3.73 KB
/
native_mode.py
File metadata and controls
107 lines (86 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""VynFi v2.3 Output Modes — native numbers and flat layout.
DataSynth 2.3 added two output options that dramatically simplify analytics:

1. ``output.numericMode = "native"`` -- decimals as JSON numbers (1729237.30)
   instead of strings ("1729237.30"). No more pd.to_numeric() boilerplate.
2. ``output.exportLayout = "flat"`` -- header fields merged onto each line.
   No more manual flattening of journal entries.

This script describes both and compares legacy vs v2.3-native workflows. It
requests native numbers only: the flat layout is left at the default "nested"
setting (see the NOTE in the config below), so the entries are still flattened
by hand.
"""
import json
import os
import pandas as pd
import vynfi
# Build the API client from the VYNFI_API_KEY environment variable.
# The key is read with .get() so that a missing or empty variable stops the
# script with an actionable message instead of a bare KeyError traceback
# (missing) or an empty key being handed to the client (empty).
api_key = os.environ.get("VYNFI_API_KEY")
if not api_key:
    raise SystemExit(
        "VYNFI_API_KEY is not set; export it before running this example."
    )
client = vynfi.VynFi(api_key=api_key)
# ── v2.3 config: native numbers (flat layout intentionally left off) ────────
# The output options live in their own dict so the layout caveat sits right
# next to the value it explains.
output_options = {
    # JSON numbers, not strings.
    "numericMode": "native",
    # NOTE: exportLayout="flat" is still broken upstream as of DS 4.1.x
    # (tracked in docs/ds-3.1.1-verification.md § D). Leaving it at the
    # default "nested" layout so this example actually runs end-to-end.
}

# Generation settings submitted with the job; "output" stays the last key.
config = {
    "sector": "retail",
    "country": "US",
    "accountingFramework": "us_gaap",
    "rows": 500,
    "companies": 3,
    "periods": 2,
    "periodLength": "monthly",
    "processModels": ["o2c", "p2p"],
    "exportFormat": "json",
    "fraudPacks": [],
    "fraudRate": 0.0,
    "output": output_options,
}
# Submit the generation job and wait for it to finish.
# The status line names the layout the config actually requests: `config`
# never sets output.exportLayout, so the job runs with the default "nested"
# layout and announcing "flat layout" here would be misleading.
print("Submitting v2.3 generation job (native numbers, nested layout)...")
job = client.jobs.generate_config(config=config)
print(f" Job: {job.id}")

# Wait on the job with timeout=300 (units as defined by the vynfi client).
completed = client.jobs.wait(job.id, timeout=300)
print(f" Status: {completed.status}")

# Any status other than "completed" is treated as a failure: show the job's
# error_detail and exit with a non-zero code.
if completed.status != "completed":
    print(f" Error: {completed.error_detail}")
    raise SystemExit(1)
# ── Download the archive and load the journal entries ───────────────────────
archive = client.jobs.download_archive(completed.id)
print(f"\n{archive}")

# archive.json() is used because it works for both zip and managed_blob
# backends (for managed_blob it fetches lazily via presigned URL).
journal_file = "journal_entries.json"
entries = archive.json(journal_file)

entry_count = len(entries)
print(f"\nLoaded {entry_count} journal entry documents")
# Preview the first document: print up to ten of its fields together with
# each value's repr and Python type name. Skipped when nothing was loaded.
if entries:
    sample = entries[0]
    # Use the "header" mapping when the document has one; otherwise the
    # document itself supplies the fields.
    sample_fields = sample.get("header", sample)
    preview = list(sample_fields.items())[:10]
    print("\nFirst record (sample):")
    for field, value in preview:
        type_name = type(value).__name__
        print(f" {field}: {value!r} ({type_name})")
# Flatten the nested layout (header + lines) into one row per line.
#
# - An entry without a "header" key supplies its own fields as the header.
# - An entry without a "lines" key is treated as a single line, so an
#   already-flat record passes through unchanged ({**e, **e} == e).
# - Line fields are unpacked last, so they win on key collisions.
#
# dict.get() with a default already covers the "key missing" case, so no
# separate `"lines" in entry` test is needed.
rows = []
for entry in entries:
    entry_header = entry.get("header", entry)
    rows.extend(
        {**entry_header, **line} for line in entry.get("lines", [entry])
    )

df = pd.DataFrame(rows)
print(f"\nDataFrame: {df.shape[0]} rows x {df.shape[1]} cols")
# Smoke test: total the debit column when it is present.
# Per the original note, numericMode=native applies to the flat layout only
# and nested output serialises amounts as strings (TODO confirm against the
# DataSynth docs), so the column is coerced before summing.
if "debit_amount" in df.columns:
    debit_column = df["debit_amount"]
    # errors="coerce" turns unparseable values into NaN; fillna(0) then
    # counts them as zero.
    amounts = pd.to_numeric(debit_column, errors="coerce").fillna(0)
    total_debits = float(amounts.sum())
    print(f" Total debits: {total_debits:,.2f}")
    # Report the dtype of the column as loaded, before coercion.
    print(f" Raw column dtype: {debit_column.dtype}")
# ── Compare with legacy nested + string mode ────────────────────────────────
#
# Without native mode, each amount column needs an explicit conversion:
#
#     for col in ["debit_amount", "credit_amount"]:
#         df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
#
# Without flat mode, header and line fields have to be merged by hand:
#
#     rows = []
#     for entry in entries:
#         header = entry["header"]
#         for line in entry["lines"]:
#             rows.append({**header, **line})
#     df = pd.DataFrame(rows)
#
# v2.3 eliminates both layers of boilerplate.

# Closing summary, emitted one line per item.
summary_lines = (
    "\n=== v2.3 simplification summary ===",
    " Legacy: nested + string -> ~10 lines of flatten + numeric conversion",
    " v2.3: flat + native -> 2 lines: download_file + pd.DataFrame",
)
print(*summary_lines, sep="\n")