Skip to content

Commit 284cc02

Browse files
authored
Merge pull request #1 from ai-112/main
feat: jsonl
2 parents 3d5c311 + 43e947e commit 284cc02

File tree

9 files changed

+94
-11
lines changed

9 files changed

+94
-11
lines changed

README.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ A high-performance SQL INSERT statement parser that processes large SQL files an
99
- Parallel processing with configurable worker count
1010
- Multiple output formats:
1111
- JSON
12+
- JSONL
1213
- CSV
1314
- Text
1415
- Buffered I/O for optimal performance
@@ -25,7 +26,7 @@ go build cmd/sqlparser/main.go
2526
## Usage
2627

2728
```bash
28-
sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N] <sqlfile>
29+
sqlparser [-format=txt|csv|json|jsonl] [-output=filename] [-workers=N] <sqlfile>
2930
```
3031

3132
### Arguments
@@ -34,6 +35,7 @@ sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N] <sqlfile>
3435
- `txt`: Human-readable text format
3536
- `csv`: CSV format with headers
3637
- `json`: JSON format with table structure
38+
- `jsonl`: JSON Lines format (one JSON object per row, each including its table name)
3739
- `-output`: Output file path (default: stdout)
3840
- `-workers`: Number of worker threads (default: 1)
3941
- `<sqlfile>`: Input SQL file containing INSERT statements
@@ -64,6 +66,11 @@ sqlparser -format=csv -workers=4 -output=output.csv input.sql
6466
sqlparser input.sql
6567
```
6668

69+
4. Process SQL file and output as JSON lines:
70+
```bash
71+
sqlparser -format=jsonl -output=output.jsonl input.sql
72+
```
73+
6774
## Performance Optimization
6875

6976
The parser is optimized for performance through:
@@ -110,6 +117,12 @@ Row,id,name,email
110117
]
111118
```
112119

120+
### JSONL Format
121+
```json
122+
{"table_name": "users", "row_number": 1, "data": {"id": "1", "name": "John Doe", "email": "john@example.com"}}
123+
{"table_name": "users", "row_number": 2, "data": {"id": "2", "name": "John Doe", "email": "john@example.com"}}
124+
```
125+
113126
## License
114127

115128
This project is licensed under the MIT License - see the LICENSE file for details.

cmd/sqlparser/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
)
1313

1414
func main() {
15-
format := flag.String("format", "txt", "Output format (txt, csv, json)")
15+
format := flag.String("format", "txt", "Output format (txt, csv, json, jsonl)")
1616
output := flag.String("output", "", "Output file (if not specified, prints to stdout)")
1717
workers := flag.Int("workers", getWorkerCount(), "Number of worker threads")
1818
flag.Parse()

pkg/models/types.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,14 @@ func getBatchSize() int {
2121
type OutputFormat string
2222

2323
const (
24-
FormatText OutputFormat = "txt"
25-
FormatCSV OutputFormat = "csv"
26-
FormatJSON OutputFormat = "json"
24+
FormatText OutputFormat = "txt"
25+
FormatCSV OutputFormat = "csv"
26+
FormatJSON OutputFormat = "json"
27+
FormatJSONL OutputFormat = "jsonl"
2728
)
2829

2930
type Row struct {
31+
TableName string `json:"table_name,omitempty"`
3032
RowNumber int `json:"row_number"`
3133
Data map[string]interface{} `json:"data"`
3234
}

pkg/parser/parser.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ func ProcessSQLFileInBatches(filename string, writer writer.Writer, numWorkers i
8888

8989
// Process rows immediately
9090
for _, row := range result.rows {
91+
if writer.Type() == models.FormatJSONL {
92+
row.TableName = currentTableName
93+
}
94+
9195
rowCount++
9296
row.RowNumber = rowCount
9397
currentBatch = append(currentBatch, row)

pkg/writer/csv_writer.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,7 @@ func (w *CSVWriter) Close() error {
7676
w.writer.Flush()
7777
return w.buffer.Flush()
7878
}
79+
80+
func (w *CSVWriter) Type() models.OutputFormat {
81+
return models.FormatCSV
82+
}

pkg/writer/json_writer.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,7 @@ func (w *JSONWriter) Close() error {
8585
}
8686
return w.writer.Flush()
8787
}
88+
89+
func (w *JSONWriter) Type() models.OutputFormat {
90+
return models.FormatJSON
91+
}

pkg/writer/jsonl_writer.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package writer
2+
3+
import (
4+
"bufio"
5+
"encoding/json"
6+
7+
"sqlparser/pkg/models"
8+
)
9+
10+
type JSONLWriter struct {
11+
writer *bufio.Writer
12+
}
13+
14+
func NewJSONLWriter(output *bufio.Writer) (*JSONLWriter, error) {
15+
return &JSONLWriter{writer: output}, nil
16+
}
17+
18+
func (w *JSONLWriter) WriteTableStart(tableName string) error {
19+
return nil
20+
}
21+
22+
func (w *JSONLWriter) WriteRows(rows []models.Row) error {
23+
if len(rows) == 0 {
24+
return nil
25+
}
26+
27+
for i, row := range rows {
28+
if i > 0 {
29+
if _, err := w.writer.Write([]byte("\n")); err != nil {
30+
return err
31+
}
32+
}
33+
data, err := json.Marshal(row)
34+
if err != nil {
35+
return err
36+
}
37+
if _, err = w.writer.Write(data); err != nil {
38+
return err
39+
}
40+
}
41+
return w.writer.Flush()
42+
}
43+
44+
func (w *JSONLWriter) WriteTableEnd() error {
45+
return nil
46+
}
47+
48+
func (w *JSONLWriter) Close() error {
49+
return w.writer.Flush()
50+
}
51+
52+
func (w *JSONLWriter) Type() models.OutputFormat {
53+
return models.FormatJSONL
54+
}

pkg/writer/text_writer.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,7 @@ func (w *TextWriter) WriteTableEnd() error {
5252
func (w *TextWriter) Close() error {
5353
return w.writer.Flush()
5454
}
55+
56+
func (w *TextWriter) Type() models.OutputFormat {
57+
return models.FormatText
58+
}

pkg/writer/writer.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,17 @@ type Writer interface {
1515
WriteRows(rows []models.Row) error
1616
WriteTableEnd() error
1717
Close() error
18+
Type() models.OutputFormat
1819
}
1920

2021
func CreateWriter(format models.OutputFormat, output io.Writer) (Writer, error) {
21-
2222
bufferedWriter := bufio.NewWriterSize(output, bufferSize)
2323

2424
switch format {
2525
case models.FormatJSON:
26-
w, err := NewJSONWriter(bufferedWriter)
27-
if err != nil {
28-
return nil, err
29-
}
30-
return w, nil
26+
return NewJSONWriter(bufferedWriter)
27+
case models.FormatJSONL:
28+
return NewJSONLWriter(bufferedWriter)
3129
case models.FormatCSV:
3230
return NewCSVWriter(bufferedWriter), nil
3331
case models.FormatText:

0 commit comments

Comments
 (0)