Skip to content

Commit 379e0e2

Browse files
committed
add readme and .env support
1 parent 40b43aa commit 379e0e2

File tree

5 files changed

+167
-5
lines changed

5 files changed

+167
-5
lines changed

.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# SQL Parser Configuration
2+
BATCH_SIZE=100000
3+
WORKER_COUNT=1

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 SQL Parser Contributors
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# SQL Parser
2+
3+
A high-performance SQL INSERT statement parser that processes large SQL files and outputs the data in various formats.
4+
5+
## Features
6+
7+
- Processes large SQL files with INSERT statements
8+
- Memory-efficient batch processing
9+
- Parallel processing with configurable worker count
10+
- Multiple output formats:
11+
- JSON
12+
- CSV
13+
- Text
14+
- Buffered I/O for optimal performance
15+
- Configurable batch size and worker count via environment variables
16+
17+
## Installation
18+
19+
```bash
20+
git clone https://github.com/githubesson/sqlparser
21+
cd sqlparser
22+
go build -o sqlparser ./cmd/sqlparser
23+
```
24+
25+
## Usage
26+
27+
```bash
28+
sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N] <sqlfile>
29+
```
30+
31+
### Arguments
32+
33+
- `-format`: Output format (default: txt)
34+
- `txt`: Human-readable text format
35+
- `csv`: CSV format with headers
36+
- `json`: JSON format with table structure
37+
- `-output`: Output file path (default: stdout)
38+
- `-workers`: Number of worker threads (default: the value of `WORKER_COUNT`, or 1 if it is unset or invalid)
39+
- `<sqlfile>`: Input SQL file containing INSERT statements
40+
41+
### Environment Variables
42+
43+
The application reads its configuration from environment variables. The repository's `.env` file lists the supported variables and their default values:
44+
45+
```env
46+
BATCH_SIZE=100000 # Number of rows to process in each batch
47+
WORKER_COUNT=1 # Default number of worker threads
48+
```
49+
50+
### Examples
51+
52+
1. Process SQL file and output as JSON:
53+
```bash
54+
sqlparser -format=json -output=output.json input.sql
55+
```
56+
57+
2. Process SQL file with 4 workers and output as CSV:
58+
```bash
59+
sqlparser -format=csv -workers=4 -output=output.csv input.sql
60+
```
61+
62+
3. Process SQL file and print to console in text format:
63+
```bash
64+
sqlparser input.sql
65+
```
66+
67+
## Performance Optimization
68+
69+
The parser is optimized for performance through:
70+
- Batch processing to manage memory usage
71+
- Parallel processing with configurable worker count
72+
- Buffered I/O operations
73+
- Memory pooling for row data
74+
- Efficient string handling
75+
76+
## Output Formats
77+
78+
### Text Format
79+
```
80+
Table: users
81+
Row 1:
82+
id: 1
83+
name: John Doe
84+
85+
```
86+
87+
### CSV Format
88+
```
89+
Table:,users
90+
Row,id,name,email
91+
1,1,John Doe,john@example.com
92+
```
93+
94+
### JSON Format
95+
```json
96+
[
97+
{
98+
"table_name": "users",
99+
"rows": [
100+
{
101+
"row_number": 1,
102+
"data": {
103+
"id": "1",
104+
"name": "John Doe",
105+
"email": "john@example.com"
106+
}
107+
}
108+
]
109+
}
110+
]
111+
```
112+
113+
## License
114+
115+
This project is licensed under the MIT License - see the LICENSE file for details.

cmd/sqlparser/main.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import (
44
"flag"
55
"fmt"
66
"os"
7-
"runtime"
7+
"strconv"
88

99
"sqlparser/pkg/models"
1010
"sqlparser/pkg/parser"
@@ -14,15 +14,15 @@ import (
1414
func main() {
1515
format := flag.String("format", "txt", "Output format (txt, csv, json)")
1616
output := flag.String("output", "", "Output file (if not specified, prints to stdout)")
17-
workers := flag.Int("workers", runtime.NumCPU(), "Number of worker threads (default: number of CPU cores)")
17+
workers := flag.Int("workers", getWorkerCount(), "Number of worker threads")
1818
flag.Parse()
1919

2020
args := flag.Args()
2121
if len(args) < 1 {
2222
fmt.Printf("Usage: sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N] <sqlfile>\n")
2323
fmt.Printf(" -format: Output format (default: txt)\n")
2424
fmt.Printf(" -output: Output file (default: stdout)\n")
25-
fmt.Printf(" -workers: Number of worker threads (default: %d)\n", runtime.NumCPU())
25+
fmt.Printf(" -workers: Number of worker threads (default: %d)\n", getWorkerCount())
2626
os.Exit(1)
2727
}
2828

@@ -59,3 +59,12 @@ func main() {
5959
os.Exit(1)
6060
}
6161
}
62+
63+
// getWorkerCount returns the worker count configured through the
// WORKER_COUNT environment variable. It falls back to 1 when the variable
// is unset, empty, not a valid integer, or not strictly positive.
func getWorkerCount() int {
	const fallback = 1

	raw := os.Getenv("WORKER_COUNT")
	if raw == "" {
		return fallback
	}

	n, err := strconv.Atoi(raw)
	if err != nil || n <= 0 {
		// Unparseable and non-positive values are ignored, not reported.
		return fallback
	}
	return n
}

pkg/models/types.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,23 @@
11
package models
22

3-
const (
4-
BatchSize = 100000
3+
import (
4+
"os"
5+
"strconv"
6+
)
7+
8+
var (
9+
BatchSize = getBatchSize()
510
)
611

12+
// getBatchSize returns the batch size configured through the BATCH_SIZE
// environment variable. It falls back to 100000 when the variable is unset,
// empty, not a valid integer, or not strictly positive.
func getBatchSize() int {
	const fallback = 100000 // default batch size

	raw := os.Getenv("BATCH_SIZE")
	if raw == "" {
		return fallback
	}

	n, err := strconv.Atoi(raw)
	if err != nil || n <= 0 {
		// Unparseable and non-positive values are ignored, not reported.
		return fallback
	}
	return n
}
20+
721
type OutputFormat string
822

923
const (

0 commit comments

Comments
 (0)