# Here as a starting point. You can reference these models downstream in models that actually
# materialize as tables.
staging:
  +materialized: ephemeral
"""
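
For context, here is a minimal sketch of the `dbt_project.yml` section this template renders to (the project name is hypothetical). In dbt, `ephemeral` models are compiled into CTEs inside the downstream models that `ref()` them, so staging models are never built as tables or views themselves:

```yaml
models:
  my_project: # hypothetical dbt project name
    staging:
      # Inlined as CTEs into downstream models; nothing is created in the warehouse.
      +materialized: ephemeral
```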

SOURCES_YML_TEMPLATE = """# This file defines all data sources referenced by this model. The mapping

[...]
"SQL credentials"). You can get them at https://www.splitgraph.com/settings/sql-credentials (or
your deployment URL if you're on a private deployment).

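These are ordinary PostgreSQL credentials, so one way to use them from dbt is via `profiles.yml`. A minimal sketch, assuming a connection to the public DDN endpoint at `data.splitgraph.com:5432` (database `ddn`); the profile name, environment variable names, and target schema are hypothetical:

```yaml
splitgraph: # hypothetical profile name; must match `profile:` in dbt_project.yml
  target: remote
  outputs:
    remote:
      type: postgres
      host: data.splitgraph.com # public DDN endpoint; use your own host on a private deployment
      port: 5432
      dbname: ddn
      user: "{{ env_var('SPLITGRAPH_API_KEY') }}" # SQL credential username (API key)
      password: "{{ env_var('SPLITGRAPH_API_SECRET') }}" # SQL credential password (API secret)
      schema: my_namespace # hypothetical schema for materialized models
      threads: 1
```
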
### Edit `splitgraph.yml`

We generated a [`splitgraph.yml`](./splitgraph.yml) file from the parameters JSONSchema of
your chosen plugins. You should review it and fill in suitable plugin settings:

  - set `tables` to `tables: {}` to let the plugin automatically infer the schema and the
    options of the data source (by default, it adds a sample table to the project file)
  - change and customize the `metadata` block
  - set up the plugin parameters in `external.params`. Where a comment offers a list of
    alternative subobjects (`# Choose one of:` in the example below), choose one entry
    from the list and delete the list itself, leaving the chosen object nested directly
    under the key.

Example:

```yaml
- namespace: my_namespace
  repository: csv
  # Catalog-specific metadata for the repository. Optional.
  metadata:
    readme:
      text: Readme
    description: Description of the repository
    topics:
      - sample_topic
  # Data source settings for the repository. Optional.
  external:
    # Name of the credential that the plugin uses. This can also be a credential_id if the
    # credential is already registered on Splitgraph.
    credential: csv
    plugin: csv
    # Plugin-specific parameters matching the plugin's parameters schema
    params:
      connection: # Choose one of:
        - connection_type: http # REQUIRED. Constant
          url: '' # REQUIRED. HTTP URL to the CSV file
        - connection_type: s3 # REQUIRED. Constant
          s3_endpoint: '' # REQUIRED. S3 endpoint (including port if required)
          s3_bucket: '' # REQUIRED. Bucket the object is in
          s3_region: '' # Region of the S3 bucket
          s3_secure: false # Whether to use HTTPS for S3 access
          s3_object: '' # Limit the import to a single object
          s3_object_prefix: '' # Prefix for object in S3 bucket
      autodetect_header: true # Detect whether the CSV file has a header automatically
      autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc.) automatically
      autodetect_encoding: true # Detect the CSV file's encoding automatically
      autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection
      schema_inference_rows: 100000 # Number of rows to use for schema inference
      encoding: utf-8 # Encoding of the CSV file
      ignore_decode_errors: false # Ignore errors when decoding the file
      header: true # First line of the CSV file is its header
      delimiter: ',' # Character used to separate fields in the file
      quotechar: '"' # Character used to quote fields
    tables:
      sample_table:
        # Plugin-specific table parameters matching the plugin's schema
        options:
          url: '' # HTTP URL to the CSV file
          s3_object: '' # S3 object of the CSV file
          autodetect_header: true # Detect whether the CSV file has a header automatically
          autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc.) automatically
          autodetect_encoding: true # Detect the CSV file's encoding automatically
          autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection
          schema_inference_rows: 100000 # Number of rows to use for schema inference
          encoding: utf-8 # Encoding of the CSV file
          ignore_decode_errors: false # Ignore errors when decoding the file
          header: true # First line of the CSV file is its header
          delimiter: ',' # Character used to separate fields in the file
          quotechar: '"' # Character used to quote fields
        # Schema of the table, a list of objects with `name` and `type`. If set to `[]`, will infer.
        schema: []
    # Whether live querying is enabled for the plugin (creates a "live" tag in the
    # repository proxying to the data source). The plugin must support live querying.
    is_live: true
    # Ingestion schedule settings. Disable this if you're using GitHub Actions or other methods
    # to trigger ingestion.
    schedule:
```

becomes:

```yaml
- namespace: my_namespace
  repository: csv
  metadata:
    readme:
      text: Readme
    description: Description of the repository
    topics:
      - sample_topic
  external:
    # No credential required since we're querying a CSV file over HTTP
    plugin: csv
    # Plugin-specific parameters matching the plugin's parameters schema
    params:
      connection:
        connection_type: http # REQUIRED. Constant
        url: 'https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv' # REQUIRED. HTTP URL to the CSV file
      autodetect_header: true # Detect whether the CSV file has a header automatically
      autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc.) automatically
      autodetect_encoding: true # Detect the CSV file's encoding automatically
      autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection
      schema_inference_rows: 100000 # Number of rows to use for schema inference
      encoding: utf-8 # Encoding of the CSV file
      ignore_decode_errors: false # Ignore errors when decoding the file
      header: true # First line of the CSV file is its header
      delimiter: ',' # Character used to separate fields in the file
      quotechar: '"' # Character used to quote fields
    # Automatically infer table parameters
    tables: {}
    is_live: true
```

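Once `splitgraph.yml` describes the repository, the dbt project's `sources.yml` can point at it. A minimal sketch, assuming Splitgraph's usual convention of exposing a repository over SQL as a schema named `namespace/repository` (the source name and table name here are hypothetical):

```yaml
version: 2

sources:
  - name: raw_csv # hypothetical source name, used as {{ source('raw_csv', 'airtravel') }}
    schema: my_namespace/csv # repositories are exposed as schemas named namespace/repository
    tables:
      - name: airtravel # hypothetical table created by the CSV ingestion
```
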
### Set up GitHub Actions

Because this repository was itself generated by a GitHub Actions job, we can't edit the workflow