From a5d4227636a613002fd756910432e4f8dcfe1c55 Mon Sep 17 00:00:00 2001 From: Dianjin Wang Date: Tue, 17 Dec 2024 12:21:16 +0800 Subject: [PATCH 1/3] Epic: rename old names for ASF brand Compliance This commit renames "Cloudberry Database" to "Apache Cloudberry" in the documentation and ensures clarity across references. --- docs/advanced-analytics/directory-tables.md | 10 +- docs/advanced-analytics/postgis.md | 24 +-- docs/basic-query-syntax.md | 8 +- docs/cbdb-architecture.md | 30 ++-- docs/cbdb-linux-compile.md | 68 ++++---- docs/cbdb-macos-compile.md | 42 ++--- docs/cbdb-op-deploy-guide.md | 26 +-- docs/cbdb-overview.md | 78 ++++----- docs/cbdb-scenarios.md | 6 +- docs/cbdb-vs-gp-features.md | 10 +- docs/connect-to-cbdb.md | 46 +++--- docs/create-and-manage-database.md | 26 +-- docs/create-and-manage-materialized-views.md | 6 +- docs/create-and-manage-schemas.md | 12 +- docs/create-and-manage-tables.md | 14 +- docs/create-and-manage-tablespaces.md | 12 +- docs/create-and-manage-views.md | 4 +- docs/data-loading/index.md | 6 +- .../load-data-from-web-services.md | 6 +- docs/data-loading/load-data-using-copy.md | 6 +- .../load-data-using-file-protocol.md | 8 +- docs/data-loading/load-data-using-gpfdist.md | 24 +-- docs/data-loading/load-data-using-gpload.md | 8 +- docs/data-types.md | 6 +- docs/deploy-cbdb-with-single-node.md | 34 ++-- .../functions/advanced-aggregate-functions.md | 4 +- docs/functions/index.md | 12 +- .../functions/json-functions-and-operators.md | 14 +- .../range-functions-and-operators.md | 2 +- .../text-search-functions-and-operators.md | 2 +- docs/functions/window-functions.md | 4 +- docs/insert-update-delete-rows.md | 10 +- docs/performance/index.md | 10 +- .../parallel-create-ao-refresh-mv.md | 4 +- docs/performance/parallel-query-execution.md | 6 +- .../performance/update-stats-using-analyze.md | 8 +- .../use-aggre-pushdown-to-speed-up-queries.md | 16 +- ...uto-materialized-view-to-answer-queries.md | 6 +- 
.../use-incremental-materialized-view.md | 16 +- .../use-index-scan-on-ao-tables.md | 8 +- .../use-runtimefilter-to-optimize-queries.md | 10 +- .../use-unique-index-on-ao-tables.md | 4 +- docs/security/client-auth.md | 46 +++--- docs/security/manage-roles-and-privileges.md | 24 +-- docs/security/protect-passwords.md | 4 +- docs/security/set-password-profile.md | 14 +- docs/security/transparent-data-encryption.md | 10 +- docs/sql-stmts/abort.md | 2 +- docs/sql-stmts/alter-database.md | 6 +- docs/sql-stmts/alter-default-privileges.md | 2 +- docs/sql-stmts/alter-domain.md | 2 +- docs/sql-stmts/alter-extension.md | 2 +- docs/sql-stmts/alter-external-table.md | 4 +- docs/sql-stmts/alter-foreign-data-wrapper.md | 8 +- docs/sql-stmts/alter-foreign-table.md | 12 +- docs/sql-stmts/alter-function.md | 8 +- docs/sql-stmts/alter-index.md | 4 +- docs/sql-stmts/alter-materialized-view.md | 2 +- docs/sql-stmts/alter-operator-family.md | 2 +- docs/sql-stmts/alter-policy.md | 2 +- docs/sql-stmts/alter-procedure.md | 4 +- docs/sql-stmts/alter-resource-group.md | 6 +- docs/sql-stmts/alter-resource-queue.md | 6 +- docs/sql-stmts/alter-role.md | 4 +- docs/sql-stmts/alter-routine.md | 2 +- docs/sql-stmts/alter-rule.md | 2 +- docs/sql-stmts/alter-sequence.md | 8 +- docs/sql-stmts/alter-server.md | 6 +- docs/sql-stmts/alter-table.md | 46 +++--- docs/sql-stmts/alter-trigger.md | 4 +- docs/sql-stmts/alter-type.md | 8 +- docs/sql-stmts/alter-user-mapping.md | 4 +- docs/sql-stmts/alter-user.md | 2 +- docs/sql-stmts/alter-view.md | 4 +- docs/sql-stmts/analyze.md | 18 +- docs/sql-stmts/begin.md | 6 +- docs/sql-stmts/checkpoint.md | 2 +- docs/sql-stmts/close.md | 4 +- docs/sql-stmts/cluster.md | 8 +- docs/sql-stmts/comment.md | 6 +- docs/sql-stmts/commit.md | 2 +- docs/sql-stmts/copy.md | 44 ++--- docs/sql-stmts/create-access-method.md | 2 +- docs/sql-stmts/create-aggregate.md | 16 +- docs/sql-stmts/create-cast.md | 12 +- docs/sql-stmts/create-collation.md | 6 +- 
docs/sql-stmts/create-conversion.md | 4 +- docs/sql-stmts/create-database.md | 4 +- docs/sql-stmts/create-domain.md | 6 +- docs/sql-stmts/create-extension.md | 14 +- docs/sql-stmts/create-external-table.md | 38 ++--- docs/sql-stmts/create-foreign-data-wrapper.md | 10 +- docs/sql-stmts/create-foreign-table.md | 22 +-- docs/sql-stmts/create-function.md | 32 ++-- docs/sql-stmts/create-index.md | 8 +- docs/sql-stmts/create-language.md | 16 +- docs/sql-stmts/create-materialized-view.md | 4 +- docs/sql-stmts/create-operator-class.md | 6 +- docs/sql-stmts/create-operator-family.md | 2 +- docs/sql-stmts/create-operator.md | 6 +- docs/sql-stmts/create-policy.md | 4 +- docs/sql-stmts/create-procedure.md | 2 +- docs/sql-stmts/create-protocol.md | 12 +- docs/sql-stmts/create-resource-group.md | 14 +- docs/sql-stmts/create-resource-queue.md | 14 +- docs/sql-stmts/create-role.md | 16 +- docs/sql-stmts/create-rule.md | 6 +- docs/sql-stmts/create-schema.md | 10 +- docs/sql-stmts/create-sequence.md | 20 +-- docs/sql-stmts/create-server.md | 8 +- docs/sql-stmts/create-statistics.md | 2 +- docs/sql-stmts/create-table-as.md | 18 +- docs/sql-stmts/create-table.md | 154 +++++++++--------- docs/sql-stmts/create-tablespace.md | 18 +- docs/sql-stmts/create-transform.md | 2 +- docs/sql-stmts/create-trigger.md | 20 +-- docs/sql-stmts/create-type.md | 16 +- docs/sql-stmts/create-user-mapping.md | 6 +- docs/sql-stmts/create-user.md | 2 +- docs/sql-stmts/create-view.md | 12 +- docs/sql-stmts/declare.md | 26 +-- docs/sql-stmts/delete.md | 10 +- docs/sql-stmts/discard.md | 4 +- docs/sql-stmts/do.md | 2 +- docs/sql-stmts/drop-access-method.md | 4 +- docs/sql-stmts/drop-collation.md | 2 +- docs/sql-stmts/drop-conversion.md | 2 +- docs/sql-stmts/drop-domain.md | 2 +- docs/sql-stmts/drop-extension.md | 8 +- docs/sql-stmts/drop-external-table.md | 2 +- docs/sql-stmts/drop-foreign-data-wrapper.md | 4 +- docs/sql-stmts/drop-foreign-table.md | 4 +- docs/sql-stmts/drop-index.md | 2 +- 
docs/sql-stmts/drop-materialized-view.md | 2 +- docs/sql-stmts/drop-owned.md | 2 +- docs/sql-stmts/drop-policy.md | 2 +- docs/sql-stmts/drop-procedure.md | 2 +- docs/sql-stmts/drop-protocol.md | 6 +- docs/sql-stmts/drop-resource-group.md | 4 +- docs/sql-stmts/drop-resource-queue.md | 4 +- docs/sql-stmts/drop-role.md | 2 +- docs/sql-stmts/drop-routine.md | 2 +- docs/sql-stmts/drop-rule.md | 4 +- docs/sql-stmts/drop-schema.md | 2 +- docs/sql-stmts/drop-sequence.md | 4 +- docs/sql-stmts/drop-server.md | 4 +- docs/sql-stmts/drop-statistics.md | 2 +- docs/sql-stmts/drop-table.md | 4 +- docs/sql-stmts/drop-tablespace.md | 6 +- .../drop-text-search-configuration.md | 2 +- docs/sql-stmts/drop-text-search-dictionary.md | 2 +- docs/sql-stmts/drop-text-search-parser.md | 2 +- docs/sql-stmts/drop-text-search-template.md | 2 +- docs/sql-stmts/drop-transform.md | 4 +- docs/sql-stmts/drop-trigger.md | 2 +- docs/sql-stmts/drop-type.md | 4 +- docs/sql-stmts/drop-user-mapping.md | 4 +- docs/sql-stmts/drop-user.md | 2 +- docs/sql-stmts/drop-view.md | 4 +- docs/sql-stmts/end.md | 4 +- docs/sql-stmts/execute.md | 2 +- docs/sql-stmts/explain.md | 12 +- docs/sql-stmts/fetch.md | 10 +- docs/sql-stmts/grant.md | 24 +-- docs/sql-stmts/import-foreign-schema.md | 4 +- docs/sql-stmts/index.md | 8 +- docs/sql-stmts/insert.md | 12 +- docs/sql-stmts/load.md | 10 +- docs/sql-stmts/lock.md | 16 +- docs/sql-stmts/move.md | 2 +- docs/sql-stmts/notify.md | 4 +- docs/sql-stmts/prepare.md | 8 +- docs/sql-stmts/reassign-owned.md | 2 +- docs/sql-stmts/refresh-materialized-view.md | 2 +- docs/sql-stmts/reindex.md | 4 +- docs/sql-stmts/release-savepoint.md | 4 +- docs/sql-stmts/reset.md | 2 +- docs/sql-stmts/retrieve.md | 4 +- docs/sql-stmts/revoke.md | 12 +- docs/sql-stmts/rollback-to-savepoint.md | 2 +- docs/sql-stmts/rollback.md | 2 +- docs/sql-stmts/savepoint.md | 2 +- docs/sql-stmts/select-into.md | 2 +- docs/sql-stmts/select.md | 66 ++++---- docs/sql-stmts/set-constraints.md | 4 +- 
docs/sql-stmts/set-role.md | 2 +- docs/sql-stmts/set-session-authorization.md | 2 +- docs/sql-stmts/set-transaction.md | 14 +- docs/sql-stmts/set.md | 8 +- docs/sql-stmts/show.md | 6 +- docs/sql-stmts/start-transaction.md | 6 +- docs/sql-stmts/truncate.md | 6 +- docs/sql-stmts/unlisten.md | 2 +- docs/sql-stmts/update.md | 12 +- docs/sql-stmts/vacuum.md | 10 +- docs/sql-stmts/values.md | 4 +- docs/start-and-stop-cbdb-database.md | 42 ++--- docs/sys-admin/backup-and-restore/index.md | 10 +- .../perform-full-backup-and-restore.md | 36 ++-- .../perform-incremental-backup-and-restore.md | 4 +- docs/sys-admin/check-database-system.md | 64 ++++---- docs/sys-admin/configure-database-system.md | 14 +- .../sys-admin/enable-coordinator-mirroring.md | 8 +- ...ecommended-maintenance-monitoring-tasks.md | 28 ++-- docs/sys-utilities/analyzedb.md | 6 +- docs/sys-utilities/clusterdb.md | 2 +- docs/sys-utilities/createdb.md | 10 +- docs/sys-utilities/createuser.md | 6 +- docs/sys-utilities/dropuser.md | 2 +- docs/sys-utilities/gpactivatestandby.md | 10 +- docs/sys-utilities/gpaddmirrors.md | 24 +-- docs/sys-utilities/gpbackup.md | 20 +-- docs/sys-utilities/gpcheckcat.md | 18 +- docs/sys-utilities/gpcheckperf.md | 6 +- docs/sys-utilities/gpconfig.md | 14 +- docs/sys-utilities/gpdeletesystem.md | 12 +- docs/sys-utilities/gpdemo.md | 4 +- docs/sys-utilities/gpexpand.md | 26 +-- docs/sys-utilities/gpfdist.md | 26 +-- docs/sys-utilities/gpinitstandby.md | 30 ++-- docs/sys-utilities/gpinitsystem.md | 86 +++++----- docs/sys-utilities/gpload.md | 40 ++--- docs/sys-utilities/gplogfilter.md | 10 +- docs/sys-utilities/gpmemwatcher.md | 6 +- docs/sys-utilities/gpmovemirrors.md | 6 +- docs/sys-utilities/gppkg.md | 12 +- docs/sys-utilities/gprecoverseg.md | 14 +- docs/sys-utilities/gpreload.md | 6 +- docs/sys-utilities/gprestore.md | 18 +- docs/sys-utilities/gpshrink.md | 6 +- docs/sys-utilities/gpssh-exkeys.md | 8 +- docs/sys-utilities/gpssh.md | 2 +- docs/sys-utilities/gpstart.md | 20 +-- 
docs/sys-utilities/gpstate.md | 34 ++-- docs/sys-utilities/gpstop.md | 18 +- docs/sys-utilities/gpsync.md | 4 +- docs/sys-utilities/index.md | 36 ++-- docs/sys-utilities/pg-checksums.md | 6 +- docs/sys-utilities/pg-config.md | 26 +-- docs/sys-utilities/pg-dump.md | 32 ++-- docs/sys-utilities/pg-dumpall.md | 16 +- docs/sys-utilities/pg-filedump.md | 14 +- docs/sys-utilities/pg-restore.md | 10 +- docs/sys-utilities/psql.md | 28 ++-- docs/sys-utilities/reindexdb.md | 8 +- docs/sys-utilities/vacuumdb.md | 4 +- docs/table-storage-models.md | 10 +- docs/transactional-concurrency-control.md | 22 +-- docs/work-with-transactions.md | 14 +- i18n/zh/code.json | 8 +- .../advanced-analytics/directory-tables.md | 10 +- .../current/advanced-analytics/postgis.md | 24 +-- .../current/basic-query-syntax.md | 6 +- .../current/cbdb-architecture.md | 32 ++-- .../current/cbdb-linux-compile.md | 66 ++++---- .../current/cbdb-macos-compile.md | 42 ++--- .../current/cbdb-op-deploy-guide.md | 24 +-- .../current/cbdb-overview.md | 72 ++++---- .../current/cbdb-scenarios.md | 6 +- .../current/cbdb-vs-gp-features.md | 10 +- .../current/connect-to-cbdb.md | 46 +++--- .../current/create-and-manage-database.md | 28 ++-- .../current/create-and-manage-tables.md | 14 +- .../load-data-from-web-services.md | 10 +- .../data-loading/load-data-overview.md | 6 +- .../data-loading/load-data-using-copy.md | 8 +- .../load-data-using-file-protocol.md | 8 +- .../data-loading/load-data-using-gpfdist.md | 22 +-- .../data-loading/load-data-using-gpload.md | 8 +- .../current/data-types.md | 2 +- .../current/deploy-cbdb-with-single-node.md | 34 ++-- .../current/insert-update-delete-rows.md | 14 +- .../current/manage-roles-and-privileges.md | 2 +- .../parallel-create-ao-refresh-mv.md | 2 +- .../performance/parallel-query-execution.md | 6 +- .../performance/update-stats-using-analyze.md | 6 +- .../use-aggre-pushdown-to-speed-up-queries.md | 14 +- ...uto-materialized-view-to-answer-queries.md | 4 +- 
.../use-incremental-materialized-view.md | 12 +- .../use-index-scan-on-ao-tables.md | 8 +- .../use-runtimefilter-to-optimize-queries.md | 8 +- .../use-unique-index-on-ao-tables.md | 4 +- .../current/query-performance-overview.md | 8 +- .../security/transparent-data-encryption.md | 10 +- .../current/set-password-profile.md | 14 +- .../current/sql-stmts/sql-stmt-abort.md | 4 +- .../sql-stmts/sql-stmt-alter-database.md | 8 +- .../current/sql-stmts/sql-stmt-alter-rule.md | 2 +- .../sql-stmts/sql-stmt-create-database.md | 4 +- .../sql-stmts/sql-stmt-create-index.md | 10 +- .../sql-stmts/sql-stmt-create-tablespace.md | 18 +- .../sql-stmts/sql-stmt-drop-database.md | 2 +- .../sql-stmts/sql-stmt-drop-extension.md | 10 +- .../sql-stmts/sql-stmt-drop-external-table.md | 4 +- .../current/sql-stmts/sql-stmt-drop-index.md | 6 +- .../sql-stmt-drop-materialized-view.md | 6 +- .../current/sql-stmts/sql-stmt-drop-role.md | 2 +- .../current/sql-stmts/sql-stmt-drop-rule.md | 6 +- .../current/sql-stmts/sql-stmt-drop-schema.md | 6 +- .../current/sql-stmts/sql-stmt-drop-table.md | 6 +- .../sql-stmts/sql-stmt-drop-tablespace.md | 6 +- .../current/sql-stmts/sql-stmt-drop-type.md | 6 +- .../current/sql-stmts/sql-stmt-drop-user.md | 2 +- .../current/sql-stmts/sql-stmt-drop-view.md | 6 +- .../current/sql-stmts/sql-stmt-end.md | 4 +- .../sql-stmt-rollback-to-savepoint.md | 2 +- .../current/sql-stmts/sql-stmt-rollback.md | 2 +- .../current/sql-stmts/sql-stmt-savepoint.md | 2 +- .../current/sql-stmts/sql-stmt-set-role.md | 2 +- .../sql-stmts/sql-stmt-set-transaction.md | 16 +- .../sql-stmts/sql-stmt-start-transaction.md | 6 +- .../current/sql-stmts/sql-stmt-truncate.md | 6 +- .../current/start-and-stop-cbdb-database.md | 38 ++--- .../sys-admin/check-database-system.md | 2 +- .../current/sys-utilities/db-util-gpdemo.md | 4 +- .../current/sys-utilities/gpdemo.md | 4 +- .../current/sys-utilities/gpshrink.md | 6 +- .../current/sys-utilities/pg-filedump.md | 14 +- .../current/table-storage-models.md 
| 2 +- .../transactional-concurrency-control.md | 18 +- .../current/work-with-transactions.md | 14 +- src/components/bootcamp/TextPic/index.tsx | 4 +- src/components/common/Layout/index.tsx | 2 +- src/consts/bootcamp.tsx | 4 +- ...o-database-and-cloudberry-architecture.md} | 0 .../101-6-backup-and-recovery-operations.md | 4 +- src/pages/bootcamp/102-cbdb-crash-course.md | 6 +- ...on-to-cloudberry-in-database-analytics.md} | 0 328 files changed, 1915 insertions(+), 1915 deletions(-) rename src/pages/bootcamp/{101-0-introduction-to-database-and-cloudberrydb-architecture.md => 101-0-introduction-to-database-and-cloudberry-architecture.md} (100%) rename src/pages/bootcamp/{104-1-introduction-to-cloudberrydb-in-database-analytics.md => 104-1-introduction-to-cloudberry-in-database-analytics.md} (100%) diff --git a/docs/advanced-analytics/directory-tables.md b/docs/advanced-analytics/directory-tables.md index ae15745049..55290d155b 100644 --- a/docs/advanced-analytics/directory-tables.md +++ b/docs/advanced-analytics/directory-tables.md @@ -4,11 +4,11 @@ title: Directory Table # Directory Table -Cloudberry Database has introduced *directory tables* in v1.5.3 for unified management of unstructured data on local or object storage. +Apache Cloudberry has introduced *directory tables* in v1.5.3 for unified management of unstructured data on local or object storage. In the context of large-scale AI, AI applications have generated the need to manage unstructured multi-modal corpora. There is a need to continuously prepare a large amount of high-quality curated unstructured corpora, train large models through data iteration, and summarize rich knowledge bases. Therefore, there are technical challenges in the management and processing of structured corpora. -To address these challenges, Cloudberry Database introduces directory tables for managing multiple types of unstructured data. 
Developer users can use simple SQL statements to take advantage of the capabilities of multiple computing engines to achieve one-stop data processing and application development. +To address these challenges, Apache Cloudberry introduces directory tables for managing multiple types of unstructured data. Developer users can use simple SQL statements to take advantage of the capabilities of multiple computing engines to achieve one-stop data processing and application development. Directory tables store, manage, and analyze unstructured data objects. They reside within tablespaces. When unstructured data files are imported, a directory table record (file metadata) is created, and the file itself is loaded into object storage. The table metadata remains associated with the corresponding object storage file. @@ -40,7 +40,7 @@ CREATE DIRECTORY TABLE ; To create a directory table in an external storage, you first need to create a tablespace in that storage. You'll need to provide connection information of the external storage server, such as server IP address, protocol, and access credentials. The following examples show how to create directory tables on QingCloud Object Storage and HDFS. -1. Create server objects and define connection methods for external data sources. Cloudberry Database supports protocols for multiple storage options, including S3 object storage and HDFS. The following examples create server objects named `oss_server` and `hdfs_server` on QingCloud and HDFS, respectively. +1. Create server objects and define connection methods for external data sources. Apache Cloudberry supports protocols for multiple storage options, including S3 object storage and HDFS. The following examples create server objects named `oss_server` and `hdfs_server` on QingCloud and HDFS, respectively. 
- For QingCloud: @@ -58,7 +58,7 @@ To create a directory table in an external storage, you first need to create a t - `protocol`: The protocol used to connect to the external data source. In the examples above, `'qingstor'` indicates using the QingCloud object storage service protocol, and `'hdfs'` indicates using the HDFS storage service protocol. - `prefix`: Sets the path prefix when accessing object storage. If this prefix is set, all operations will be limited to this specific path, such as `prefix '/rose-oss-test4/usf1'`. This is typically used to organize and isolate data stored in the same bucket. - - `endpoint`: Specifies the network address of the external object storage service. For example, `'pek3b.qingstor.com'` is a specific regional node of the QingCloud service. Through this endpoint, Cloudberry Database can access external data. + - `endpoint`: Specifies the network address of the external object storage service. For example, `'pek3b.qingstor.com'` is a specific regional node of the QingCloud service. Through this endpoint, Apache Cloudberry can access external data. - `https`: Specifies whether to connect to the object storage service using the HTTPS protocol. In this command, `'false'` indicates using an unencrypted HTTP connection. This setting might be influenced by data transmission security requirements, and it is generally recommended to use HTTPS to ensure data security. - `virtual_host`: Determines whether to access the bucket using virtual hosting. `'false'` means that bucket access is not done in virtual host style (which means that the bucket name is not included in the URL). This option is typically dependent on the URL format support provided by the storage service provider. - `namenode`: Represents the IP of the HDFS node. You need to replace `` with the actual IP address and port number, such as `'192.168.51.106:8020'`. 
@@ -134,7 +134,7 @@ In general, the fields of a directory table are as follows: ### Upload file into directory table -After uploading a file to a directory table, Cloudberry Database manages the file's upload to local storage or object storage and stores the file's metadata in the directory table. In Cloudberry Database v1.5.3, users cannot directly manage object storage directory files. +After uploading a file to a directory table, Apache Cloudberry manages the file's upload to local storage or object storage and stores the file's metadata in the directory table. In Apache Cloudberry v1.5.3, users cannot directly manage object storage directory files. Upload files from local storage to database object storage: diff --git a/docs/advanced-analytics/postgis.md b/docs/advanced-analytics/postgis.md index af562dc74e..fded9b0bca 100644 --- a/docs/advanced-analytics/postgis.md +++ b/docs/advanced-analytics/postgis.md @@ -4,17 +4,17 @@ title: Geospatial Analytics # Geospatial Analytics -[PostGIS](https://postgis.net/) extends the capabilities of the PostgreSQL by adding support for storing, indexing, and querying geospatial data. Cloudberry Database supports PostGIS for geospatial analytics. +[PostGIS](https://postgis.net/) extends the capabilities of the PostgreSQL by adding support for storing, indexing, and querying geospatial data. Apache Cloudberry supports PostGIS for geospatial analytics. -This document introduces how to compile and build PostGIS for your Cloudberry Database cluster. +This document introduces how to compile and build PostGIS for your Apache Cloudberry cluster. -You can access the Cloudberry Database PostGIS project repo at [`cloudberrydb/postgis`](https://github.com/cloudberrydb/postgis). The PostGIS code in this repo is dedicated to Cloudberry Database. The compilation and building method introduced in this document is based on the code of this repo. 
+You can access the PostGIS for Apache Cloudberry project repo at [`cloudberry-contrib/postgis`](https://github.com/cloudberry-contrib/postgis). The PostGIS code in this repo is dedicated to Apache Cloudberry. The compilation and building method introduced in this document is based on the code of this repo. -## Compile PostGIS for Cloudberry Database +## Compile PostGIS for Apache Cloudberry -Before installing PostGIS for Cloudberry Database, install the required dependencies and compile several components. This process is currently supported only on CentOS, with plans to support Rocky Linux in the future. +Before installing PostGIS for Apache Cloudberry, install the required dependencies and compile several components. This process is currently supported only on CentOS, with plans to support Rocky Linux in the future. -Before you get started, ensure that the Cloudberry Database is correctly installed on your machine. If it is not installed, see the [documentation](https://cloudberrydb.org/docs/) for installation instructions. +Before you get started, ensure that the Apache Cloudberry is correctly installed on your machine. If it is not installed, see the [documentation](https://cloudberry.apache.org/docs/) for installation instructions. 1. Install the pre-requested dependencies. @@ -93,10 +93,10 @@ Before you get started, ensure that the Cloudberry Database is correctly install 3. Build and install PostGIS. - 1. Download the `cloudberrydb/postgis` repo to your `/home/gpadmin` directory: + 1. 
Download the `cloudberry-contrib/postgis` repo to your `/home/gpadmin` directory: ```bash - git clone https://github.com/cloudberrydb/postgis.git /home/gpadmin/postgis + git clone https://github.com/cloudberry-contrib/postgis.git /home/gpadmin/postgis chown -R gpadmin:gpadmin /home/gpadmin/postgis ``` @@ -105,8 +105,8 @@ Before you get started, ensure that the Cloudberry Database is correctly install Before starting the compilation process, run the following commands to make sure the environment variables are set ready: ```bash - source /usr/local/cloudberrydb/greenplum_path.sh - source /home/gpadmin/cloudberrydb/gpAux/gpdemo/gpdemo-env.sh + source /usr/local/cloudberry/greenplum_path.sh + source /home/gpadmin/cloudberry/gpAux/gpdemo/gpdemo-env.sh scl enable devtoolset-10 bash source /opt/rh/devtoolset-10/enable ``` @@ -120,9 +120,9 @@ Before you get started, ensure that the Cloudberry Database is correctly install make && make install ``` -## Use PostGIS in Cloudberry Database +## Use PostGIS in Apache Cloudberry -After you have compiled and built PostGIS and the supporting extensions successfully on your Cloudberry Database cluster and have started the demo cluster, you can run the following commands to enable PostGIS and the supporting extensions: +After you have compiled and built PostGIS and the supporting extensions successfully on your Apache Cloudberry cluster and have started the demo cluster, you can run the following commands to enable PostGIS and the supporting extensions: ```sql $ psql -p 7000 postgres diff --git a/docs/basic-query-syntax.md b/docs/basic-query-syntax.md index f2e51ff736..015f6d27f4 100644 --- a/docs/basic-query-syntax.md +++ b/docs/basic-query-syntax.md @@ -2,11 +2,11 @@ title: Basic Query Syntax --- -# Basic Queries of Cloudberry Database +# Basic Queries of Apache Cloudberry -This document introduce the basic queries of Cloudberry Database. +This document introduce the basic queries of Apache Cloudberry. 
-Cloudberry Database is a high-performance, highly parallel data warehouse developed based on PostgreSQL and Greenplum. Here are some examples of the basic query syntax. +Apache Cloudberry is a high-performance, highly parallel data warehouse developed based on PostgreSQL and Greenplum. Here are some examples of the basic query syntax. - `SELECT`: Used to retrieve data from databases & tables. @@ -59,7 +59,7 @@ Cloudberry Database is a high-performance, highly parallel data warehouse develo WHERE department_id IN (SELECT id FROM departments WHERE location = 'New York'); -- Queries all employees working in New York. ``` -The above is just a brief overview of the basic query syntax in Cloudberry Database. Cloudberry Database also provides more advanced queries and functions to help developers perform complex data operations and analyses. +The above is just a brief overview of the basic query syntax in Apache Cloudberry. Apache Cloudberry also provides more advanced queries and functions to help developers perform complex data operations and analyses. ## See also diff --git a/docs/cbdb-architecture.md b/docs/cbdb-architecture.md index 22a7cf5955..606f24ba44 100644 --- a/docs/cbdb-architecture.md +++ b/docs/cbdb-architecture.md @@ -2,37 +2,37 @@ title: Architecture --- -# Cloudberry Database Architecture +# Apache Cloudberry Architecture -This document introduces the product architecture and the implementation mechanism of the internal modules in Cloudberry Database. +This document introduces the product architecture and the implementation mechanism of the internal modules in Apache Cloudberry. -In most cases, Cloudberry Database is similar to PostgreSQL in terms of SQL support, features, configuration options, and user functionalities. Users can interact with Cloudberry Database in a similar way to how they interact with a standalone PostgreSQL system. 
+In most cases, Apache Cloudberry is similar to PostgreSQL in terms of SQL support, features, configuration options, and user functionalities. Users can interact with Apache Cloudberry in a similar way to how they interact with a standalone PostgreSQL system. -Cloudberry Database uses MPP (Massively Parallel Processing) architecture to store and process large volumes of data, by distributing data and computing workloads across multiple servers or hosts. +Apache Cloudberry uses MPP (Massively Parallel Processing) architecture to store and process large volumes of data, by distributing data and computing workloads across multiple servers or hosts. -MPP, known as the shared-nothing architecture, refers to systems with multiple hosts that work together to perform a task. Each host has its own processor, memory, disk, network resources, and operating system. Cloudberry Database uses this high-performance architecture to distribute data loads and can use all system resources in parallel to process queries. +MPP, known as the shared-nothing architecture, refers to systems with multiple hosts that work together to perform a task. Each host has its own processor, memory, disk, network resources, and operating system. Apache Cloudberry uses this high-performance architecture to distribute data loads and can use all system resources in parallel to process queries. -From users' view, Cloudberry Database is a complete relational database management system (RDBMS). In a physical view, it contains multiple PostgreSQL instances. To make these independent PostgreSQL instances work together, Cloudberry Database performs distributed cluster processing at different levels for data storage, computing, communication, and management. Cloudberry Database hides the complex details of the distributed system, giving users a single logical database view. This greatly eases the work of developers and operational staff. 
+From users' view, Apache Cloudberry is a complete relational database management system (RDBMS). In a physical view, it contains multiple PostgreSQL instances. To make these independent PostgreSQL instances work together, Apache Cloudberry performs distributed cluster processing at different levels for data storage, computing, communication, and management. Apache Cloudberry hides the complex details of the distributed system, giving users a single logical database view. This greatly eases the work of developers and operational staff. -The architecture diagram of Cloudberry Database is as follows: +The architecture diagram of Apache Cloudberry is as follows: -![Cloudberry Database Architecture](./media/cbdb-arch.png) +![Apache Cloudberry Architecture](./media/cbdb-arch.png) -- **Coordinator node** (or control node) is the gateway to the Cloudberry Database system, which accepts client connections and SQL queries, and allocates tasks to data node instances. Users interact with Cloudberry Database by connecting to the coordinator node using a client program (such as psql) or an application programming interface (API) (such as JDBC, ODBC, or libpq PostgreSQL C API). - - The coordinator node acts as the global system directory, containing a set of system tables that record the metadata of Cloudberry Database. +- **Coordinator node** (or control node) is the gateway to the Apache Cloudberry system, which accepts client connections and SQL queries, and allocates tasks to data node instances. Users interact with Apache Cloudberry by connecting to the coordinator node using a client program (such as psql) or an application programming interface (API) (such as JDBC, ODBC, or libpq PostgreSQL C API). + - The coordinator node acts as the global system directory, containing a set of system tables that record the metadata of Apache Cloudberry. - The coordinator node does not store user data. User data is stored only in data node instances. 
- The coordinator node performs authentication for client connections, processes SQL commands, distributes workload among segments, coordinates the results returned by each segment, and returns the final results to the client program. - - Cloudberry Database uses Write Ahead Logging (WAL) for coordinator/standby mirroring. In WAL-based logging, all modifications are first written to a log before being written to the disk, which ensures the data integrity of in-process operations. + - Apache Cloudberry uses Write Ahead Logging (WAL) for coordinator/standby mirroring. In WAL-based logging, all modifications are first written to a log before being written to the disk, which ensures the data integrity of in-process operations. - **Segment** (or data node) instances are individual Postgres processes, each storing a portion of the data and executing the corresponding part of the query. When a user connects to the database through the coordinator node and submits a query request, a process is created on each segment node to handle the query. User-defined tables and their indexes are distributed across the available segments, and each segment node contains distinct portions of the data. The processes of data processing runs in the corresponding segment. Users interact with segments through the coordinator, and the segment operate on servers known as the segment host. - Typically, a segment host runs 2 to 8 data nodes, depending on the processor, memory, storage, network interface, and workload. The configuration of the segment host needs to be balanced, because evenly distributing the data and workload among segments is the key to achieving optimal performance with Cloudberry Database, which allows all segments to start processing a task and finish the work at the same time. + Typically, a segment host runs 2 to 8 data nodes, depending on the processor, memory, storage, network interface, and workload. 
The configuration of the segment host needs to be balanced, because evenly distributing the data and workload among segments is the key to achieving optimal performance with Apache Cloudberry, which allows all segments to start processing a task and finish the work at the same time. -- **Interconnect** is the network layer in the Cloudberry Database system architecture. Interconnect refers to the network infrastructure upon which the communication between the coordinator node and the segments relies, which uses a standard Ethernet switching structure. +- **Interconnect** is the network layer in the Apache Cloudberry system architecture. Interconnect refers to the network infrastructure upon which the communication between the coordinator node and the segments relies, which uses a standard Ethernet switching structure. - For performance reasons, a 10 GB or faster network is recommended. By default, the Interconnect module uses the UDP protocol with flow control (UDPIFC) for communication to send messages through the network. The data packet verification performed by Cloudberry Database exceeds the scope provided by UDP, which means that its reliability is equivalent to using the TCP protocol, and its performance and scalability surpass the TCP protocol. If the Interconnect is changed to the TCP protocol instead, the scalability of Cloudberry Database is limited to 1000 segments. This limit does not apply when UDPIFC is used as the default protocol. + For performance reasons, a 10 GB or faster network is recommended. By default, the Interconnect module uses the UDP protocol with flow control (UDPIFC) for communication to send messages through the network. The data packet verification performed by Apache Cloudberry exceeds the scope provided by UDP, which means that its reliability is equivalent to using the TCP protocol, and its performance and scalability surpass the TCP protocol. 
If the Interconnect is changed to the TCP protocol instead, the scalability of Apache Cloudberry is limited to 1000 segments. This limit does not apply when UDPIFC is used as the default protocol. -- Cloudberry Database uses Multiversion Concurrency Control (MVCC) to ensure data consistency. When querying the database, each transaction only sees a snapshot of the data, ensuring that current transactions do not see modifications made by other transactions on the same records. In this way, MVCC provides transaction isolation in the database. +- Apache Cloudberry uses Multiversion Concurrency Control (MVCC) to ensure data consistency. When querying the database, each transaction only sees a snapshot of the data, ensuring that current transactions do not see modifications made by other transactions on the same records. In this way, MVCC provides transaction isolation in the database. MVCC minimizes lock contention to ensure performance in a multi-user environment. This is done by avoiding explicit locking for database transactions. diff --git a/docs/cbdb-linux-compile.md b/docs/cbdb-linux-compile.md index a4feea0841..29442687cb 100644 --- a/docs/cbdb-linux-compile.md +++ b/docs/cbdb-linux-compile.md @@ -5,28 +5,28 @@ title: On Linux import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Compile and Install Cloudberry Database on Linux +# Compile and Install Apache Cloudberry on Linux :::info -The source of this document is from the GitHub repository [`cloudberrydb/cloudberrydb`](https://github.com/cloudberrydb/cloudberrydb/blob/main/deploy/build/README.Linux.md). +The source of this document is from the GitHub repository [`apache/cloudberry`](https://github.com/apache/cloudberry/blob/main/deploy/build/README.Linux.md). ::: -This document shares how to compile and install Cloudberry Database on Linux systems (CentOS 7, RHEL, and Ubuntu). Note that this document is for developers to try out Cloudberry Database in a single-node environments. 
DO NOT use this document for production environments. +This document shares how to compile and install Apache Cloudberry on Linux systems (CentOS 7, RHEL, and Ubuntu). Note that this document is for developers to try out Apache Cloudberry in a single-node environment. DO NOT use this document for production environments. -Take the following steps to compile and install Cloudberry Database: +Take the following steps to compile and install Apache Cloudberry: 1. [Clone GitHub repo](#step-1-clone-github-repo). 2. [Install dependencies](#step-2-install-dependencies). 3. [Perform prerequisite platform tasks](#step-3-perform-prerequisite-platform-tasks). -4. [Build Cloudberry Database](#step-4-build-cloudberry-database). +4. [Build Apache Cloudberry](#step-4-build-apache-cloudberry). 5. [Verify the cluster](#step-5-verify-the-cluster). ## Step 1. Clone GitHub repo -Clone the GitHub repository `cloudberrydb/cloudberrydb` to the target machine: +Clone the GitHub repository `apache/cloudberry` to the target machine: ```shell -git clone https://github.com/cloudberrydb/cloudberrydb.git +git clone https://github.com/apache/cloudberry.git ``` ## Step 2. Install dependencies @@ -38,10 +38,10 @@ Enter the repository and install dependencies according to your operating system The following steps work on CentOS 7. For other CentOS versions, these steps might work but are not guaranteed to work. -1. Run the Bash script `README.CentOS.bash` in the `deploy/build` directory of the `cloudberrydb/cloudberrydb` repository. To run this script, password is required. Then, some required dependencies will be automatically downloaded. +1. Run the Bash script `README.CentOS.bash` in the `deploy/build` directory of the `apache/cloudberry` repository. To run this script, password is required. Then, some required dependencies will be automatically downloaded.
```bash - cd cloudberrydb/deploy/build + cd cloudberry/deploy/build ./README.CentOS.bash ``` @@ -90,7 +90,7 @@ The following steps work on CentOS 7. For other CentOS versions, these steps mig 3. Install more dependencies by running the `README.Rhel-Rocky.bash` script. ```bash - cd ~/cloudberrydb/deploy/build/ + cd ~/cloudberry/deploy/build/ ./README.Rhel-Rocky.bash ``` @@ -101,7 +101,7 @@ The following steps work on CentOS 7. For other CentOS versions, these steps mig ```shell ## You need to enter your password to run. - sudo ~/cloudberrydb/deploy/build/README.Ubuntu.bash + sudo ~/cloudberry/deploy/build/README.Ubuntu.bash ``` :::info @@ -124,7 +124,7 @@ The following steps work on CentOS 7. For other CentOS versions, these steps mig ## Step 3. Perform prerequisite platform tasks -After you have installed all the dependencies for your operating system, it is time to do some prerequisite platform tasks before you go on building Cloudberry Database. These operation include manually running `ldconfig` on all platforms, creating the `gpadmin` user, and setting up a password to start the Cloudberry Database and test. +After you have installed all the dependencies for your operating system, it is time to do some prerequisite platform tasks before you go on building Apache Cloudberry. These operations include manually running `ldconfig` on all platforms, creating the `gpadmin` user, and setting up a password to start Apache Cloudberry and test. 1. Make sure that you add `/usr/local/lib` and `/usr/local/lib64` to the `/etc/ld.so.conf` file. @@ -162,22 +162,22 @@ After you have installed all the dependencies for your operating system, it is t -## Step 4. Build Cloudberry Database +## Step 4. Build Apache Cloudberry -After you have installed all the dependencies and performed the prerequisite platform tasks, you can start to build Cloudberry Database. Run the following commands in sequence.
+After you have installed all the dependencies and performed the prerequisite platform tasks, you can start to build Apache Cloudberry. Run the following commands in sequence. -1. Configure the build environment. Enter the `cloudberrydb` directory and run the `configure` script. +1. Configure the build environment. Enter the `cloudberry` directory and run the `configure` script. ```bash - cd cloudberrydb - ./configure --with-perl --with-python --with-libxml --with-gssapi --prefix=/usr/local/cloudberrydb + cd cloudberry + ./configure --with-perl --with-python --with-libxml --with-gssapi --prefix=/usr/local/cloudberry ``` :::info - Cloudberry Database is built with GPORCA by default. If you want to build CBDB without GPORCA, add the `--disable-orca` flag in the `./configure` command. + Apache Cloudberry is built with GPORCA by default. If you want to build CBDB without GPORCA, add the `--disable-orca` flag in the `./configure` command. ```bash - ./configure --disable-orca --with-perl --with-python --with-libxml --prefix=/usr/local/cloudberrydb + ./configure --disable-orca --with-perl --with-python --with-libxml --prefix=/usr/local/cloudberry ``` ::: @@ -193,12 +193,12 @@ After you have installed all the dependencies and performed the prerequisite pla ```bash cd .. - cp -r cloudberrydb/ /home/gpadmin/ + cp -r cloudberry/ /home/gpadmin/ cd /home/gpadmin/ - chown -R gpadmin:gpadmin cloudberrydb/ + chown -R gpadmin:gpadmin cloudberry/ su - gpadmin - cd cloudberrydb/ - source /usr/local/cloudberrydb/greenplum_path.sh + cd cloudberry/ + source /usr/local/cloudberry/greenplum_path.sh ``` 4. Start the demo cluster. @@ -238,7 +238,7 @@ After you have installed all the dependencies and performed the prerequisite pla ps -ef | grep postgres ``` -2. Connect to the Cloudberry Database and see the active segment information by querying the system table `gp_segement_configuration`. 
For detailed description of this table, see the Greenplum document [here](https://docs.vmware.com/en/VMware-Greenplum/6/greenplum-database/ref_guide-system_catalogs-gp_segment_configuration.html). +2. Connect to the Apache Cloudberry and see the active segment information by querying the system table `gp_segement_configuration`. For detailed description of this table, see the Greenplum document [here](https://docs.vmware.com/en/VMware-Greenplum/6/greenplum-database/ref_guide-system_catalogs-gp_segment_configuration.html). ```sql $ psql -p 7000 postgres @@ -248,19 +248,19 @@ After you have installed all the dependencies and performed the prerequisite pla postgres=# select version(); version ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - PostgreSQL 14.4 (Cloudberry Database 1.0.0+1c0d6e2224 build dev) on x86_64( GCC 13.2.0) 13.2.0, 64-bit compiled on Sep 22 2023 10:56:01 + PostgreSQL 14.4 (Apache Cloudberry 1.0.0+1c0d6e2224 build dev) on x86_64( GCC 13.2.0) 13.2.0, 64-bit compiled on Sep 22 2023 10:56:01 (1 row) postgres=# select * from gp_segment_configuration; dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir | warehouseid ------+---------+------+----------------+------+--------+------+------------+------------+------------------------------------------------------------------------------+------------- - 1 | -1 | p | p | n | u | 7000 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 | 0 - 8 | -1 | m | m | s | u | 7001 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/standby | 0 - 3 | 1 | p | p | s | u | 7003 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast2/demoDataDir1 | 0 - 6 | 1 | m | m | s | u | 7006 | i-6wvpa9wt | i-6wvpa9wt | 
/home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror2/demoDataDir1 | 0 - 2 | 0 | p | p | s | u | 7002 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0 | 0 - 5 | 0 | m | m | s | u | 7005 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0 | 0 - 4 | 2 | p | p | s | u | 7004 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2 | 0 - 7 | 2 | m | m | s | u | 7007 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2 | 0 + 1 | -1 | p | p | n | u | 7000 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 | 0 + 8 | -1 | m | m | s | u | 7001 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/standby | 0 + 3 | 1 | p | p | s | u | 7003 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast2/demoDataDir1 | 0 + 6 | 1 | m | m | s | u | 7006 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror2/demoDataDir1 | 0 + 2 | 0 | p | p | s | u | 7002 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0 | 0 + 5 | 0 | m | m | s | u | 7005 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0 | 0 + 4 | 2 | p | p | s | u | 7004 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2 | 0 + 7 | 2 | m | m | s | u | 7007 | i-6wvpa9wt | i-6wvpa9wt | /home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2 | 0 (8 rows) - ``` \ No newline at end of file + ``` diff --git a/docs/cbdb-macos-compile.md b/docs/cbdb-macos-compile.md index fe831c6165..9ec77e44ff 100644 --- a/docs/cbdb-macos-compile.md +++ b/docs/cbdb-macos-compile.md @@ -2,13 +2,13 @@ title: On macOS --- -# Compile and Install Cloudberry Database on macOS +# 
Compile and Install Apache Cloudberry on macOS :::info -The source of this document is from the GitHub repository [`cloudberrydb/cloudberrydb`](https://github.com/cloudberrydb/cloudberrydb/blob/main/deploy/build/README.macOS.md). +The source of this document is from the GitHub repository [`apache/cloudberry`](https://github.com/apache/cloudberry/blob/main/deploy/build/README.macOS.md). ::: -This document shares how to build, compile, and install Cloudberry Database on macOS (single node) for development and trial purposes. Follow the steps below. +This document shares how to build, compile, and install Apache Cloudberry on macOS (single node) for development and trial purposes. Follow the steps below. According to our test, these steps work well on macOS Ventura 13.4+ with both Intel and Apple silicon processors (M1 or M2). If you have an older version of macOS, upgrading is recommended. Make sure that the Mac you use has at least 4 cores and 8 GB memory, and is connected to the Internet. @@ -18,16 +18,16 @@ DO NOT use this guide for production deployment. ## Step 1. Install needed dependencies -1. Clone the source code of Cloudberry Database from GitHub to your local Mac. +1. Clone the source code of Apache Cloudberry from GitHub to your local Mac. ```bash - git clone git@github.com:cloudberrydb/cloudberrydb.git + git clone git@github.com:apache/cloudberry.git ``` -2. Enter the `cloudberrydb/` directory. +2. Enter the `cloudberry/` directory. ```bash - cd cloudberrydb/ + cd cloudberry/ ``` 3. Run the following command to install the needed dependencies. You will be asked to enter the `sudo` password of your macOS system. @@ -77,17 +77,17 @@ DO NOT use this guide for production deployment. ## Step 3. Configure, compile, and install ```bash -# Run the following commands under the `cloudberrydb/` dir. +# Run the following commands under the `cloudberry/` dir. # 1. Configure the build environment. 
BREWPREFIX=$(brew --prefix); export PATH="$BREWPREFIX/opt/gnu-sed/libexec/gnubin:$BREWPREFIX/opt/apr/bin:$PATH"; CXXFLAGS="-I $BREWPREFIX/include" CFLAGS="-ggdb -Og -g3 -fno-omit-frame-pointer -I $BREWPREFIX/include" LDFLAGS="-L $BREWPREFIX/lib" CC=$(which gcc-13) CXX=$(which g++-13) ./configure --enable-debug --prefix=$(cd ~; pwd)/install/cbdb; -# 2. Compile and install Cloudberry Database. +# 2. Compile and install Apache Cloudberry. make -j8 make -j8 install -# 3. Bring in Greenplum environment for Cloudberry Database into your running shell. +# 3. Bring in Greenplum environment for Apache Cloudberry into your running shell. source $(cd ~; pwd)/install/cbdb/greenplum_path.sh @@ -114,7 +114,7 @@ source gpAux/gpdemo/gpdemo-env.sh ps -ef | grep postgres ``` -2. Connect to the Cloudberry Database and see the active segment information by querying the system table `gp_segement_configuration`. For detailed description of this table, see the Greenplum document [here](https://docs.vmware.com/en/VMware-Greenplum/7/greenplum-database/ref_guide-system_catalogs-gp_segment_configuration.html). +2. Connect to Apache Cloudberry and see the active segment information by querying the system table `gp_segment_configuration`. For detailed description of this table, see the Greenplum document [here](https://docs.vmware.com/en/VMware-Greenplum/7/greenplum-database/ref_guide-system_catalogs-gp_segment_configuration.html).
```sql $ psql -p 8000 postgres @@ -132,20 +132,20 @@ source gpAux/gpdemo/gpdemo-env.sh postgres=# select version(); version ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - PostgreSQL 14.4 (Cloudberry Database 1.0.0+1c0d6e2224 build dev) on x86_64-apple-darwin22.4.0, compiled by gcc-13 (Homebrew GCC 13.2.0) 13.2.0, 64-bit compiled on Sep 22 2023 10:56:01 + PostgreSQL 14.4 (Apache Cloudberry 1.0.0+1c0d6e2224 build dev) on x86_64-apple-darwin22.4.0, compiled by gcc-13 (Homebrew GCC 13.2.0) 13.2.0, 64-bit compiled on Sep 22 2023 10:56:01 (1 row) postgres=# select * from gp_segment_configuration; dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir | warehouseid ------+---------+------+----------------+------+--------+------+-----------------------------+-----------------------------+----------------------------------------------------------------------------------------------------------+------------- - 1 | -1 | p | p | n | u | 8000 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 | 0 - 8 | -1 | m | m | s | u | 8001 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/standby | 0 - 3 | 1 | p | p | s | u | 8003 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/dbfast2/demoDataDir1 | 0 - 6 | 1 | m | m | s | u | 8006 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror2/demoDataDir1 | 0 - 2 | 0 | p | p | s | u | 8002 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0 | 0 - 5 | 0 | m | m | s | u | 8005 | cbdb.local | cbdb.local | 
/Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0 | 0 - 4 | 2 | p | p | s | u | 8004 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2 | 0 - 7 | 2 | m | m | s | u | 8007 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2 | 0 + 1 | -1 | p | p | n | u | 8000 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 | 0 + 8 | -1 | m | m | s | u | 8001 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/standby | 0 + 3 | 1 | p | p | s | u | 8003 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/dbfast2/demoDataDir1 | 0 + 6 | 1 | m | m | s | u | 8006 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror2/demoDataDir1 | 0 + 2 | 0 | p | p | s | u | 8002 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0 | 0 + 5 | 0 | m | m | s | u | 8005 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0 | 0 + 4 | 2 | p | p | s | u | 8004 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2 | 0 + 7 | 2 | m | m | s | u | 8007 | cbdb.local | cbdb.local | /Users/cbdb/Documents/GitHub/upstream/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2 | 0 (8 rows) postgres=# @@ -158,4 +158,4 @@ source gpAux/gpdemo/gpdemo-env.sh make installcheck-world ``` -Congratulations 🎉! You've successfully installed and created a CloudberryDB cluster. Happy Hacking! 😉 +Congratulations 🎉! You've successfully installed and created an Apache Cloudberry cluster. Happy Hacking!
😉 diff --git a/docs/cbdb-op-deploy-guide.md b/docs/cbdb-op-deploy-guide.md index 998ba4577f..5f12377008 100644 --- a/docs/cbdb-op-deploy-guide.md +++ b/docs/cbdb-op-deploy-guide.md @@ -32,14 +32,14 @@ After the preparation, it is time to install Apache Cloudberry. You need to down 2. Install the RPM package in the `/home/gpadmin` directory. - When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry-db/` is automatically created. + When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry/` is automatically created. ```bash cd /home/gpadmin yum install ``` -3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry-db/` directory. +3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry/` directory. ```bash chown -R gpadmin:gpadmin /usr/local @@ -74,14 +74,14 @@ After the preparation, it is time to install Apache Cloudberry. You need to down 1. Run `ssh-keygen` on each host to generate SSH key. For example: ```bash - [gpadmin@cbbd-coordinator cloudberry-db-1.0.0]$ ssh-keygen + [gpadmin@cbbd-coordinator cloudberry-1.0.0]$ ssh-keygen Generating public/private rsa key pair. - Enter file in which to save the key (/usr/local/cloudberry-db/.ssh/id_rsa): + Enter file in which to save the key (/usr/local/cloudberry/.ssh/id_rsa): Enter passphrase (empty for no passphrase): Enter same passphrase again: - Your identification has been saved in /usr/local/cloudberry-db/.ssh/id_rsa. - Your public key has been saved in /usr/local/cloudberry-db/.ssh/id_rsa.pub. + Your identification has been saved in /usr/local/cloudberry/.ssh/id_rsa. + Your public key has been saved in /usr/local/cloudberry/.ssh/id_rsa.pub. 
The key fingerprint is: SHA256:cvcYS87egYCyh/v6UtdqrejVU5qqF7OvpcHg/T9lRrg gpadmin@cbbd-coordinator The key's randomart image is: @@ -113,15 +113,15 @@ After the preparation, it is time to install Apache Cloudberry. You need to down ```bash [gpadmin@cbdb-coordinator ~]$ gpssh -f all_hosts => pwd - [ cbdb-datanode03] b'/usr/local/cloudberry-db\r' - [ cbdb-coordinator] b'/usr/local/cloudberry-db\r' - [ cbdb-datanode02] b'/usr/local/cloudberry-db\r' - [cbdb-standbycoordinator] b'/usr/local/cloudberry-db\r' - [ cbdb-datanode01] b'/usr/local/cloudberry-db\r' + [ cbdb-datanode03] b'/usr/local/cloudberry\r' + [ cbdb-coordinator] b'/usr/local/cloudberry\r' + [ cbdb-datanode02] b'/usr/local/cloudberry\r' + [cbdb-standbycoordinator] b'/usr/local/cloudberry\r' + [ cbdb-datanode01] b'/usr/local/cloudberry\r' => ``` - If you fail to run `gpssh`, you can first run `source /usr/local/cloudberry-db/greenplum_path.sh` on the coordinator node. + If you fail to run `gpssh`, you can first run `source /usr/local/cloudberry/greenplum_path.sh` on the coordinator node. ## Step 4. Initialize Apache Cloudberry @@ -130,7 +130,7 @@ Before performing the following operations, run `su - gpadmin` to switch to the 1. Add a new line of `source` command to the `~/.bashrc` files of all nodes (coordinator/standby coordinator/segment). The example is as follows: ```bash - source /usr/local/cloudberry-db/greenplum_path.sh + source /usr/local/cloudberry/greenplum_path.sh ``` 2. Run the `source` command to make the newly added content effective: diff --git a/docs/cbdb-overview.md b/docs/cbdb-overview.md index 977d358568..16e2ca035c 100644 --- a/docs/cbdb-overview.md +++ b/docs/cbdb-overview.md @@ -3,89 +3,89 @@ title: Feature Overview slug: / --- -# Cloudberry Database Feature Overview +# Apache Cloudberry Feature Overview -Cloudberry Database, built on the latest PostgreSQL 14.4 kernel, is one of the most advanced and mature open-source MPP databases available. 
It comes with multiple features, including high concurrency and high availability. It can perform quick and efficient computing for complex tasks, meeting the demands of managing and computing vast amounts of data. It is widely applied in multiple fields. +Apache Cloudberry, built on the latest PostgreSQL 14.4 kernel, is one of the most advanced and mature open-source MPP databases available. It comes with multiple features, including high concurrency and high availability. It can perform quick and efficient computing for complex tasks, meeting the demands of managing and computing vast amounts of data. It is widely applied in multiple fields. -This document gives a general introduction to the features of Cloudberry Database. +This document gives a general introduction to the features of Apache Cloudberry. ## Efficient queries in different scenarios -- Cloudberry Database allows you to perform efficient queries in big data analysis environments and distributed environments: +- Apache Cloudberry allows you to perform efficient queries in big data analysis environments and distributed environments: - - **Big data analysis environment**: Cloudberry Database uses the built-in PostgreSQL optimizer, which offers better support for distributed environments. This means that it can generate more efficient query plans when handling big data analysis tasks. - - **Distributed environment**: Built in with the specially-adapted open-source GPORCA optimizer, Cloudberry Database meets the query optimization needs in distributed environments. + - **Big data analysis environment**: Apache Cloudberry uses the built-in PostgreSQL optimizer, which offers better support for distributed environments. This means that it can generate more efficient query plans when handling big data analysis tasks. + - **Distributed environment**: Built in with the specially-adapted open-source GPORCA optimizer, Apache Cloudberry meets the query optimization needs in distributed environments. 
- Multiple technologies are used such as static and dynamic partition pruning, aggregate push-down, and join filtering to help you get the fastest and most accurate query results possible. - Both rule-based and cost-based query optimization methods are provided to help you generate more efficient query execution plans. ## Polymorphic data storage -For different scenarios, Cloudberry Database supports multiple storage formats, including Heap storage, AO row storage, and AOCS column storage. Cloudberry Database also supports partitioned tables. You can define the partitioning of a table based on certain conditions. When executing a query, it automatically filters out the sub-tables that are not needed for the query to improve query efficiency. +For different scenarios, Apache Cloudberry supports multiple storage formats, including Heap storage, AO row storage, and AOCS column storage. Apache Cloudberry also supports partitioned tables. You can define the partitioning of a table based on certain conditions. When executing a query, it automatically filters out the sub-tables that are not needed for the query to improve query efficiency.
Click to see details -- **Even data distribution**: By using Hash and Random methods for data distribution, Cloudberry Database takes better advantage of disk performance and solves I/O bottleneck issues. +- **Even data distribution**: By using Hash and Random methods for data distribution, Apache Cloudberry takes better advantage of disk performance and solves I/O bottleneck issues. - **Storage types**: - Row-based storage: Suitable for scenarios where most fields are frequently queried, and there are many random row accesses. - Column-based storage: When you need to query a small number of fields, this method can greatly save I/O operations, making it ideal for scenarios where large amounts of data are accessed frequently. -- **Specialized storage modes**: Cloudberry Database has different storage modes such as Heap storage, AO row storage, AOCS column storage to optimize the performance of different types of applications. At the finest granularity level of partitioning, a table can have multiple storage modes. +- **Specialized storage modes**: Apache Cloudberry has different storage modes such as Heap storage, AO row storage, AOCS column storage to optimize the performance of different types of applications. At the finest granularity level of partitioning, a table can have multiple storage modes. - **Support for partitioned tables**: You can define the partitioning of a table based on specific conditions. During querying, the system will automatically filter out the sub-tables that are not needed for the query to improve query efficiency. -- **Efficient data compression function**: Cloudberry Database supports multiple compression algorithms, such as Zlib 1-9 and Zstandard 1~19, to improve data processing performance and maintain a balance between CPU and compression ratio. 
+- **Efficient data compression function**: Apache Cloudberry supports multiple compression algorithms, such as Zlib 1-9 and Zstandard 1-19, to improve data processing performance and maintain a balance between CPU and compression ratio. - **Optimization for small tables**: You can choose to use the Replication Table and specify a custom Hash algorithm when creating the table, allowing for more flexible control of data distribution.
## Multi-layer data security -Cloudberry Database enhances user data protection by supporting function encryption and transparent data encryption (TDE). TDE means that the Cloudberry Database kernel performs these processes invisibly to users. The data formats subject to TDE include Heap tables, AO row storage, and AOCS column storage. In addition to common encryption algorithms like AES, Cloudberry Database also supports national secret algorithms, allowing seamless integration of your own algorithms into TDE process. +Apache Cloudberry enhances user data protection by supporting function encryption and transparent data encryption (TDE). TDE means that the Apache Cloudberry kernel performs these processes invisibly to users. The data formats subject to TDE include Heap tables, AO row storage, and AOCS column storage. In addition to common encryption algorithms like AES, Apache Cloudberry also supports national secret algorithms, allowing seamless integration of your own algorithms into TDE process.
Click to view details -Cloudberry Database focuses on data security and provides security protection measures. These security measures are designed to satisfy different database environment needs and offer multi-layer security protection: +Apache Cloudberry focuses on data security and provides security protection measures. These security measures are designed to satisfy different database environment needs and offer multi-layer security protection: -- **Database isolation**: In Cloudberry Database, data is not shared between databases, which achieves isolation in a multi-database environment. If cross-database access is required, you can use the DBLink feature. +- **Database isolation**: In Apache Cloudberry, data is not shared between databases, which achieves isolation in a multi-database environment. If cross-database access is required, you can use the DBLink feature. - **Internal data organization**: The logical organization of data in the database includes data objects such as tables, views, indexes, and functions. Data access can be performed across schemas. -- **Data storage security**: Cloudberry Database offers different storage modes to support data redundancy. It uses encryption methods including AES 128, AES 192, AES 256, DES, and national secret encryption to secure data storage. It also supports ciphertext authentication, which includes encryption algorithms like SCRAM-SHA-256, MD5, LDAP, RADIUS. -- **User data protection**: Cloudberry Database supports function encryption and decryption, and transparent data encryption and decryption. The process is implemented by the Cloudberry Database kernel without any user interaction. It supports data formats such as Heap tables, AO row storage, and AOCS column storage. In addition to common encryption algorithms like AES, Cloudberry Database also supports national secret algorithms, allowing you to easily add your own algorithms into transparent data encryption. 
-- **Detailed permission settings**: To satisfy different users and objects (like schemas, tables, rows, columns, views, functions), Cloudberry Database provides a range of permission setting options, including `SELECT`, `UPDATE`, execution, and ownership. +- **Data storage security**: Apache Cloudberry offers different storage modes to support data redundancy. It uses encryption methods including AES 128, AES 192, AES 256, DES, and national secret encryption to secure data storage. It also supports ciphertext authentication, which includes encryption algorithms like SCRAM-SHA-256, MD5, LDAP, RADIUS. +- **User data protection**: Apache Cloudberry supports function encryption and decryption, and transparent data encryption and decryption. The process is implemented by the Apache Cloudberry kernel without any user interaction. It supports data formats such as Heap tables, AO row storage, and AOCS column storage. In addition to common encryption algorithms like AES, Apache Cloudberry also supports national secret algorithms, allowing you to easily add your own algorithms into transparent data encryption. +- **Detailed permission settings**: To satisfy different users and objects (like schemas, tables, rows, columns, views, functions), Apache Cloudberry provides a range of permission setting options, including `SELECT`, `UPDATE`, execution, and ownership.
## Data loading -Cloudberry Database provides a series of efficient and flexible data loading solutions to meet various data processing needs, including parallel and persistent data loading, support for flexible data sources and file formats, integration of multiple ETL tools, and support for stream data loading and high-performance data access. +Apache Cloudberry provides a series of efficient and flexible data loading solutions to meet various data processing needs, including parallel and persistent data loading, support for flexible data sources and file formats, integration of multiple ETL tools, and support for stream data loading and high-performance data access.
Click to view details -- **Parallel and persistent data loading**: Cloudberry Database supports massive parallel and persistent data loading through external table technology, and performs automatic conversion between character sets, such as from GBK to UTF-8. This feature makes data entry much smoother. +- **Parallel and persistent data loading**: Apache Cloudberry supports massive parallel and persistent data loading through external table technology, and performs automatic conversion between character sets, such as from GBK to UTF-8. This feature makes data entry much smoother. -- **Flexible data source and file format support**: Cloudberry Database supports data sources such as external file servers, Hive, Hbase, HDFS or S3, and supports data formats such as CSV, Text, JSON, ORC, and Parquet. In addition, the database can also load compressed data files such as Zip. +- **Flexible data source and file format support**: Apache Cloudberry supports data sources such as external file servers, Hive, Hbase, HDFS or S3, and supports data formats such as CSV, Text, JSON, ORC, and Parquet. In addition, the database can also load compressed data files such as Zip. -- **Integrate multiple ETL tools**: Cloudberry Database is integrated with ETL tools such as DataStage, Informatica, and Kettle to facilitate data processing. +- **Integrate multiple ETL tools**: Apache Cloudberry is integrated with ETL tools such as DataStage, Informatica, and Kettle to facilitate data processing. -- **Support stream data loading**: Cloudberry Database can start multiple parallel read tasks for the subscribed Kafka topic, cache the read records, and load the records into the database via gpfdist after a certain time or number of records. This method can ensure the integrity of data without duplication or loss, and is suitable for stream data collection and real-time analysis scenarios. Cloudberry Database supports data loading throughput of tens of millions per minute. 
+- **Support stream data loading**: Apache Cloudberry can start multiple parallel read tasks for the subscribed Kafka topic, cache the read records, and load the records into the database via gpfdist after a certain time or number of records. This method can ensure the integrity of data without duplication or loss, and is suitable for stream data collection and real-time analysis scenarios. Apache Cloudberry supports data loading throughput of tens of millions per minute. -- **High-performance data access**: PXF is a built-in component of Cloudberry Database, which can map external data sources to external tables of Cloudberry Database to achieve parallel and high-speed data access. PXF supports the management and access of hybrid data ecology and helps realize the Data Fabric architecture. +- **High-performance data access**: PXF is a built-in component of Apache Cloudberry, which can map external data sources to external tables of Apache Cloudberry to achieve parallel and high-speed data access. PXF supports the management and access of hybrid data ecology and helps realize the Data Fabric architecture.
## Multi-layer fault tolerance -To ensure data security and service continuity, Cloudberry Database adopts a multi-level fault-tolerant mechanism of data pages, checksum, mirror node configuration, and control node backup. +To ensure data security and service continuity, Apache Cloudberry adopts a multi-level fault-tolerant mechanism of data pages, checksum, mirror node configuration, and control node backup.
Click to view details -- **Checksum of data page**: In the underlying storage, Cloudberry Database uses the checksum mechanism to detect bad blocks to ensure data integrity. +- **Checksum of data page**: In the underlying storage, Apache Cloudberry uses the checksum mechanism to detect bad blocks to ensure data integrity. -- **Mirror node configuration**: By configuring mirror nodes among segments (or data nodes), Cloudberry Database can achieve high availability and failover of services. Once an unrecoverable failure of the coordinator node is detected, the system will automatically switch to the backup segment to ensure that user queries will not be affected. +- **Mirror node configuration**: By configuring mirror nodes among segments (or data nodes), Apache Cloudberry can achieve high availability and failover of services. Once an unrecoverable failure of a segment is detected, the system will automatically switch to the backup segment to ensure that user queries will not be affected. - **Backup of control nodes**: Similar to segments, coordinator nodes (or control nodes) can also be configured as backup nodes or standby nodes in case the coordinator node fails. Once the coordinator node fails, the system will automatically switch to the standby node to ensure the continuity of services. @@ -93,55 +93,55 @@ To ensure data security and service continuity, Cloudberry Database adopts a mul ## Rich data analysis support -Cloudberry Database provides powerful data analysis features. These features make data processing, query and analysis more efficient, and meets multiple complex data processing, analysis and query requirements. +Apache Cloudberry provides powerful data analysis features. These features make data processing, query and analysis more efficient, and meet multiple complex data processing, analysis and query requirements.
Click to view details -- **Parallel optimizer and executor**: The Cloudberry Database kernel has a built-in parallel optimizer and executor, which is not only compatible with the PostgreSQL ecosystem, but also supports data partition pruning and multiple indexing technologies (including B-Tree, Bitmap, Hash, Brin, GIN), and JIT (expression just-in-time compilation processing). +- **Parallel optimizer and executor**: The Apache Cloudberry kernel has a built-in parallel optimizer and executor, which is not only compatible with the PostgreSQL ecosystem, but also supports data partition pruning and multiple indexing technologies (including B-Tree, Bitmap, Hash, Brin, GIN), and JIT (expression just-in-time compilation processing). -- **Machine learning components MADlib**: Cloudberry Database integrates MADlib components, providing users with fully SQL-driven machine learning features, enabling deep integration of algorithms, computing power, and data. +- **Machine learning components MADlib**: Apache Cloudberry integrates MADlib components, providing users with fully SQL-driven machine learning features, enabling deep integration of algorithms, computing power, and data. -- **Support multiple programming languages**: Cloudberry Database provides developers with rich programming languages, including R, Python, Perl, Java, and PostgreSQL, so that they can easily write custom functions. +- **Support multiple programming languages**: Apache Cloudberry provides developers with rich programming languages, including R, Python, Perl, Java, and PostgreSQL, so that they can easily write custom functions. -- **High-performance parallel computing based on MPP engine**: The MPP engine of Cloudberry Database supports high-performance parallel computing, seamlessly integrated with SQL, and can perform fast computing and analysis on SQL execution results. 
+- **High-performance parallel computing based on MPP engine**: The MPP engine of Apache Cloudberry supports high-performance parallel computing, seamlessly integrated with SQL, and can perform fast computing and analysis on SQL execution results. -- **PostGIS geographic data processing**: Cloudberry Database introduces an upgraded version of PostGIS 2.X, supports its MPP architecture, and further improves the processing capability of geospatial data. Key features include: +- **PostGIS geographic data processing**: Apache Cloudberry introduces an upgraded version of PostGIS 2.X, supports its MPP architecture, and further improves the processing capability of geospatial data. Key features include: - Support for object storage: supports directly loading large-capacity geospatial data from object storage (OSS) into the database. - Comprehensive spatial data type support: including geometry, geography, and raster. - Spatio-temporal index: Provides spatio-temporal index technology, which can effectively accelerate spatial and temporal queries. - Complex spatial and geographic calculations: including sphere length calculations as well as spatial aggregation functions (such as contain, cover, intersect). -- **Cloudberry Database text component**: This component supports using ElasticSearch to accelerate file retrieval capabilities. Compared with traditional GIN data text query performance, this component has an order of magnitude improvement. It supports multiple word segmentation, natural language processing, and query result rendering. +- **Apache Cloudberry text component**: This component supports using ElasticSearch to accelerate file retrieval capabilities. Compared with traditional GIN data text query performance, this component has an order of magnitude improvement. It supports multiple word segmentation, natural language processing, and query result rendering.
## Flexible workload management -Cloudberry Database provides comprehensive workload management capabilities designed to effectively utilize and optimize database resources to ensure efficient and stable operations. Its workload management includes three levels of control: connection level management, session level management, and SQL level management. +Apache Cloudberry provides comprehensive workload management capabilities designed to effectively utilize and optimize database resources to ensure efficient and stable operations. Its workload management includes three levels of control: connection level management, session level management, and SQL level management.
Click to view details -- **Connection pool PGBouncer (connection-level management)**: Through the connection pool, Cloudberry Database manages user access in a unified manner, and limits the number of concurrently active users to improve efficiency, and avoid wasting resources caused by frequently creating and destructing service processes. The connection pool has a small memory footprint and can support high concurrent connections, using libevent for Socket communication to improve communication efficiency. +- **Connection pool PGBouncer (connection-level management)**: Through the connection pool, Apache Cloudberry manages user access in a unified manner, and limits the number of concurrently active users to improve efficiency, and avoid wasting resources caused by frequently creating and destructing service processes. The connection pool has a small memory footprint and can support high concurrent connections, using libevent for Socket communication to improve communication efficiency. -- **Resource Group (session-level management)**: Through resource groups, Cloudberry Database can analyze and categorize typical workloads, and quantify the CPU, memory, concurrency and other resources required by each workload. In this way, according to the actual requirements of the workload, you can set a suitable resource group and dynamically adjust the resource usage to ensure the overall operating efficiency. At the same time, you can use rules to clean up idle sessions and release unnecessary resources. +- **Resource Group (session-level management)**: Through resource groups, Apache Cloudberry can analyze and categorize typical workloads, and quantify the CPU, memory, concurrency and other resources required by each workload. In this way, according to the actual requirements of the workload, you can set a suitable resource group and dynamically adjust the resource usage to ensure the overall operating efficiency. 
At the same time, you can use rules to clean up idle sessions and release unnecessary resources. -- **Dynamic resource group allocation (SQL-level management)**: Through dynamic resource group allocation, Cloudberry Database can flexibly allocate resources before or during the execution of SQL statements, which can give priority to specific queries and shorten the execution time. +- **Dynamic resource group allocation (SQL-level management)**: Through dynamic resource group allocation, Apache Cloudberry can flexibly allocate resources before or during the execution of SQL statements, which can give priority to specific queries and shorten the execution time.
## Multiple compatibility -The compatibility of Cloudberry Database is reflected in multiple aspects such as SQL syntax, components, tools and programs, hardware platforms and operating systems. This makes the database flexible enough to deal with different tools, platforms and languages. +The compatibility of Apache Cloudberry is reflected in multiple aspects such as SQL syntax, components, tools and programs, hardware platforms and operating systems. This makes the database flexible enough to deal with different tools, platforms and languages.
Click to view details -- **SQL compatibility**: Cloudberry Database is compatible with PostgreSQL and Greenplum syntax, supports SQL-92, SQL-99, and SQL 2003 standards, including SQL 2003 OLAP extensions, such as window functions, `rollup`, and `cube`. +- **SQL compatibility**: Apache Cloudberry is compatible with PostgreSQL and Greenplum syntax, supports SQL-92, SQL-99, and SQL 2003 standards, including SQL 2003 OLAP extensions, such as window functions, `rollup`, and `cube`. -- **Component compatibility**: Based on the PostgreSQL 14.4 kernel, Cloudberry Database is compatible with most of the PostgreSQL components and extensions commonly used. +- **Component compatibility**: Based on the PostgreSQL 14.4 kernel, Apache Cloudberry is compatible with most of the PostgreSQL components and extensions commonly used. - **Tool and program compatibility**: Good connectivity with various BI tools, mining forecasting tools, ETL tools, and J2EE/.NET applications. diff --git a/docs/cbdb-scenarios.md b/docs/cbdb-scenarios.md index 8f079c5108..9847bb88f2 100644 --- a/docs/cbdb-scenarios.md +++ b/docs/cbdb-scenarios.md @@ -2,11 +2,11 @@ title: User Scenarios --- -This document introduces the use cases of Cloudberry Database. +This document introduces the use cases of Apache Cloudberry. **Scenario 1: Batch processing data warehouse offline and building data marts** -- Builds high-performance Cloudberry Database warehouses and data marts for storing and querying large-scale datasets. This includes Operational Data Store (ODS), Data Warehouse Detail (DWD), and Data Warehouse Summary (DWS). Supports building source model, normalization model, dimension tables, fact tables, and more, with multiple ways to load source data into the data warehouse. +- Builds high-performance Apache Cloudberry warehouses and data marts for storing and querying large-scale datasets. This includes Operational Data Store (ODS), Data Warehouse Detail (DWD), and Data Warehouse Summary (DWS). 
Supports building source model, normalization model, dimension tables, fact tables, and more, with multiple ways to load source data into the data warehouse. - Supports multiple types of data processing. - Supports building data warehouse and data marts with high concurrency, high performance, and low maintenance cost. - Supports complex data analysis and query needs, including data aggregation, multi-dimensional analysis, and correlated queries. @@ -31,6 +31,6 @@ This document introduces the use cases of Cloudberry Database. **Scenario 6: Applicable to Geographic Information System (GIS) applications** -- Builds Geographic Information System (GIS) applications on Cloudberry Database. +- Builds Geographic Information System (GIS) applications on Apache Cloudberry. - Stores and queries geographic location data. Supports spatial data analysis, geocoding, and map visualization. - Can be applied to city planning, geographic analysis, and map navigation. diff --git a/docs/cbdb-vs-gp-features.md b/docs/cbdb-vs-gp-features.md index 8de2bb44d4..fd33156ff3 100644 --- a/docs/cbdb-vs-gp-features.md +++ b/docs/cbdb-vs-gp-features.md @@ -4,9 +4,9 @@ title: Comparison with Greenplum Features # Comparison with Greenplum Features -Cloudberry Database is 100% compatible with Greenplum, and provides all the Greenplum features you need. +Apache Cloudberry is 100% compatible with Greenplum, and provides all the Greenplum features you need. -In addition, Cloudberry Database possesses some features that Greenplum currently lacks or does not support. More details are listed below. +In addition, Apache Cloudberry possesses some features that Greenplum currently lacks or does not support. More details are listed below. 
## General features @@ -17,7 +17,7 @@ In addition, Cloudberry Database possesses some features that Greenplum currentl ::: -| Feature names | Cloudberry Database | Greenplum | +| Feature names | Apache Cloudberry | Greenplum | | ---------------------------------------- | ------------------- | ----------- | | `EXPLAIN` (WAL) support | ✅ | ❌ | | Multiranges | ✅ | ❌ | @@ -45,7 +45,7 @@ In addition, Cloudberry Database possesses some features that Greenplum currentl ## Performance-related features -| Feature names | Cloudberry Database | Greenplum | +| Feature names | Apache Cloudberry | Greenplum | | ------------------------------------------- | ------------------- | --------- | | `REINDEX CONCURRENTLY` | ✅ | ❌ | | Aggregation pushdown | ✅ | ❌ | @@ -64,7 +64,7 @@ In addition, Cloudberry Database possesses some features that Greenplum currentl ## Security-related features -| Feature names | Cloudberry Database | Greenplum | +| Feature names | Apache Cloudberry | Greenplum | | --------------------------- | ------------------- | --------- | | Transparent Data Encryption (TDE) | ✅ | ❌ | | Trusted extensions | ✅ | ❌ | diff --git a/docs/connect-to-cbdb.md b/docs/connect-to-cbdb.md index 81d16b0c3c..f99543c785 100644 --- a/docs/connect-to-cbdb.md +++ b/docs/connect-to-cbdb.md @@ -2,37 +2,37 @@ title: Connect to Database --- -# Connect to Cloudberry Database +# Connect to Apache Cloudberry -This document introduces how to connect to Cloudberry Database. +This document introduces how to connect to Apache Cloudberry. ## Connection parameters -Users can connect to Cloudberry Database using a PostgreSQL-compatible client program, such as `psql`. Users and administrators always connect to Cloudberry Database through the *coordinator*. The segments cannot accept client connections. +Users can connect to Apache Cloudberry using a PostgreSQL-compatible client program, such as `psql`. Users and administrators always connect to Apache Cloudberry through the *coordinator*. 
The segments cannot accept client connections. -To establish a connection to the Cloudberry Database coordinator, you will need to know the following connection information and configure your client program accordingly. +To establish a connection to the Apache Cloudberry coordinator, you will need to know the following connection information and configure your client program accordingly. |Connection parameter|Description|Environment variable| |--------------------|-----------|--------------------| |Application name|The application name that is connecting to the database. The default value, held in the `application_name` connection parameter is *psql*.|`$PGAPPNAME`| |Database name|The name of the database to which you want to connect. For a newly initialized system, use the `postgres` database to connect for the first time.|`$PGDATABASE`| -|Host name|The host name of the Cloudberry Database coordinator. The default host is the local host.|`$PGHOST`| -|Port|The port number that the Cloudberry Database coordinator instance is running on. The default is 5432.|`$PGPORT`| -|User name|The database user (role) name to connect as. This is not necessarily the same as your OS user name. Check with your Cloudberry Database administrator if you are not sure what you database user name is. Note that every Cloudberry Database system has one superuser account that is created automatically at initialization time. This account has the same name as the OS name of the user who initialized the Cloudberry Database system (typically `gpadmin`).|`$PGUSER`| +|Host name|The host name of the Apache Cloudberry coordinator. The default host is the local host.|`$PGHOST`| +|Port|The port number that the Apache Cloudberry coordinator instance is running on. The default is 5432.|`$PGPORT`| +|User name|The database user (role) name to connect as. This is not necessarily the same as your OS user name. Check with your Apache Cloudberry administrator if you are not sure what your database user name is. 
Note that every Apache Cloudberry system has one superuser account that is created automatically at initialization time. This account has the same name as the OS name of the user who initialized the Apache Cloudberry system (typically `gpadmin`).|`$PGUSER`| -[Connecting with psql](#connect-with-psql) provides example commands for connecting to Cloudberry Database. +[Connecting with psql](#connect-with-psql) provides example commands for connecting to Apache Cloudberry. ## Supported client applications -Users can connect to Cloudberry Database using various client applications: +Users can connect to Apache Cloudberry using various client applications: -- A number of [Cloudberry Database Client Applications](#client-utility-applications) are provided with your Cloudberry Database installation. The `psql` client application provides an interactive command-line interface to Cloudberry Database. -- Using standard [Database Application Interfaces](#connect-with-application-interfaces), such as ODBC and JDBC, users can create their own client applications that interface to Cloudberry Database. -- Most client tools that use standard database interfaces, such as ODBC and JDBC, can be configured to connect to Cloudberry Database. +- A number of [Apache Cloudberry Client Applications](#client-utility-applications) are provided with your Apache Cloudberry installation. The `psql` client application provides an interactive command-line interface to Apache Cloudberry. +- Using standard [Database Application Interfaces](#connect-with-application-interfaces), such as ODBC and JDBC, users can create their own client applications that interface to Apache Cloudberry. +- Most client tools that use standard database interfaces, such as ODBC and JDBC, can be configured to connect to Apache Cloudberry. 
### Client utility applications -Cloudberry Database comes installed with a number of client utility applications located in the `$GPHOME/bin` directory of your Cloudberry Database coordinator host installation. The following are the most commonly used client utility applications: +Apache Cloudberry comes installed with a number of client utility applications located in the `$GPHOME/bin` directory of your Apache Cloudberry coordinator host installation. The following are the most commonly used client utility applications: |Name|Usage| |----|-----| @@ -44,9 +44,9 @@ Cloudberry Database comes installed with a number of client utility applications |`reindexdb`|Reindexes a database| |`vacuumdb`|Garbage-collects and analyzes a database| -When using these client applications, you must connect to a database through the Cloudberry Database coordinator instance. You will need to know the name of your target database, the host name and port number of the coordinator, and what database user name to connect as. This information can be provided on the command-line using the options `-d`, `-h`, `-p`, and `-U` respectively. If an argument is found that does not belong to any option, it will be interpreted as the database name first. +When using these client applications, you must connect to a database through the Apache Cloudberry coordinator instance. You will need to know the name of your target database, the host name and port number of the coordinator, and what database user name to connect as. This information can be provided on the command-line using the options `-d`, `-h`, `-p`, and `-U` respectively. If an argument is found that does not belong to any option, it will be interpreted as the database name first. -All of these options have default values which will be used if the option is not specified. The default host is the local host. The default port number is 5432. The default user name is your OS system user name, as is the default database name. 
Note that OS user names and Cloudberry Database user names are not necessarily the same. +All of these options have default values which will be used if the option is not specified. The default host is the local host. The default port number is 5432. The default user name is your OS system user name, as is the default database name. Note that OS user names and Apache Cloudberry user names are not necessarily the same. If the default values are not correct, you can set the environment variables `PGDATABASE`, `PGHOST`, `PGPORT`, and `PGUSER` to the appropriate values, or use a `psql` `~/.pgpass` file to contain frequently-used passwords. @@ -86,7 +86,7 @@ At the prompt, you might type in SQL commands. A SQL command must end with a `;` ## Connect with application interfaces -You might want to develop your own client applications that interface to Cloudberry Database. PostgreSQL provides a number of database drivers for the most commonly used database application programming interfaces (APIs), which can also be used with Cloudberry Database. These drivers are available as a separate download. Each driver (except libpq, which comes with PostgreSQL) is an independent PostgreSQL development project and must be downloaded, installed and configured to connect to Cloudberry Database. The following drivers are available: +You might want to develop your own client applications that interface to Apache Cloudberry. PostgreSQL provides a number of database drivers for the most commonly used database application programming interfaces (APIs), which can also be used with Apache Cloudberry. These drivers are available as a separate download. Each driver (except libpq, which comes with PostgreSQL) is an independent PostgreSQL development project and must be downloaded, installed and configured to connect to Apache Cloudberry. 
The following drivers are available: |API|PostgreSQL Driver|Download Link| |---|-----------------|-------------| @@ -97,19 +97,19 @@ You might want to develop your own client applications that interface to Cloudbe |Python DBI|psycopg2|[https://www.psycopg.org/](https://www.psycopg.org/)| |libpq C Library|libpq|[https://www.postgresql.org/docs/14/libpq.html](https://www.postgresql.org/docs/14/libpq.html)| -General instructions for accessing a Cloudberry Database with an API are: +General instructions for accessing Apache Cloudberry with an API are: 1. Download your programming language platform and respective API from the appropriate source. For example, you can get the Java Development Kit (JDK) and JDBC API from Oracle. -2. Write your client application according to the API specifications. When programming your application, be aware of the SQL support in Cloudberry Database so you do not include any unsupported SQL syntax. +2. Write your client application according to the API specifications. When programming your application, be aware of the SQL support in Apache Cloudberry so you do not include any unsupported SQL syntax. -Download the appropriate driver and configure connectivity to your Cloudberry Database coordinator instance. +Download the appropriate driver and configure connectivity to your Apache Cloudberry coordinator instance. ## Troubleshoot connection problems -A number of things can prevent a client application from successfully connecting to Cloudberry Database. This topic explains some of the common causes of connection problems and how to correct them. +A number of things can prevent a client application from successfully connecting to Apache Cloudberry. This topic explains some of the common causes of connection problems and how to correct them. 
| Problem | Solution | | ------- | -------- | -| No `pg_hba.conf` entry for host or user | To enable Cloudberry Database to accept remote client connections, you must configure your Cloudberry Database coordinator instance so that connections are allowed from the client hosts and database users that will be connecting to Cloudberry Database. This is done by adding the appropriate entries to the `pg_hba.conf` configuration file (located in the coordinator instance's data directory). | -| Cloudberry Database is not running | If the Cloudberry Database coordinator instance is down, users will not be able to connect. You can verify that the Cloudberry Database system is up by running the `gpstate` utility on the Cloudberry Database coordinator host. | -| Network problems: Interconnect timeouts | If users connect to the Cloudberry Database coordinator host from a remote client, network problems can prevent a connection (for example, DNS host name resolution problems, the host system is down, and so on.). To ensure that network problems are not the cause, connect to the Cloudberry Database coordinator host from the remote client host. For example: `ping hostname`

If the system cannot resolve the host names and IP addresses of the hosts involved in Cloudberry Database, queries and connections will fail. For some operations, connections to the Cloudberry Database coordinator use `localhost` and others use the actual host name, so you must be able to resolve both. If you encounter this error, first make sure you can connect to each host in your Cloudberry Database array from the coordinator host over the network. In the `/etc/hosts` file of the coordinator and all segments, make sure you have the correct host names and IP addresses for all hosts involved in the Cloudberry Database array. The `127.0.0.1` IP must resolve to `localhost`. | +| No `pg_hba.conf` entry for host or user | To enable Apache Cloudberry to accept remote client connections, you must configure your Apache Cloudberry coordinator instance so that connections are allowed from the client hosts and database users that will be connecting to Apache Cloudberry. This is done by adding the appropriate entries to the `pg_hba.conf` configuration file (located in the coordinator instance's data directory). | +| Apache Cloudberry is not running | If the Apache Cloudberry coordinator instance is down, users will not be able to connect. You can verify that the Apache Cloudberry system is up by running the `gpstate` utility on the Apache Cloudberry coordinator host. | +| Network problems: Interconnect timeouts | If users connect to the Apache Cloudberry coordinator host from a remote client, network problems can prevent a connection (for example, DNS host name resolution problems, the host system is down, and so on.). To ensure that network problems are not the cause, connect to the Apache Cloudberry coordinator host from the remote client host. For example: `ping hostname`

If the system cannot resolve the host names and IP addresses of the hosts involved in Apache Cloudberry, queries and connections will fail. For some operations, connections to the Apache Cloudberry coordinator use `localhost` and others use the actual host name, so you must be able to resolve both. If you encounter this error, first make sure you can connect to each host in your Apache Cloudberry array from the coordinator host over the network. In the `/etc/hosts` file of the coordinator and all segments, make sure you have the correct host names and IP addresses for all hosts involved in the Apache Cloudberry array. The `127.0.0.1` IP must resolve to `localhost`. | diff --git a/docs/create-and-manage-database.md b/docs/create-and-manage-database.md index 22691e71cc..c9b4254e2a 100644 --- a/docs/create-and-manage-database.md +++ b/docs/create-and-manage-database.md @@ -2,21 +2,21 @@ title: Create and Manage Database --- -# Create and Manage Cloudberry Database +# Create and Manage Apache Cloudberry -A Cloudberry Database system is a single instance of Cloudberry Database. There can be multiple running Cloudberry Database systems co-existing with each other, but usually a client can only connect to one of them. +An Apache Cloudberry system is a single instance of Apache Cloudberry. There can be multiple running Apache Cloudberry systems co-existing with each other, but usually a client can only connect to one of them. -There can be multiple databases in a Cloudberry Database system. This is different from some database management systems (such as Oracle) where the database instance *is* the database. Although you can create many databases in a Cloudberry Database system, client programs can connect to and access only one database at a time — you cannot cross-query between databases. +There can be multiple databases in an Apache Cloudberry system. This is different from some database management systems (such as Oracle) where the database instance *is* the database. 
Although you can create many databases in an Apache Cloudberry system, client programs can connect to and access only one database at a time — you cannot cross-query between databases. ## About template and default databases -Cloudberry Database provides some template databases and a default database, *template1*, *template0*, and *postgres*. +Apache Cloudberry provides some template databases and a default database, *template1*, *template0*, and *postgres*. -By default, each new database you create is based on a *template1* database. Cloudberry Database uses *template1* to create databases unless you specify another template. Creating objects in *template1* is not recommended. The objects will be in every database you create using the default template database. +By default, each new database you create is based on a *template1* database. Apache Cloudberry uses *template1* to create databases unless you specify another template. Creating objects in *template1* is not recommended. The objects will be in every database you create using the default template database. -Cloudberry Database uses another database template, *template0*, internally. Do not drop or modify *template0*. You can use *template0* to create a completely clean database containing only the standard objects predefined by Cloudberry Database at initialization. +Apache Cloudberry uses another database template, *template0*, internally. Do not drop or modify *template0*. You can use *template0* to create a completely clean database containing only the standard objects predefined by Apache Cloudberry at initialization. -You can use the *postgres* database to connect to Cloudberry Database for the first time. Cloudberry Database uses *postgres* as the default database for administrative connections. +You can use the *postgres* database to connect to Apache Cloudberry for the first time. Apache Cloudberry uses *postgres* as the default database for administrative connections. 
## Create a database @@ -26,17 +26,17 @@ The `CREATE DATABASE` command creates a new database. For example: => CREATE DATABASE ; ``` -To create a database, you must have privileges to create a database or be a Cloudberry Database superuser. If you do not have the correct privileges, you cannot create a database. Contact your Cloudberry Database administrator to either give you the necessary privilege or to create a database for you. +To create a database, you must have privileges to create a database or be an Apache Cloudberry superuser. If you do not have the correct privileges, you cannot create a database. Contact your Apache Cloudberry administrator to either give you the necessary privilege or to create a database for you. -You can also use the client program `createdb` to create a database. For example, running the following command in a command line terminal connects to Cloudberry Database using the provided host name and port and creates a database named *mydatabase*: +You can also use the client program `createdb` to create a database. For example, running the following command in a command line terminal connects to Apache Cloudberry using the provided host name and port and creates a database named *mydatabase*: ```shell $ createdb -h coordinator_host -p 5432 mydatabase ``` -The host name and port must match the host name and port of the installed Cloudberry Database system. +The host name and port must match the host name and port of the installed Apache Cloudberry system. -Some objects, such as roles, are shared by all the databases in a Cloudberry Database system. Other objects, such as tables that you create, are known only in the database in which you create them. +Some objects, such as roles, are shared by all the databases in an Apache Cloudberry system. Other objects, such as tables that you create, are known only in the database in which you create them. :::caution The `CREATE DATABASE` command is not transactional. 
@@ -60,7 +60,7 @@ Another database owner can be assigned when a database is created: ## View the list of databases -If you are working in the `psql` client program, you can use the `\l` meta-command to show the list of databases and templates in your Cloudberry Database system. If using another client program and you are a superuser, you can query the list of databases from the `pg_database` system catalog table. For example: +If you are working in the `psql` client program, you can use the `\l` meta-command to show the list of databases and templates in your Apache Cloudberry system. If using another client program and you are a superuser, you can query the list of databases from the `pg_database` system catalog table. For example: ```sql => SELECT datname from pg_database; @@ -85,7 +85,7 @@ The `DROP DATABASE` command drops (or deletes) a database. It removes the system => DROP DATABASE mydatabase; ``` -You can also use the client program `dropdb` to drop a database. For example, the following command connects to Cloudberry Database using the provided host name and port and drops the database *mydatabase*: +You can also use the client program `dropdb` to drop a database. For example, the following command connects to Apache Cloudberry using the provided host name and port and drops the database *mydatabase*: ```shell $ dropdb -h coordinator_host -p 5432 mydatabase diff --git a/docs/create-and-manage-materialized-views.md b/docs/create-and-manage-materialized-views.md index 546394a2fd..642aedac7a 100644 --- a/docs/create-and-manage-materialized-views.md +++ b/docs/create-and-manage-materialized-views.md @@ -2,9 +2,9 @@ title: Create and Manage Materialized Views --- -# Create and Manage Materialized Views in Cloudberry Database +# Create and Manage Materialized Views in Apache Cloudberry -In Cloudberry Database, materialized views are similar to views. 
A materialized view enables you to save a frequently used or complex query, then access the query results in a `SELECT` statement as if they were a table. Materialized views persist the query results in a table-like form. +In Apache Cloudberry, materialized views are similar to views. A materialized view enables you to save a frequently used or complex query, then access the query results in a `SELECT` statement as if they were a table. Materialized views persist the query results in a table-like form. Although accessing the data stored in a materialized view can be much faster than accessing the underlying tables directly or through a regular view, the data is not always current. The materialized view data cannot be directly updated. To refresh the materialized view data, use the `REFRESH MATERIALIZED VIEW` command. @@ -27,7 +27,7 @@ The materialized view might be useful for displaying a graph in the dashboard cr REFRESH MATERIALIZED VIEW sales_summary; ``` -The information about a materialized view in the Cloudberry Database system catalogs is exactly the same as it is for a table or view. A materialized view is a relation, just like a table or a view. When a materialized view is referenced in a query, the data is returned directly from the materialized view, just like from a table. The query in the materialized view definition is only used for populating the materialized view. +The information about a materialized view in the Apache Cloudberry system catalogs is exactly the same as it is for a table or view. A materialized view is a relation, just like a table or a view. When a materialized view is referenced in a query, the data is returned directly from the materialized view, just like from a table. The query in the materialized view definition is only used for populating the materialized view. If you can tolerate periodically updating the materialized view data, you can get great performance benefit from the view. 
diff --git a/docs/create-and-manage-schemas.md b/docs/create-and-manage-schemas.md index 70e686b225..363ee80a69 100644 --- a/docs/create-and-manage-schemas.md +++ b/docs/create-and-manage-schemas.md @@ -2,9 +2,9 @@ title: Create and Manage Schemas --- -# Create and Manage Schemas in Cloudberry Database +# Create and Manage Schemas in Apache Cloudberry -In Cloudberry Database, schemas logically organize objects and data in a database. Schemas allow you to have more than one object (such as tables) with the same name in the database without conflict if the objects are in different schemas. +In Apache Cloudberry, schemas logically organize objects and data in a database. Schemas allow you to have more than one object (such as tables) with the same name in the database without conflict if the objects are in different schemas. ## The Default "Public" Schema @@ -59,7 +59,7 @@ pg_catalog; ``` :::tip -When working with Cloudberry Database, it is recommended that you: +When working with Apache Cloudberry, it is recommended that you: - Explicitly specify the schema when creating new objects to ensure they are created in the intended schema. - Not rely on `search_path` to implicitly direct the creation of new objects to a specific schema other than `public`. Otherwise, the database might create the objects in an unintended schema. @@ -99,7 +99,7 @@ The following system-level schemas exist in every database: - `pg_catalog` contains the system catalog tables, built-in data types, functions, and operators. It is always part of the schema search path, even if it is not explicitly named in the search path. - `information_schema` consists of a standardized set of views that contain information about the objects in the database. These views get system information from the system catalog tables in a standardized way. -- `pg_toast` stores large objects such as records that exceed the page size. This schema is used internally by the Cloudberry Database system. 
-- `pg_bitmapindex` stores bitmap index objects such as lists of values. This schema is used internally by the Cloudberry Database system. -- `pg_aoseg` stores append-optimized table objects. This schema is used internally by the Cloudberry Database system. +- `pg_toast` stores large objects such as records that exceed the page size. This schema is used internally by the Apache Cloudberry system. +- `pg_bitmapindex` stores bitmap index objects such as lists of values. This schema is used internally by the Apache Cloudberry system. +- `pg_aoseg` stores append-optimized table objects. This schema is used internally by the Apache Cloudberry system. - `gp_toolkit` is an administrative schema that contains external tables, views, and functions that you can access with SQL commands. All database users can access `gp_toolkit` to view and query the system log files and other system metrics. diff --git a/docs/create-and-manage-tables.md b/docs/create-and-manage-tables.md index c9b1f7bdd5..cf9f20875f 100644 --- a/docs/create-and-manage-tables.md +++ b/docs/create-and-manage-tables.md @@ -2,9 +2,9 @@ title: Create and Manage Tables --- -# Create and Manage Tables in Cloudberry Database +# Create and Manage Tables in Apache Cloudberry -Cloudberry Database tables are similar to tables in any relational database, except that table rows are distributed across the different segments in the system. When you create a table, you specify the table's distribution policy. +Apache Cloudberry tables are similar to tables in any relational database, except that table rows are distributed across the different segments in the system. When you create a table, you specify the table's distribution policy. ## Create a table @@ -12,7 +12,7 @@ The `CREATE TABLE` command creates a table and defines its structure. When you c - The columns of the table and their associated data types. See [Choose column data types](#choose-column-data-types). 
- Any table or column constraints to limit the data that a column or table can contain. See [Setting table and column constraints](#set-table-and-column-constraints). -- The distribution policy of the table, which determines how Cloudberry Database divides data across the segments. +- The distribution policy of the table, which determines how Apache Cloudberry divides data across the segments. - The way the table is stored on disk. - The table partitioning strategy for large tables. @@ -20,7 +20,7 @@ The `CREATE TABLE` command creates a table and defines its structure. When you c The data type of a column determines the types of data values the column can contain. Choose the data type that uses the least possible space but can still accommodate your data and that best constrains the data. For example, use character data types for strings, date or timestamp data types for dates, and numeric data types for numbers. -For table columns that contain textual data, specify the data type `VARCHAR` or `TEXT`. Specifying the data type `CHAR` is not recommended. In Cloudberry Database, the data types `VARCHAR` or `TEXT` handle padding added to the data (space characters added after the last non-space character) as significant characters, the data type `CHAR` does not. +For table columns that contain textual data, specify the data type `VARCHAR` or `TEXT`. Specifying the data type `CHAR` is not recommended. In Apache Cloudberry, the data types `VARCHAR` or `TEXT` handle padding added to the data (space characters added after the last non-space character) as significant characters, the data type `CHAR` does not. Use the smallest numeric data type that will accommodate your numeric data and allow for future expansion. For example, using `BIGINT` for data that fits in `INT` or `SMALLINT` wastes storage space. If you expect that your data values will expand over time, consider that changing from a smaller datatype to a larger datatype after loading large amounts of data is costly. 
For example, if your current data values fit in a `SMALLINT` but it is likely that the values will expand, `INT` is the better long-term choice. @@ -28,7 +28,7 @@ Use the same data types for columns that you plan to use in cross-table joins. C ### Set table and column constraints -You can define constraints on columns and tables to restrict the data in your tables. Cloudberry Database support for constraints is the same as PostgreSQL with some limitations, including: +You can define constraints on columns and tables to restrict the data in your tables. Apache Cloudberry support for constraints is the same as PostgreSQL with some limitations, including: - `CHECK` constraints can refer only to the table on which they are defined. - `UNIQUE` and `PRIMARY KEY` constraints must be compatible with their table's distribution key and partitioning key, if any. @@ -91,7 +91,7 @@ A primary key constraint is a combination of a `UNIQUE` constraint and a `NOT NU Foreign keys are not supported. You can declare them, but referential integrity is not enforced. -Foreign key constraints specify that the values in a column or a group of columns must match the values appearing in some row of another table to maintain referential integrity between two related tables. Referential integrity checks cannot be enforced between the distributed table segments of a Cloudberry database. +Foreign key constraints specify that the values in a column or a group of columns must match the values appearing in some row of another table to maintain referential integrity between two related tables. Referential integrity checks cannot be enforced between the distributed table segments of an Apache Cloudberry database. #### Exclusion constraints @@ -108,6 +108,6 @@ Similar to unique constraints, an exclusion constraint is permitted only for rep Exclusion constraints are not supported for partitioned tables. -See also [`CREATE TABLE ... CONSTRAINT ... 
EXCLUDE`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/create-table.md) for details. +See also [`CREATE TABLE ... CONSTRAINT ... EXCLUDE`](https://github.com/apache/cloudberry-site/blob/cbdb-doc-validation/docs/sql-stmts/create-table.md) for details. Adding an exclusion constraint automatically creates an index of the type specified in the constraint declaration. diff --git a/docs/create-and-manage-tablespaces.md b/docs/create-and-manage-tablespaces.md index 29a5ac67e2..9bd6439ff6 100644 --- a/docs/create-and-manage-tablespaces.md +++ b/docs/create-and-manage-tablespaces.md @@ -6,11 +6,11 @@ title: Create and Manage Tablespaces Tablespaces allow database administrators to have multiple file systems per machine and decide how to best use physical storage to store database objects. Tablespaces allow you to assign different storage for frequently and infrequently used database objects or to control the I/O performance on certain database objects. For example, place frequently-used tables on file systems that use high performance solid-state drives (SSD), and place other tables on standard hard drives. -A tablespace requires a host file system location to store its database files. In Cloudberry Database, the file system location must exist on all hosts including the hosts running the coordinator, standby coordinator, each primary segment, and each mirror segment. +A tablespace requires a host file system location to store its database files. In Apache Cloudberry, the file system location must exist on all hosts including the hosts running the coordinator, standby coordinator, each primary segment, and each mirror segment. -A tablespace is Cloudberry Database system object (a global object), you can use a tablespace from any database if you have appropriate privileges. +A tablespace is an Apache Cloudberry system object (a global object), you can use a tablespace from any database if you have appropriate privileges. 
-> **Note** Cloudberry Database does not support different tablespace locations for a primary-mirror pair with the same content ID. It is only possible to configure different locations for different content IDs. Do not modify symbolic links under the `pg_tblspc` directory so that primary-mirror pairs point to different file locations; this will lead to erroneous behavior. +> **Note** Apache Cloudberry does not support different tablespace locations for a primary-mirror pair with the same content ID. It is only possible to configure different locations for different content IDs. Do not modify symbolic links under the `pg_tblspc` directory so that primary-mirror pairs point to different file locations; this will lead to erroneous behavior. ## Creating a Tablespace @@ -51,11 +51,11 @@ There is also the `temp_tablespaces` configuration parameter, which determines t The tablespace associated with a database stores that database's system catalogs, temporary files created by server processes using that database, and is the default tablespace selected for tables and indexes created within the database, if no `TABLESPACE` is specified when the objects are created. If you do not specify a tablespace when you create a database, the database uses the same tablespace used by its template database. -You can use a tablespace from any database in the Cloudberry Database system if you have appropriate privileges. +You can use a tablespace from any database in the Apache Cloudberry system if you have appropriate privileges. ## Viewing Existing Tablespaces -Every Cloudberry Database system has the following default tablespaces. +Every Apache Cloudberry system has the following default tablespaces. - `pg_global` for shared system catalogs. - `pg_default`, the default tablespace. Used by the *template1* and *template0* databases. 
@@ -121,7 +121,7 @@ You cannot drop a tablespace if it is not empty or if it stores temporary or tra You can move temporary or transaction files to a specific tablespace to improve database performance when running queries, creating backups, and to store data more sequentially. -The Cloudberry Database server configuration parameter `temp_tablespaces` controls the location for both temporary tables and temporary spill files for hash aggregate and hash join queries. Temporary files for purposes such as sorting large data sets are also created in these tablespaces. +The Apache Cloudberry server configuration parameter `temp_tablespaces` controls the location for both temporary tables and temporary spill files for hash aggregate and hash join queries. Temporary files for purposes such as sorting large data sets are also created in these tablespaces. `temp_tablespaces` specifies tablespaces in which to create temporary objects (temp tables and indexes on temp tables) when a `CREATE` command does not explicitly specify a tablespace. diff --git a/docs/create-and-manage-views.md b/docs/create-and-manage-views.md index 3591a35755..2a886885ef 100644 --- a/docs/create-and-manage-views.md +++ b/docs/create-and-manage-views.md @@ -2,9 +2,9 @@ title: Create and Manage Views --- -# Create and Manage Views in Cloudberry Database +# Create and Manage Views in Apache Cloudberry -In Cloudberry Database, views enable you to save frequently used or complex queries, then access them in a `SELECT` statement as if they were a table. A view is not physically materialized on disk: the query runs as a subquery when you access the view. +In Apache Cloudberry, views enable you to save frequently used or complex queries, then access them in a `SELECT` statement as if they were a table. A view is not physically materialized on disk: the query runs as a subquery when you access the view. 
## Creating views diff --git a/docs/data-loading/index.md b/docs/data-loading/index.md index 2715239e71..cdf9b96263 100644 --- a/docs/data-loading/index.md +++ b/docs/data-loading/index.md @@ -4,11 +4,11 @@ title: Data Loading Overview # Data Loading Overview -Cloudberry Database loads data mainly by transforming external data into external tables (or foreign tables) via loading tools. Then it reads data from these external tables or writes data into them to achieve external data loading. +Apache Cloudberry loads data mainly by transforming external data into external tables (or foreign tables) via loading tools. Then it reads data from these external tables or writes data into them to achieve external data loading. ## Loading process -The general process of loading external data into Cloudberry Database is as follows: +The general process of loading external data into Apache Cloudberry is as follows: 1. Assess the data loading scenario (such as data source location, data type, and data volume) and select an appropriate loading tool. 2. Set up and enable the loading tool. @@ -17,7 +17,7 @@ The general process of loading external data into Cloudberry Database is as foll ## Loading methods and scenarios -Cloudberry Database offers multiple data loading solutions, and you can select different data loading methods according to different data sources. +Apache Cloudberry offers multiple data loading solutions, and you can select different data loading methods according to different data sources. 
| Loading method | Data source | Data format | Parallel or not | | -------------------------- | ----------------------------------------------------------- | ------------------------------------------------------------ | -------- | diff --git a/docs/data-loading/load-data-from-web-services.md b/docs/data-loading/load-data-from-web-services.md index 62806dd486..e4e9be716e 100644 --- a/docs/data-loading/load-data-from-web-services.md +++ b/docs/data-loading/load-data-from-web-services.md @@ -4,9 +4,9 @@ title: Load Data from Web Services # Load Data from Web Services -In Cloudberry Database, to load data from web services or from any source accessible by command lines, you can create external web tables. The supported data formats are `TEXT` and `CSV`. +In Apache Cloudberry, to load data from web services or from any source accessible by command lines, you can create external web tables. The supported data formats are `TEXT` and `CSV`. -External web tables allow Cloudberry Database to treat dynamic data sources like regular database tables. Because web table data can change as a query runs, the data is not rescannable. +External web tables allow Apache Cloudberry to treat dynamic data sources like regular database tables. Because web table data can change as a query runs, the data is not rescannable. `CREATE EXTERNAL WEB TABLE` creates a web table definition. You can define command-based or URL-based external web tables. The definition forms are different. Do not mix command-based and URL-based definitions. @@ -39,7 +39,7 @@ The following command defines a web table that runs a script. The script runs on A URL-based web table accesses data from a web server using the HTTP protocol. Web table data is dynamic; the data is not rescannable. -Specify the `LOCATION` of files on a web server using `http://`. The web data file(s) must reside on a web server that Cloudberry Database segment hosts can access. 
The number of URLs specified corresponds to the number of segment instances that work in parallel to access the web table. For example, if you specify 2 external files to a Cloudberry Database system with 8 primary segments, 2 of the 8 segments access the web table in parallel at query runtime. +Specify the `LOCATION` of files on a web server using `http://`. The web data file(s) must reside on a web server that Apache Cloudberry segment hosts can access. The number of URLs specified corresponds to the number of segment instances that work in parallel to access the web table. For example, if you specify 2 external files to an Apache Cloudberry system with 8 primary segments, 2 of the 8 segments access the web table in parallel at query runtime. The following sample command defines a web table that gets data from several URLs. diff --git a/docs/data-loading/load-data-using-copy.md b/docs/data-loading/load-data-using-copy.md index 9c629a398f..06aca6565d 100644 --- a/docs/data-loading/load-data-using-copy.md +++ b/docs/data-loading/load-data-using-copy.md @@ -2,13 +2,13 @@ title: Load Data Using COPY --- -# Load Data into Cloudberry Database Using `COPY` +# Load Data into Apache Cloudberry Using `COPY` -`COPY FROM` copies data from a file or standard input in a local file system into a table and appends the data to the table contents. `COPY` is non-parallel: data is loaded in a single process using the Cloudberry Database coordinator instance. Using `COPY` is only recommended for very small data files. +`COPY FROM` copies data from a file or standard input in a local file system into a table and appends the data to the table contents. `COPY` is non-parallel: data is loaded in a single process using the Apache Cloudberry coordinator instance. Using `COPY` is only recommended for very small data files. The `COPY` source file must be accessible to the `postgres` process on the coordinator host. 
Specify the `COPY` source file name relative to the data directory on the coordinator host, or specify an absolute path. -Cloudberry Database copies data from `STDIN` or `STDOUT` using the connection between the client and the coordinator server. +Apache Cloudberry copies data from `STDIN` or `STDOUT` using the connection between the client and the coordinator server. ## Load from a file diff --git a/docs/data-loading/load-data-using-file-protocol.md b/docs/data-loading/load-data-using-file-protocol.md index d37c8ce0b4..3bbdb0aa7d 100644 --- a/docs/data-loading/load-data-using-file-protocol.md +++ b/docs/data-loading/load-data-using-file-protocol.md @@ -2,17 +2,17 @@ title: Load Data Using the File Protocol --- -# Load Data into Cloudberry Database Using the `file://` Protocol +# Load Data into Apache Cloudberry Using the `file://` Protocol -The `file://` protocol is a Cloudberry Database-specific protocol that allows you to load data from a local segment host server file into Cloudberry Database. +The `file://` protocol is an Apache Cloudberry-specific protocol that allows you to load data from a local segment host server file into Apache Cloudberry. The `file://` protocol is used in a URI that specifies the location of an operating system file. External tables that you create that specify the `file://` protocol are read-only tables. -The URI includes the host name, port, and path to the file. Each file must locate on a segment host in a location accessible by the Cloudberry Database superuser (`gpadmin`). The host name used in the URI must match a segment host name registered in the `gp_segment_configuration` system catalog table. +The URI includes the host name, port, and path to the file. Each file must locate on a segment host in a location accessible by the Apache Cloudberry superuser (`gpadmin`). The host name used in the URI must match a segment host name registered in the `gp_segment_configuration` system catalog table. 
The `LOCATION` clause can have multiple URIs, as shown in [Usage examples](#usage-examples). -The number of URIs you specify in the `LOCATION` clause is the number of segment instances that will work in parallel to access the external table. For each URI, Cloudberry Database assigns a primary segment on the specified host to the file. For maximum parallelism when loading data, divide the data into as many equally sized files as you have primary segments. This ensures that all segments participate in the load. The number of external files per segment host cannot exceed the number of primary segment instances on that host. For example, if your array has 4 primary segment instances per segment host, you can place 4 external files on each segment host. Tables based on the `file://` protocol can only be readable tables. +The number of URIs you specify in the `LOCATION` clause is the number of segment instances that will work in parallel to access the external table. For each URI, Apache Cloudberry assigns a primary segment on the specified host to the file. For maximum parallelism when loading data, divide the data into as many equally sized files as you have primary segments. This ensures that all segments participate in the load. The number of external files per segment host cannot exceed the number of primary segment instances on that host. For example, if your array has 4 primary segment instances per segment host, you can place 4 external files on each segment host. Tables based on the `file://` protocol can only be readable tables. The system view `pg_max_external_files` shows how many external table files are permitted per external table. This view lists the available file slots per segment host when using the `file://` protocol. The view is only applicable for the `file://` protocol. 
For example: diff --git a/docs/data-loading/load-data-using-gpfdist.md b/docs/data-loading/load-data-using-gpfdist.md index c36b4eb8fd..fa7714183f 100644 --- a/docs/data-loading/load-data-using-gpfdist.md +++ b/docs/data-loading/load-data-using-gpfdist.md @@ -2,11 +2,11 @@ title: Load Data Using gpfdist --- -# Load Data into Cloudberry Database Using `gpfdist` +# Load Data into Apache Cloudberry Using `gpfdist` -To load data from local host files or files accessible via internal network, you can use the `gpfdist` protocol in the `CREATE EXTERNAL TABLE` statement. gpfdist is a file server utility that runs on a host other than the Cloudberry Database coordinator or standby coordinator. `gpfdist` serves files from a directory on the host to Cloudberry Database segments. +To load data from local host files or files accessible via internal network, you can use the `gpfdist` protocol in the `CREATE EXTERNAL TABLE` statement. gpfdist is a file server utility that runs on a host other than the Apache Cloudberry coordinator or standby coordinator. `gpfdist` serves files from a directory on the host to Apache Cloudberry segments. -When external data is served by gpfdist, all segments in the Cloudberry Database system can read or write external table data in parallel. +When external data is served by gpfdist, all segments in the Apache Cloudberry system can read or write external table data in parallel. The supported data formats are: @@ -16,7 +16,7 @@ The supported data formats are: The general procedure for loading data using `gpfdist` is as follows: -1. Install gpfdist on a host other than the Cloudberry Database coordinator or standby coordinator. See [Install gpfdist](#step-1-install-gpfdist). +1. Install gpfdist on a host other than the Apache Cloudberry coordinator or standby coordinator. See [Install gpfdist](#step-1-install-gpfdist). 2. Start gpfdist on the host. See [Start and stop gpfdist](#step-2-start-and-stop-gpfdist). 3. 
Create an external table using the `gpfdist` protocol. See [Examples for using gpfdist with external tables](#step-3-use-gpfdist-with-external-tables-to-load-data). @@ -29,30 +29,30 @@ Before using gpfdist, you might need to know how it works. This section provides ### About gpfdist and external tables -The `gpfdist` file server utility is located in the `$GPHOME/bin` directory on your Cloudberry Database coordinator host and on each segment host. When you start a `gpfdist` instance you specify a listen port and the path to a directory containing files to read or where files are to be written. For example, this command runs `gpfdist` in the background, listening on port `8801`, and serving files in the `/home/gpadmin/external_files` directory: +The `gpfdist` file server utility is located in the `$GPHOME/bin` directory on your Apache Cloudberry coordinator host and on each segment host. When you start a `gpfdist` instance you specify a listen port and the path to a directory containing files to read or where files are to be written. For example, this command runs `gpfdist` in the background, listening on port `8801`, and serving files in the `/home/gpadmin/external_files` directory: ```shell $ gpfdist -p 8801 -d /home/gpadmin/external_files & ``` -The `CREATE EXTERNAL TABLE` command `LOCATION` clause connects an external table definition to one or more `gpfdist` instances. If the external table is readable, the `gpfdist` server reads data records from files from in specified directory, packs them into a block, and sends the block in a response to a Cloudberry Database segment's request. The segments unpack rows that they receive and distribute the rows according to the external table's distribution policy. If the external table is a writable table, segments send blocks of rows in a request to gpfdist and gpfdist writes them to the external file. 
+The `CREATE EXTERNAL TABLE` command `LOCATION` clause connects an external table definition to one or more `gpfdist` instances. If the external table is readable, the `gpfdist` server reads data records from files in the specified directory, packs them into a block, and sends the block in a response to an Apache Cloudberry segment's request. The segments unpack rows that they receive and distribute the rows according to the external table's distribution policy. If the external table is a writable table, segments send blocks of rows in a request to gpfdist and gpfdist writes them to the external file. External data files can contain rows in CSV format or any delimited text format supported by the `FORMAT` clause of the `CREATE EXTERNAL TABLE` command. - + For readable external tables, `gpfdist` uncompresses `gzip` (`.gz`) and `bzip2` (.`bz2`) files automatically. You can use the wildcard character (`*`) or other C-style pattern matching to denote multiple files to read. External files are assumed to be relative to the directory specified when you started the `gpfdist` instance. ### About gpfdist setup and performance -You can run `gpfdist` instances on multiple hosts and you can run multiple `gpfdist` instances on each host. This allows you to deploy `gpfdist` servers strategically so that you can attain fast data load and unload rates by utilizing all of the available network bandwidth and Cloudberry Database's parallelism. +You can run `gpfdist` instances on multiple hosts and you can run multiple `gpfdist` instances on each host. This allows you to deploy `gpfdist` servers strategically so that you can attain fast data load and unload rates by utilizing all of the available network bandwidth and Apache Cloudberry's parallelism. - Allow network traffic to use all ETL host network interfaces simultaneously. 
Run one instance of gpfdist for each interface on the ETL host, then declare the host name of each NIC in the `LOCATION` clause of your external table definition (see [Examples for Creating External Tables](#example-1---run-single-gpfdist-instance-on-a-single-nic-machine)). - Divide external table data equally among multiple gpfdist instances on the ETL host. For example, on an ETL system with two NICs, run two gpfdist instances (one on each NIC) to optimize data load performance and divide the external table data files evenly between the two gpfdist servers. :::tip -Use pipes (`|`) to separate formatted text when you submit files to gpfdist. Cloudberry Database encloses comma-separated text strings in single or double quotes. gpfdist has to remove the quotes to parse the strings. Using pipes to separate formatted text avoids the extra step and improves performance. +Use pipes (`|`) to separate formatted text when you submit files to gpfdist. Apache Cloudberry encloses comma-separated text strings in single or double quotes. gpfdist has to remove the quotes to parse the strings. Using pipes to separate formatted text avoids the extra step and improves performance. ::: ### Control segment parallelism @@ -63,7 +63,7 @@ The `gp_external_max_segs` server configuration parameter controls the number of ## Step 1. Install gpfdist -gpfdist is installed in `$GPHOME/bin` of your Cloudberry Database coordinator host installation. Run gpfdist on a machine other than the Cloudberry Database coordinator or standby coordinator, such as on a machine devoted to ETL processing. Running gpfdist on the coordinator or standby coordinator can have a performance impact on query execution. +gpfdist is installed in `$GPHOME/bin` of your Apache Cloudberry coordinator host installation. Run gpfdist on a machine other than the Apache Cloudberry coordinator or standby coordinator, such as on a machine devoted to ETL processing. 
Running gpfdist on the coordinator or standby coordinator can have a performance impact on query execution. ## Step 2. Start and stop gpfdist @@ -110,7 +110,7 @@ $ kill 3457 ## Step 3. Use gpfdist with external tables to load data -The following examples show how to use gpfdist when creating an external table to load data into Cloudberry Database. +The following examples show how to use gpfdist when creating an external table to load data into Apache Cloudberry. :::tip When using IPv6, always enclose the numeric IP addresses in square brackets. @@ -158,7 +158,7 @@ Creates a readable external table, `ext_expenses`, from all files with the txt e Uses the gpfdist protocol to create a readable external table, `ext_expenses`, from all files with the txt extension. The column delimiter is a pipe (`|`) and NULL (' ') is a space. -Access to the external table is single row error isolation mode. Input data formatting errors are captured internally in Cloudberry Database with a description of the error. You can view the errors, fix the issues, and then reload the rejected data. If the error count on a segment is greater than `5` (the `SEGMENT REJECT LIMIT` value), the entire external table operation fails and no rows are processed. +Access to the external table is single row error isolation mode. Input data formatting errors are captured internally in Apache Cloudberry with a description of the error. You can view the errors, fix the issues, and then reload the rejected data. If the error count on a segment is greater than `5` (the `SEGMENT REJECT LIMIT` value), the entire external table operation fails and no rows are processed. 
```sql =# CREATE EXTERNAL TABLE ext_expenses ( name text, diff --git a/docs/data-loading/load-data-using-gpload.md b/docs/data-loading/load-data-using-gpload.md index 80c468de69..9ec7168e1b 100644 --- a/docs/data-loading/load-data-using-gpload.md +++ b/docs/data-loading/load-data-using-gpload.md @@ -2,9 +2,9 @@ title: Load Data Using gpload --- -# Load Data into Cloudberry Database Using `gpload` +# Load Data into Apache Cloudberry Using `gpload` -The `gpload` utility of Cloudberry Database loads data using readable external tables and the Cloudberry Database parallel file server (gpfdist). It handles parallel file-based external table setup and allows users to configure their data format, external table definition, and gpfdist setup in a single configuration file. +The `gpload` utility of Apache Cloudberry loads data using readable external tables and the Apache Cloudberry parallel file server (gpfdist). It handles parallel file-based external table setup and allows users to configure their data format, external table definition, and gpfdist setup in a single configuration file. :::tip In `gpload`, `MERGE` and `UPDATE` operations are not supported if the target table column name is a reserved keyword, has capital letters, or includes any character that requires quotes `" "` to identify the column. @@ -12,13 +12,13 @@ In `gpload`, `MERGE` and `UPDATE` operations are not supported if the target tab ## To use gpload -1. Ensure that your environment is set up to run `gpload`. Some dependent files from your Cloudberry Database installation are required, such as gpfdist and Python 3, as well as network access to the Cloudberry Database segment hosts. `gpload` also requires that you install the following packages: +1. Ensure that your environment is set up to run `gpload`. Some dependent files from your Apache Cloudberry installation are required, such as gpfdist and Python 3, as well as network access to the Apache Cloudberry segment hosts. 
`gpload` also requires that you install the following packages: ```shell pip install psycopg2 pyyaml ``` -2. Create your load control file. This is a YAML-formatted file that specifies the Cloudberry Database connection information, gpfdist configuration information, external table options, and data format. +2. Create your load control file. This is a YAML-formatted file that specifies the Apache Cloudberry connection information, gpfdist configuration information, external table options, and data format. For example: diff --git a/docs/data-types.md b/docs/data-types.md index 1e46a85569..f5dbf9d1b5 100644 --- a/docs/data-types.md +++ b/docs/data-types.md @@ -2,13 +2,13 @@ title: Data Types --- -# Cloudberry Database Data Types +# Apache Cloudberry Data Types -Cloudberry Database has a rich set of native data types available to users. This document shows some of the built-in data types. In addition to the types listed here, there are also some internally used data types, such as oid (object identifier), but those are not documented in this document. +Apache Cloudberry has a rich set of native data types available to users. This document shows some of the built-in data types. In addition to the types listed here, there are also some internally used data types, such as oid (object identifier), but those are not documented in this document. The following data types are specified by SQL: bit, bit varying, boolean, character varying, varchar, character, char, date, double precision, integer, interval, numeric, decimal, real, smallint, time (with or without time zone), and timestamp (with or without time zone). -Each data type has an external representation determined by its input and output functions. Many of the built-in types have obvious external formats. However, several types are either unique to PostgreSQL (and Cloudberry Database), such as geometric paths, or have several possibilities for formats, such as the date and time types. 
Some of the input and output functions are not invertible. That is, the result of an output function may lose accuracy when compared to the original input. +Each data type has an external representation determined by its input and output functions. Many of the built-in types have obvious external formats. However, several types are either unique to PostgreSQL (and Apache Cloudberry), such as geometric paths, or have several possibilities for formats, such as the date and time types. Some of the input and output functions are not invertible. That is, the result of an output function may lose accuracy when compared to the original input. | Name | Alias | Size | Range | Description | |----------------------------|------------------------|-----------------|---------------------------------------------|----------------------------------------------------------| diff --git a/docs/deploy-cbdb-with-single-node.md b/docs/deploy-cbdb-with-single-node.md index 4bae7b11f9..15c405cd98 100644 --- a/docs/deploy-cbdb-with-single-node.md +++ b/docs/deploy-cbdb-with-single-node.md @@ -2,11 +2,11 @@ title: Deploy with a Single Computing Node --- -# Deploy Cloudberry Database with a Single Computing Node (New in v1.5.0) +# Deploy Apache Cloudberry with a Single Computing Node (New in v1.5.0) -Cloudberry Database is not fully compatible with PostgreSQL, and some features and syntax are Cloudberry Database-specific. If your business already relies on Cloudberry Database and you want to use the Cloudberry Database-specific syntax and features on a single node to avoid compatibility issues with PostgreSQL, you can consider deploying Cloudberry Database free of segments. +Apache Cloudberry is not fully compatible with PostgreSQL, and some features and syntax are Apache Cloudberry-specific. 
If your business already relies on Apache Cloudberry and you want to use the Apache Cloudberry-specific syntax and features on a single node to avoid compatibility issues with PostgreSQL, you can consider deploying Apache Cloudberry free of segments. -Starting from v1.5.0, Cloudberry Database provides the single-computing-node deployment mode. This mode runs under the `utility` gp_role, with only one coordinator (QD) node and one coordinator standby node, without a segment node or data distribution. You can directly connect to the coordinator and run queries as if you were connecting to a regular multi-node cluster. Note that some SQL statements are not effective in this mode because data distribution does not exist, and some SQL statements are not supported. See [user behavior changes](#user-behavior-changes) for details. +Starting from v1.5.0, Apache Cloudberry provides the single-computing-node deployment mode. This mode runs under the `utility` gp_role, with only one coordinator (QD) node and one coordinator standby node, without a segment node or data distribution. You can directly connect to the coordinator and run queries as if you were connecting to a regular multi-node cluster. Note that some SQL statements are not effective in this mode because data distribution does not exist, and some SQL statements are not supported. See [user behavior changes](#user-behavior-changes) for details. ## How to deploy @@ -127,7 +127,7 @@ $echo $(expr $(getconf _PHYS_PAGES)/2 \*$(getconf PAGE_SIZE)) ##### IP segmentation settings -When the Cloudberry Database uses the UDP protocol for internal connection, the network card controls the fragmentation and reassembly of IP packets. If the size of a UDP message is larger than the maximum size of network transmission unit (MTU), the IP layer fragments the message. +When Apache Cloudberry uses the UDP protocol for internal connection, the network card controls the fragmentation and reassembly of IP packets. 
If the size of a UDP message is larger than the maximum size of network transmission unit (MTU), the IP layer fragments the message. - `net.ipv4.ipfrag_high_thresh`: When the total size of IP fragments exceeds this threshold, the kernel will attempt to reorganize IP fragments. If the fragments exceed this threshold but all fragments have not arrived within the specified time, the kernel will not reorganize the fragments. This threshold is typically used to control whether larger shards are reorganized. The default value is `4194304` bytes (4 MB). - `net.ipv4.ipfrag_low_thresh`: Indicates that when the total size of IP fragments is below this threshold, the kernel will wait as long as possible for more fragments to arrive, to allow for larger reorganizations. This threshold is used to minimize unfinished reorganization operations and improve system performance. The default value is `3145728` bytes (3 MB). @@ -200,7 +200,7 @@ Edit the `/etc/security/limits.conf` file and add the following content, which w ##### Set mount options for the XFS file system -XFS is the file system for the data directory of Cloudberry Database. XFS has the following mount options: +XFS is the file system for the data directory of Apache Cloudberry. XFS has the following mount options: ``` rw,nodev,noatime,inode64 @@ -250,7 +250,7 @@ sudo/sbin/blockdev --setra16384/dev/vdc ##### I/O scheduling policy settings for disks -The disk type, operating system, and scheduling policies of Cloudberry Database are as follows: +The disk type, operating system, and scheduling policies of Apache Cloudberry are as follows: @@ -343,7 +343,7 @@ cat /sys/kernel/mm/*transparent_hugepage/enabled ##### Disable IPC object deletion -Disable IPC object deletion by setting the value of `RemoveIPC` to `no`. You can set this parameter in the `/etc/systemd/logind.conf` file of Cloudberry Database. +Disable IPC object deletion by setting the value of `RemoveIPC` to `no`. 
You can set this parameter in the `/etc/systemd/logind.conf` file of Apache Cloudberry. ``` RemoveIPC=no @@ -377,7 +377,7 @@ service sshd restart ##### Clock synchronization -Cloudberry Database requires the clock synchronization to be configured for all hosts, and the clock synchronization service should be started when the host starts. You can choose one of the following synchronization methods: +Apache Cloudberry requires the clock synchronization to be configured for all hosts, and the clock synchronization service should be started when the host starts. You can choose one of the following synchronization methods: - Use the coordinator node's time as the source, and other hosts synchronize the clock of the coordinator node host. - Synchronize clocks using an external clock source. @@ -396,7 +396,7 @@ After setting, you can run the following command to check the clock synchronizat systemctl status chronyd ``` -### Step 2. Install Cloudberry Database +### Step 2. Install Apache Cloudberry 1. Download the RPM package to the home directory of `gpadmin`. @@ -406,14 +406,14 @@ systemctl status chronyd 2. Install the RPM package in the `/home/gpadmin` directory. - When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry-db/` is automatically created. + When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry/` is automatically created. ```bash cd /home/gpadmin yum install ``` -3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry-db/` directory. +3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry/` directory. ```bash chown -R gpadmin:gpadmin /usr/local @@ -428,9 +428,9 @@ systemctl status chronyd ssh `hostname` # Makes sure that the local SSH connection works well. ``` -### Step 3. 
Deploy Cloudberry Database with a single computing node +### Step 3. Deploy Apache Cloudberry with a single computing node -Use the scripting tool [`gpdemo`](/docs/sys-utilities/gpdemo.md) to quickly deploy Cloudberry Database. `gpdemo` is included in the RPM package and will be installed in the `GPHOME/bin` directory along with the configuration scripts (gpinitsystem, gpstart, and gpstop). `gpdemo` supports quickly deploying Cloudberry Database with a single computing node. +Use the scripting tool [`gpdemo`](/docs/sys-utilities/gpdemo.md) to quickly deploy Apache Cloudberry. `gpdemo` is included in the RPM package and will be installed in the `$GPHOME/bin` directory along with the configuration scripts (gpinitsystem, gpstart, and gpstop). `gpdemo` supports quickly deploying Apache Cloudberry with a single computing node. In the above [setting mount options for the XFS file system](#set-mount-options-for-the-xfs-file-system), the XFS file system's data directory is mounted on `/data0`. The following commands deploy a single-computing-node cluster in this data directory: @@ -439,13 +439,13 @@ cd /data0 NUM_PRIMARY_MIRROR_PAIRS=0 gpdemo # Uses gpdemo ``` -When `gpdemo` is running, a warning will be output `[WARNING]: -SinglenodeMode has been enabled, no segment will be created.`, which indicates that Cloudberry Database is currently being deployed in the single-computing-node mode. +When `gpdemo` is running, a warning will be output `[WARNING]: -SinglenodeMode has been enabled, no segment will be created.`, which indicates that Apache Cloudberry is currently being deployed in the single-computing-node mode. ## Common issues ### How to check the deployment mode of a cluster -Perform the following steps to confirm the deployment mode of the current Cloudberry Database cluster: +Perform the following steps to confirm the deployment mode of the current Apache Cloudberry cluster: 1. Connect to the coordinator node. 2. 
Execute `SHOW gp_role;` to view the operating mode of the cluster. @@ -468,11 +468,11 @@ Perform the following steps to confirm the deployment mode of the current Cloudb ## How it works -When you are deploying Cloudberry Database in the single-computing-node mode, the deployment script `gpdemo` writes `gp_internal_is_singlenode = true` to the configuration file `postgresql.conf` and starts a coordinator and a coordinator standby node with the `gp_role = utility` parameter setting. All data is written locally without a segment or data distribution. +When you are deploying Apache Cloudberry in the single-computing-node mode, the deployment script `gpdemo` writes `gp_internal_is_singlenode = true` to the configuration file `postgresql.conf` and starts a coordinator and a coordinator standby node with the `gp_role = utility` parameter setting. All data is written locally without a segment or data distribution. ## User-behavior changes -In the single-computing-node mode, the product behavior of Cloudberry Database has the following changes. You should pay attention to these changes before performing related operations: +In the single-computing-node mode, the product behavior of Apache Cloudberry has the following changes. You should pay attention to these changes before performing related operations: - When you execute `CREATE TABLE` to create a table, the `DISTRIBUTED BY` clause no longer takes effect. A warning is output: `WARNING: DISTRIBUTED BY clause has no effect in singlenode mode`. - The `SCATTER BY` clause of the `SELECT` statement is no longer effective. A warning is output: `WARNING: SCATTER BY clause has no effect in singlenode mode`. 
diff --git a/docs/functions/advanced-aggregate-functions.md b/docs/functions/advanced-aggregate-functions.md index d312ef5e2b..f9709cc6e0 100644 --- a/docs/functions/advanced-aggregate-functions.md +++ b/docs/functions/advanced-aggregate-functions.md @@ -2,11 +2,11 @@ title: Advanced Aggregate Functions --- -# Advanced Aggregate Functions in Cloudberry Database +# Advanced Aggregate Functions in Apache Cloudberry The following built-in advanced analytic functions are Cloudberry extensions of the PostgreSQL database. Analytic functions are *immutable*. -> **Note** The Cloudberry MADlib Extension for Analytics provides additional advanced functions to perform statistical analysis and machine learning with Cloudberry Database data. +> **Note** The Cloudberry MADlib Extension for Analytics provides additional advanced functions to perform statistical analysis and machine learning with Apache Cloudberry data. | Function | Return Type | Full Syntax | Description | |----------|-------------|-------------|-------------| diff --git a/docs/functions/index.md b/docs/functions/index.md index 58fdb1f202..77f68c66a7 100644 --- a/docs/functions/index.md +++ b/docs/functions/index.md @@ -4,7 +4,7 @@ title: Summary of Built-in Functions # Summary of Built-in Functions -Cloudberry Database supports built-in functions and operators including analytic functions and window functions that can be used in window expressions. +Apache Cloudberry supports built-in functions and operators including analytic functions and window functions that can be used in window expressions. ```mdx-code-block @@ -13,9 +13,9 @@ import DocCardList from '@theme/DocCardList'; ``` -## Cloudberry Database function types +## Apache Cloudberry function types -Cloudberry Database evaluates functions and operators used in SQL expressions. Some functions and operators are only allowed to run on the coordinator since they could lead to inconsistencies in Cloudberry Database segment instances. 
This table describes the Cloudberry Database Function Types. +Apache Cloudberry evaluates functions and operators used in SQL expressions. Some functions and operators are only allowed to run on the coordinator since they could lead to inconsistencies in Apache Cloudberry segment instances. This table describes the Apache Cloudberry Function Types. |Function Type|Cloudberry Support|Description|Comments| |-------------|-----------------|-----------|--------| @@ -23,7 +23,7 @@ Cloudberry Database evaluates functions and operators used in SQL expressions. S |STABLE|Yes, in most cases|Within a single table scan, returns the same result for same argument values, but results change across SQL statements.|Results depend on database lookups or parameter values. `current_timestamp` family of functions is `STABLE`; values do not change within an execution.| |VOLATILE|Restricted|Function values can change within a single table scan. For example: `random()`, `timeofday()`.|Any function with side effects is volatile, even if its result is predictable. For example: `setval()`.| -In Cloudberry Database, data is divided up across segments — each segment is a distinct PostgreSQL database. To prevent inconsistent or unexpected results, do not run functions classified as `VOLATILE` at the segment level if they contain SQL commands or modify the database in any way. For example, functions such as `setval()` are not allowed to run on distributed data in Cloudberry Database because they can cause inconsistent data between segment instances. +In Apache Cloudberry, data is divided up across segments — each segment is a distinct PostgreSQL database. To prevent inconsistent or unexpected results, do not run functions classified as `VOLATILE` at the segment level if they contain SQL commands or modify the database in any way. 
For example, functions such as `setval()` are not allowed to run on distributed data in Apache Cloudberry because they can cause inconsistent data between segment instances. To ensure data consistency, you can safely use `VOLATILE` and `STABLE` functions in statements that are evaluated on and run from the coordinator. For example, the following statements run on the coordinator (statements without a `FROM` clause): @@ -38,11 +38,11 @@ If a statement has a `FROM` clause containing a distributed table *and* the func SELECT * from foo(); ``` -Cloudberry Database does not support functions that return a table reference (`rangeFuncs`) or functions that use the `refCursor` datatype. +Apache Cloudberry does not support functions that return a table reference (`rangeFuncs`) or functions that use the `refCursor` datatype. ## Built-in functions and operators -The following table lists the categories of built-in functions and operators supported by PostgreSQL. All functions and operators are supported in Cloudberry Database as in PostgreSQL with the exception of `STABLE` and `VOLATILE` functions, which are subject to the restrictions noted in [Cloudberry Database Function Types](#topic27). See the [Functions and Operators](https://www.postgresql.org/docs/14/functions.html) section of the PostgreSQL documentation for more information about these built-in functions and operators. +The following table lists the categories of built-in functions and operators supported by PostgreSQL. All functions and operators are supported in Apache Cloudberry as in PostgreSQL with the exception of `STABLE` and `VOLATILE` functions, which are subject to the restrictions noted in [Apache Cloudberry Function Types](#topic27). See the [Functions and Operators](https://www.postgresql.org/docs/14/functions.html) section of the PostgreSQL documentation for more information about these built-in functions and operators. 
|Operator/Function Category|VOLATILE Functions|STABLE Functions|Restrictions| |--------------------------|------------------|----------------|------------| diff --git a/docs/functions/json-functions-and-operators.md b/docs/functions/json-functions-and-operators.md index fd8e26367c..5b266f151f 100644 --- a/docs/functions/json-functions-and-operators.md +++ b/docs/functions/json-functions-and-operators.md @@ -2,7 +2,7 @@ title: JSON Functions and Operators --- -# JSON Functions and Operators in Cloudberry Database +# JSON Functions and Operators in Apache Cloudberry This document describes: @@ -11,7 +11,7 @@ This document describes: ## Process and creating JSON data -Cloudberry Database includes built-in functions and operators that create and manipulate JSON data: +Apache Cloudberry includes built-in functions and operators that create and manipulate JSON data: - [JSON operators](#json-operators) - [JSON creation functions](#json-creation-functions) @@ -96,7 +96,7 @@ This table shows the functions that aggregate records to an array of JSON object ### SQL/JSON path language -SQL/JSON path expressions specify the items to be retrieved from the JSON data, similar to XPath expressions used for SQL access to XML. In Cloudberry Database, path expressions are implemented as the `jsonpath` data type. +SQL/JSON path expressions specify the items to be retrieved from the JSON data, similar to XPath expressions used for SQL access to XML. In Apache Cloudberry, path expressions are implemented as the `jsonpath` data type. JSON query functions and operators pass the provided path expression to the *path engine* for evaluation. If the expression matches the queried JSON data, the corresponding SQL/JSON item is returned. Path expressions are written in the SQL/JSON path language and can also include arithmetic expressions and functions. Query functions treat the provided expression as a text string, so it must be enclosed in single quotes. 
@@ -194,10 +194,10 @@ This expression returns the size of the track if it contains any segments with h ### Deviations from standard -Cloudberry Database's implementation of SQL/JSON path language has the following deviations from the SQL/JSON standard: +Apache Cloudberry's implementation of SQL/JSON path language has the following deviations from the SQL/JSON standard: -- `.datetime()` item method is not implemented yet mainly because immutable `jsonpath` functions and operators cannot reference session timezone, which is used in some datetime operations. Datetime support will be added to `jsonpath` in future versions of Cloudberry Database. -- A path expression can be a Boolean predicate, although the SQL/JSON standard allows predicates only in filters. This is necessary for implementation of the `@@` operator. For example, the following `jsonpath` expression is valid in Cloudberry Database: +- `.datetime()` item method is not implemented yet mainly because immutable `jsonpath` functions and operators cannot reference session timezone, which is used in some datetime operations. Datetime support will be added to `jsonpath` in future versions of Apache Cloudberry. +- A path expression can be a Boolean predicate, although the SQL/JSON standard allows predicates only in filters. This is necessary for implementation of the `@@` operator. For example, the following `jsonpath` expression is valid in Apache Cloudberry: ```shell '$.track.segments[*].HR < 70' @@ -252,7 +252,7 @@ SQL/JSON path expressions allow matching text to a regular expression with the ` The optional flag string may include one or more of the characters `i` for case-insensitive match, `m` to allow `^` and `$` to match at newlines, `s` to allow `.` to match a newline, and `q` to quote the whole pattern (reducing the behavior to a simple substring match). -The SQL/JSON standard borrows its definition for regular expressions from the `LIKE_REGEX` operator, which in turn uses the XQuery standard. 
Cloudberry Database does not currently support the `LIKE_REGEX` operator. Therefore, the `like_regex` filter is implemented using the POSIX regular expression engine as described in [POSIX Regular Expressions](https://www.postgresql.org/docs/12/functions-matching.html#FUNCTIONS-POSIX-REGEXP). This leads to various minor discrepancies from standard SQL/JSON behavior which are catalogued in [Differences From XQuery (LIKE_REGEX)](https://www.postgresql.org/docs/12/functions-matching.html#POSIX-VS-XQUERY). Note, however, that the flag-letter incompatibilities described there do not apply to SQL/JSON, as it translates the XQuery flag letters to match what the POSIX engine expects. +The SQL/JSON standard borrows its definition for regular expressions from the `LIKE_REGEX` operator, which in turn uses the XQuery standard. Apache Cloudberry does not currently support the `LIKE_REGEX` operator. Therefore, the `like_regex` filter is implemented using the POSIX regular expression engine as described in [POSIX Regular Expressions](https://www.postgresql.org/docs/12/functions-matching.html#FUNCTIONS-POSIX-REGEXP). This leads to various minor discrepancies from standard SQL/JSON behavior which are catalogued in [Differences From XQuery (LIKE_REGEX)](https://www.postgresql.org/docs/12/functions-matching.html#POSIX-VS-XQUERY). Note, however, that the flag-letter incompatibilities described there do not apply to SQL/JSON, as it translates the XQuery flag letters to match what the POSIX engine expects. Keep in mind that the pattern argument of `like_regex` is a JSON path string literal, written according to the rules given in jsonpath Type. This means in particular that any backslashes you want to use in the regular expression must be doubled. 
For example, to match string values of the root document that contain only digits: diff --git a/docs/functions/range-functions-and-operators.md b/docs/functions/range-functions-and-operators.md index a79cb1846d..46859f3d05 100644 --- a/docs/functions/range-functions-and-operators.md +++ b/docs/functions/range-functions-and-operators.md @@ -2,7 +2,7 @@ title: Range Functions and Operators --- -# Range Functions and Operators in Cloudberry Database +# Range Functions and Operators in Apache Cloudberry The following table shows the operators available for range types. diff --git a/docs/functions/text-search-functions-and-operators.md b/docs/functions/text-search-functions-and-operators.md index 074210380f..151e9f356c 100644 --- a/docs/functions/text-search-functions-and-operators.md +++ b/docs/functions/text-search-functions-and-operators.md @@ -2,7 +2,7 @@ title: Text Search Functions and Operators --- -# Text Search Functions and Operators in Cloudberry Database +# Text Search Functions and Operators in Apache Cloudberry The following tables summarize the functions and operators that are provided for full text searching. diff --git a/docs/functions/window-functions.md b/docs/functions/window-functions.md index 08c4fe89f5..7aa553c59e 100644 --- a/docs/functions/window-functions.md +++ b/docs/functions/window-functions.md @@ -2,9 +2,9 @@ title: Window Functions --- -# Window Functions in Cloudberry Database +# Window Functions in Apache Cloudberry -The following are Cloudberry Database built-in window functions. All window functions are *immutable*. +The following are Apache Cloudberry built-in window functions. All window functions are *immutable*. 
|Function|Return Type|Full Syntax|Description| |--------|-----------|-----------|-----------| diff --git a/docs/insert-update-delete-rows.md b/docs/insert-update-delete-rows.md index 8271a4e02c..47978a060e 100644 --- a/docs/insert-update-delete-rows.md +++ b/docs/insert-update-delete-rows.md @@ -2,9 +2,9 @@ title: Insert, Update, and Delete Rows --- -# Insert, Update, and Delete Row Data in Cloudberry Database +# Insert, Update, and Delete Row Data in Apache Cloudberry -This document introduces how to manipulate row data in Cloudberry Database, including: +This document introduces how to manipulate row data in Apache Cloudberry, including: - [Inserting rows](#insert-rows) - [Updating existing rows](#update-existing-rows) @@ -65,7 +65,7 @@ The above statement will automatically insert the data row into the correct part To insert large amounts of data, use external tables or the `COPY` command. These load mechanisms are more efficient than `INSERT` for inserting many rows. -The storage model of append-optimized tables in Cloudberry Database is designed for efficient bulk data loading rather than single row `INSERT` statements. For high-volume data insertions, it is recommended to use batch loading methods such as the `COPY` command. Cloudberry Database can support multiple concurrent `INSERT` transactions on append-optimized tables; however, this capability is typically intended for batch insertions rather than single-row operations. +The storage model of append-optimized tables in Apache Cloudberry is designed for efficient bulk data loading rather than single row `INSERT` statements. For high-volume data insertions, it is recommended to use batch loading methods such as the `COPY` command. Apache Cloudberry can support multiple concurrent `INSERT` transactions on append-optimized tables; however, this capability is typically intended for batch insertions rather than single-row operations. 
## Update existing rows @@ -105,7 +105,7 @@ Use the `TRUNCATE` command to quickly remove all rows in a table. For example: TRUNCATE mytable; ``` -This command empties a table of all rows in one operation. Note that in Cloudberry Database, the `TRUNCATE` command will affect inherited child tables by default, even without using the `CASCADE` option. In addition, because Cloudberry Database does not support foreign key constraints, the `TRUNCATE` command will not trigger any `ON DELETE` actions or rewrite rules. The command truncates only rows in the named table. +This command empties a table of all rows in one operation. Note that in Apache Cloudberry, the `TRUNCATE` command will affect inherited child tables by default, even without using the `CASCADE` option. In addition, because Apache Cloudberry does not support foreign key constraints, the `TRUNCATE` command will not trigger any `ON DELETE` actions or rewrite rules. The command truncates only rows in the named table. ## Vacuum the database @@ -117,7 +117,7 @@ VACUUM mytable; The `VACUUM` command collects table-level statistics such as the number of rows and pages. Vacuum all tables after loading data, including append-optimized tables. -You need to use the `VACUUM`, `VACUUM FULL`, and `VACUUM ANALYZE` commands to maintain the data in a Cloudberry Database especially if updates and deletes are frequently performed on your database data. +You need to use the `VACUUM`, `VACUUM FULL`, and `VACUUM ANALYZE` commands to maintain the data in Apache Cloudberry, especially if updates and deletes are frequently performed on your database data.
## See also diff --git a/docs/performance/index.md b/docs/performance/index.md index f880448f23..eca146ad25 100644 --- a/docs/performance/index.md +++ b/docs/performance/index.md @@ -2,20 +2,20 @@ title: Query Performance Overview --- -# Query Performance in Cloudberry Database +# Query Performance in Apache Cloudberry -Cloudberry Database dynamically eliminates irrelevant partitions in a table and optimally allocates memory for different operators in a query.These enhancements scan less data for a query, accelerate query processing, and support more concurrency. +Apache Cloudberry dynamically eliminates irrelevant partitions in a table and optimally allocates memory for different operators in a query. These enhancements scan less data for a query, accelerate query processing, and support more concurrency. - Dynamic partition elimination - In Cloudberry Database, values available only when a query runs are used to dynamically prune partitions, which improves query processing speed. Enable or deactivate dynamic partition elimination by setting the server configuration parameter `gp_dynamic_partition_pruning` to `ON` or `OFF`; it is `ON` by default. + In Apache Cloudberry, values available only when a query runs are used to dynamically prune partitions, which improves query processing speed. Enable or deactivate dynamic partition elimination by setting the server configuration parameter `gp_dynamic_partition_pruning` to `ON` or `OFF`; it is `ON` by default. - Memory optimizations - Cloudberry Database allocates memory optimally for different operators in a query and frees and re-allocates memory during the stages of processing a query. + Apache Cloudberry allocates memory optimally for different operators in a query and frees and re-allocates memory during the stages of processing a query. :::info -Cloudberry Database uses GPORCA by default. GPORCA extends the planning and optimization capabilities of the Postgres optimizer. +Apache Cloudberry uses GPORCA by default.
GPORCA extends the planning and optimization capabilities of the Postgres optimizer. ::: ```mdx-code-block diff --git a/docs/performance/parallel-create-ao-refresh-mv.md b/docs/performance/parallel-create-ao-refresh-mv.md index 1d9385709e..e10edacee6 100644 --- a/docs/performance/parallel-create-ao-refresh-mv.md +++ b/docs/performance/parallel-create-ao-refresh-mv.md @@ -4,7 +4,7 @@ title: Create AO/AOCO Tables and Refresh Materialized Views in Parallel # Create AO/AOCO Tables and Refresh Materialized Views in Parallel (New in v1.5.0) -Since v1.5.0, Cloudberry Database supports creating append-optimized (AO) tables and append-optimized column-oriented (AOCO) tables in parallel by using the `CREATE TABLE AS` statement, and supports refreshing materialized views in parallel based on the AO or AOCO tables. Parallel processing accelerates table creation and materialized view refresh. +Since v1.5.0, Apache Cloudberry supports creating append-optimized (AO) tables and append-optimized column-oriented (AOCO) tables in parallel by using the `CREATE TABLE AS` statement, and supports refreshing materialized views in parallel based on the AO or AOCO tables. Parallel processing accelerates table creation and materialized view refresh. To use the parallel feature, you need to first set the value of the system parameter `enable_parallel` to `ON`. 
@@ -126,4 +126,4 @@ You can use tools to record the refresh durations, and compare the difference in ## See also - [Use Automatic Materialized Views for Query Optimization](./use-auto-materialized-view-to-answer-queries.md) -- [Incremental Materialized View in Cloudberry Database](./use-incremental-materialized-view.md) \ No newline at end of file +- [Incremental Materialized View in Apache Cloudberry](./use-incremental-materialized-view.md) \ No newline at end of file diff --git a/docs/performance/parallel-query-execution.md b/docs/performance/parallel-query-execution.md index 74695c8b97..71b48f4828 100644 --- a/docs/performance/parallel-query-execution.md +++ b/docs/performance/parallel-query-execution.md @@ -4,7 +4,7 @@ title: Parallel Queries Execution # Use Parallel Queries Execution -This document introduces the usage scenarios, usage examples, restrictions, and common issues of parallel query execution in Cloudberry Database. When Cloudberry Database executes a query, multiple CPU cores are used to process a single query, thereby improving query performance. The database dynamically adjusts the number of computing nodes (including the `SeqScan` operator) according to the data volume change. +This document introduces the usage scenarios, usage examples, restrictions, and common issues of parallel query execution in Apache Cloudberry. When Apache Cloudberry executes a query, multiple CPU cores are used to process a single query, thereby improving query performance. The database dynamically adjusts the number of computing nodes (including the `SeqScan` operator) according to the data volume change. ## Applicable scenarios @@ -14,7 +14,7 @@ When the host CPU and disk load are not high, enabling parallel sessions for ope ## How to use -Cloudberry Database supports parallel query execution on AO/AOCO and heap tables. +Apache Cloudberry supports parallel query execution on AO/AOCO and heap tables. 
### Parallel queries on heap tables @@ -88,7 +88,7 @@ Cloudberry Database supports parallel query execution on AO/AOCO and heap tables ## Frequently asked questions -- Currently, Cloudberry Database supports executing queries in parallel that contain the following operators. If a query contains other operators, it will not be executed in parallel. +- Currently, Apache Cloudberry supports executing queries in parallel that contain the following operators. If a query contains other operators, it will not be executed in parallel. ```sql sequence scan diff --git a/docs/performance/update-stats-using-analyze.md b/docs/performance/update-stats-using-analyze.md index adef09d0b1..b42813cfee 100644 --- a/docs/performance/update-stats-using-analyze.md +++ b/docs/performance/update-stats-using-analyze.md @@ -2,7 +2,7 @@ title: Update Statistics --- -# Update Statistics in Cloudberry Database +# Update Statistics in Apache Cloudberry The most important prerequisite for good query performance is to begin with accurate statistics for the tables. Updating statistics with the `ANALYZE` statement enables the query planner to generate optimal query plans. When a table is analyzed, information about the data is stored in the system catalog tables. If the stored information is out of date, the planner can generate inefficient plans. @@ -20,11 +20,11 @@ WHERE relname = 'test_analyze'; ## Generate statistics selectively -Running [`ANALYZE`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/analyze.md) with no arguments updates statistics for all tables in the database. This can be a very long-running process and it is not recommended. You should `ANALYZE` tables selectively when data has changed or use the [analyzedb](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sys-utilities/analyzedb.md) utility. 
+Running [`ANALYZE`](https://github.com/apache/cloudberry-site/blob/cbdb-doc-validation/docs/sql-stmts/analyze.md) with no arguments updates statistics for all tables in the database. This can be a very long-running process and it is not recommended. You should `ANALYZE` tables selectively when data has changed or use the [analyzedb](https://github.com/apache/cloudberry-site/blob/cbdb-doc-validation/docs/sys-utilities/analyzedb.md) utility. Running `ANALYZE` on a large table can take a long time. If it is not feasible to run `ANALYZE` on all columns of a very large table, you can generate statistics for selected columns only using `ANALYZE table(column, ...)`. Be sure to include columns used in joins, `WHERE` clauses, `SORT` clauses, `GROUP BY` clauses, or `HAVING` clauses. -For a partitioned table, you can run `ANALYZE` just on partitions that have changed, for example, if you add a new partition. Note that for partitioned tables, you can run `ANALYZE` on the root partitioned table, or on the leaf partitions (files where data and statistics are actually stored). In Cloudberry Database, running `ANALYZE` on a single partition of a partitioned table also updates the statistical information of the root table, indicating that statistics gathering for one partition might affect the entire partitioned table's optimizer statistics. You can find the names of the leaf partitions using the `pg_partition_tree()` function: +For a partitioned table, you can run `ANALYZE` just on partitions that have changed, for example, if you add a new partition. Note that for partitioned tables, you can run `ANALYZE` on the root partitioned table, or on the leaf partitions (files where data and statistics are actually stored). In Apache Cloudberry, running `ANALYZE` on a single partition of a partitioned table also updates the statistical information of the root table, indicating that statistics gathering for one partition might affect the entire partitioned table's optimizer statistics. 
You can find the names of the leaf partitions using the `pg_partition_tree()` function: ```sql SELECT * FROM pg_partition_tree( 'parent_table' ); @@ -58,7 +58,7 @@ By default, the value of `gp_autostats_mode` is `none`. Setting this parameter t Setting `gp_autostats_mode` to `on_change` triggers statistics collection only when the number of rows affected exceeds the threshold defined by `gp_autostats_on_change_threshold`, which has a default value of `2147483647`. The these operations invoked on a table by its owner can trigger automatic statistics collection with `on_change`: `CREATE TABLE AS SELECT`, `UPDATE`, `DELETE`, `INSERT`, and `COPY`. -Setting the `gp_autostats_allow_nonowner` server configuration parameter to `true` also instructs Cloudberry Database to trigger automatic statistics collection on a table when: +Setting the `gp_autostats_allow_nonowner` server configuration parameter to `true` also instructs Apache Cloudberry to trigger automatic statistics collection on a table when: - `gp_autostats_mode=on_no_stats` and the first user to `INSERT` or `COPY` into the table is a non-owner. diff --git a/docs/performance/use-aggre-pushdown-to-speed-up-queries.md b/docs/performance/use-aggre-pushdown-to-speed-up-queries.md index 0d244e9a54..7594a7e0f7 100644 --- a/docs/performance/use-aggre-pushdown-to-speed-up-queries.md +++ b/docs/performance/use-aggre-pushdown-to-speed-up-queries.md @@ -4,12 +4,12 @@ title: Use Aggregation Pushdown to Speed Up Query Execution # Use aggregation pushdown to speed up query execution -Aggregation pushdown is an optimization technique that moves the aggregation operation closer to the data source. Cloudberry Database supports pushing down aggregation operations, which means that the aggregation operator is processed before the join operator. +Aggregation pushdown is an optimization technique that moves the aggregation operation closer to the data source. 
Apache Cloudberry supports pushing down aggregation operations, which means that the aggregation operator is processed before the join operator. In [applicable scenarios](#applicable-scenarios), aggregation pushdown can greatly reduce the size of the input set for join or aggregation operators, thereby enhancing their performance. :::tip -- In the native PostgreSQL kernel's optimization logic, aggregation operations in each query are always performed after all join operations have been completed (excluding subqueries). Therefore, Cloudberry Database introduces aggregation pushdown, permitting the early execution of aggregation operations in applicable scenarios. +- In the native PostgreSQL kernel's optimization logic, aggregation operations in each query are always performed after all join operations have been completed (excluding subqueries). Therefore, Apache Cloudberry introduces aggregation pushdown, permitting the early execution of aggregation operations in applicable scenarios. - To determine whether the optimizer's chosen execution plan applies the aggregation pushdown optimization, check the position relationship between aggregation and join in the execution plan tree. If a plan first executes `Partial Aggregation`, then performs `Join`, and finally performs `Final Aggregation`, it indicates that the optimizer has applied aggregation pushdown. ::: @@ -49,7 +49,7 @@ Optimizer: Postgres query optimizer (13 rows) ``` -From the execution plan result of the above example, you can see that before performing the HashJoin operation, Cloudberry Database first performs an aggregation operation on table `t1` based on the `id` column. This aggregation operation does not compromise the correctness of the statement's results and is likely to reduce the amount of data entering the HashJoin, thereby improving the efficiency of the statement execution. 
+From the execution plan result of the above example, you can see that before performing the HashJoin operation, Apache Cloudberry first performs an aggregation operation on table `t1` based on the `id` column. This aggregation operation does not compromise the correctness of the statement's results and is likely to reduce the amount of data entering the HashJoin, thereby improving the efficiency of the statement execution. ## Applicable scenarios @@ -69,7 +69,7 @@ SELECT o.order_id, SUM(price) ``` - Execution method in the native PostgreSQL optimizer: PostgreSQL's native optimizer can perform aggregation only after joining tables. Because every item in `order_line_tbl` corresponds to an order in `order_tbl`, the Join operator will not filter out any data. -- Execution method in Cloudberry Database: assuming that each order contains an average of 10 items, the volume of data is expected to decrease tenfold after aggregation. With aggregation pushdown enabled, the database first aggregates data in `order_line_tbl` based on `order_id`, reducing the volume of data entering the Join operator by tenfold, significantly reducing the cost of the Join operator. The corresponding execution plan is as follows: +- Execution method in Apache Cloudberry: assuming that each order contains an average of 10 items, the volume of data is expected to decrease tenfold after aggregation. With aggregation pushdown enabled, the database first aggregates data in `order_line_tbl` based on `order_id`, reducing the volume of data entering the Join operator by tenfold, significantly reducing the cost of the Join operator. The corresponding execution plan is as follows: ```sql EXPLAIN SELECT o.order_id, SUM(price) @@ -100,7 +100,7 @@ SELECT proj_name, sum(cost) GROUP BY proj_name; ``` -For this query, with aggregation pushdown enabled, Cloudberry Database performs pre-aggregation on the experiment table based on the `e_pid` column, aggregating information of the same project together. 
+For this query, with aggregation pushdown enabled, Apache Cloudberry performs pre-aggregation on the experiment table based on the `e_pid` column, aggregating information of the same project together. However, if this query also applies many filters on the project table, this might cause a high selectivity rate for join, leading to inefficient execution. Therefore, aggregation pushdown might not be suitable in such cases. @@ -201,8 +201,8 @@ SELECT id, sum1 * cnt2 FROM WHERE AT1.id = AT2.id GROUP BY id; ``` -In this example, the aggregation operation is pushed down to both sides of the join. For all tuples in the `t1` table with `id` 100, Cloudberry Database pre-aggregates their `val` s, resulting in a corresponding `sum1`. +In this example, the aggregation operation is pushed down to both sides of the join. For all tuples in the `t1` table with `id` 100, Apache Cloudberry pre-aggregates their `val` s, resulting in a corresponding `sum1`. -During the actual join process, for each tuple in the `t2` table with `id` 100, a join is performed with the tuple belonging to `sum1`, resulting in the corresponding tuples. This means that for each `id` 100 in `t2`, `sum1` will appear once in the final sum, allowing Cloudberry Database to pre-aggregate `t2`, calculating the total number of tuples with `id` 100 as `cnt2`. The final result can then be calculated through `sum1 * cnt2`. +During the actual join process, for each tuple in the `t2` table with `id` 100, a join is performed with the tuple belonging to `sum1`, resulting in the corresponding tuples. This means that for each `id` 100 in `t2`, `sum1` will appear once in the final sum, allowing Apache Cloudberry to pre-aggregate `t2`, calculating the total number of tuples with `id` 100 as `cnt2`. The final result can then be calculated through `sum1 * cnt2`. -This scenario involves relatively complex statement and expression rewriting, and is currently not supported in Cloudberry Database. 
+This scenario involves relatively complex statement and expression rewriting, and is currently not supported in Apache Cloudberry. diff --git a/docs/performance/use-auto-materialized-view-to-answer-queries.md b/docs/performance/use-auto-materialized-view-to-answer-queries.md index 0348db2bb5..ef06915a63 100644 --- a/docs/performance/use-auto-materialized-view-to-answer-queries.md +++ b/docs/performance/use-auto-materialized-view-to-answer-queries.md @@ -4,7 +4,7 @@ title: Use Auto Materialized Views for Query Optimization # Use Automatic Materialized Views for Query Optimization (New in v1.5.0) -Since v1.5.0, Cloudberry Database supports automatically using materialized views to process some or all queries (called AQUMV) during the query planning phase. This feature is suitable for queries on large tables and can greatly reduce query processing time. AQUMV uses incremental materialized views (IMVs) because IMVs usually keep the latest data when related tables have write operations. +Since v1.5.0, Apache Cloudberry supports automatically using materialized views to process some or all queries (called AQUMV) during the query planning phase. This feature is suitable for queries on large tables and can greatly reduce query processing time. AQUMV uses incremental materialized views (IMVs) because IMVs usually keep the latest data when related tables have write operations. ## Usage scenarios @@ -117,7 +117,7 @@ When the same query is executed, the data can be obtained directly from the mate AQUMV implements query optimization by equivalently transforming the query tree. -Cloudberry Database automatically uses materialized views only when the table query meets the following conditions: +Apache Cloudberry automatically uses materialized views only when the table query meets the following conditions: - Materialized views must contain all the rows required by the query expression. 
- If the materialized view contains more rows than the query, you might need to add additional filters. @@ -146,4 +146,4 @@ When there are multiple valid materialized view candidates, or the cost of query ## See also - [Create AO/AOCO Tables and Refresh Materialized Views in Parallel](./parallel-create-ao-refresh-mv.md) -- [Incremental Materialized View in Cloudberry Database](./use-incremental-materialized-view.md) +- [Incremental Materialized View in Apache Cloudberry](./use-incremental-materialized-view.md) diff --git a/docs/performance/use-incremental-materialized-view.md b/docs/performance/use-incremental-materialized-view.md index fc0455db18..788f1f8666 100644 --- a/docs/performance/use-incremental-materialized-view.md +++ b/docs/performance/use-incremental-materialized-view.md @@ -2,11 +2,11 @@ title: Incremental Materialized View --- -# Incremental Materialized View in Cloudberry Database (New in v1.5.0) +# Incremental Materialized View in Apache Cloudberry (New in v1.5.0) -This document introduces the usage scenarios of the incremental materialized view in Cloudberry Database, how to use it, the restrictions, and the things to note. +This document introduces the usage scenarios of the incremental materialized view in Apache Cloudberry, how to use it, the restrictions, and the things to note. -The incremental materialized view is a special form of materialized view. When data changes in a base table in Cloudberry Database (such as insertion, update, and deletion), the incremental materialized view does not need to recalculate all the data in the entire view. Instead, it only updates the parts that have changed since the last refresh. This can save a lot of computing resources and time, and significantly improve performance, especially when dealing with large datasets. +The incremental materialized view is a special form of materialized view. 
When data changes in a base table in Apache Cloudberry (such as insertion, update, and deletion), the incremental materialized view does not need to recalculate all the data in the entire view. Instead, it only updates the parts that have changed since the last refresh. This can save a lot of computing resources and time, and significantly improve performance, especially when dealing with large datasets. :::tip Comparison between incremental materialized view and regular view: @@ -23,7 +23,7 @@ Comparison between incremental materialized view and regular view: # Usage example :::info -Before using incremental materialized views, make sure that you are using Cloudberry Database v1.5.0 or a later version. Cloudberry Database v1.4.0 and earlier versions do not support incremental materialized views. +Before using incremental materialized views, make sure that you are using Apache Cloudberry v1.5.0 or a later version. Apache Cloudberry v1.4.0 and earlier versions do not support incremental materialized views. ::: To create an incremental materialized view, you can use the SQL command `CREATE INCREMENTAL MATERIALIZED VIEW`. The complete syntax support is as follows: @@ -38,7 +38,7 @@ CREATE [INCREMENTAL] MATERIALIZED VIEW [ IF NOT EXISTS ] table_name [ WITH [ NO ] DATA ] ``` -The following example shows how to create incremental materialized views for tables in Cloudberry Database. +The following example shows how to create incremental materialized views for tables in Apache Cloudberry. 1. Create tables `t0` and `t1`. @@ -102,7 +102,7 @@ To debug the statement executions related to incremental materialized views, you # Query performance comparison with regular view -The following example compares the query performance of a regular view and that of an incremental materialized view when processing large datasets in Cloudberry Database. The following example uses the Query 15 test dataset of TPC-H. 
+The following example compares the query performance of a regular view and that of an incremental materialized view when processing large datasets in Apache Cloudberry. The following example uses the Query 15 test dataset of TPC-H. ## With regular view @@ -221,7 +221,7 @@ From the above table, you can see: # Restrictions and notes -Currently, the incremental materialized views on Cloudberry Database have the following usage restrictions: +Currently, the incremental materialized views on Apache Cloudberry have the following usage restrictions: - Creating incremental materialized views for Append-Optimized (AO) tables is not supported. - Creating incremental materialized views for partitioned tables is not supported. @@ -237,7 +237,7 @@ Currently, the incremental materialized views on Cloudberry Database have the fo - Creating incremental materialized views on materialized views is not supported. -In addition, when using incremental materialized views on Cloudberry Database, you need to pay attention to the following issues: +In addition, when using incremental materialized views on Apache Cloudberry, you need to pay attention to the following issues: - Using incremental materialized views will slow down data insertion, deletion, and updates. In addition, a base table might have multiple incremental materialized views, and the performance degradation is proportional to the number of incremental materialized views. - Using incremental materialized views will generate temporary files to store calculation delta views, which might take up some storage space. 
diff --git a/docs/performance/use-index-scan-on-ao-tables.md b/docs/performance/use-index-scan-on-ao-tables.md index 27d3751f84..5927d10517 100644 --- a/docs/performance/use-index-scan-on-ao-tables.md +++ b/docs/performance/use-index-scan-on-ao-tables.md @@ -4,22 +4,22 @@ title: Use IndexScan on AO Tables # Use IndexScan on AO Tables -Cloudberry Database supports IndexScan when querying on App-Optimized (AO) tables to improve query efficiency in certain scenarios, such as the following query: +Apache Cloudberry supports IndexScan when querying on Append-Optimized (AO) tables to improve query efficiency in certain scenarios, such as the following query: ```sql SELECT * FROM tbl WHERE val > 100 ORDER BY val LIMIT 10; ``` :::tip -Append-Optimized (AO) tables are an optimized storage method for scenarios that focus on batch insertion, such as big data analysis and data warehouse scenarios. When new data is inserted into an AO table, Cloudberry Database appends the new data to the end of the table, instead of seeking free space for insertion like in regular tables. This means that inserting data into AO tables only requires append operations to the file, thus achieving higher insertion efficiency. +Append-Optimized (AO) tables are an optimized storage method for scenarios that focus on batch insertion, such as big data analysis and data warehouse scenarios. When new data is inserted into an AO table, Apache Cloudberry appends the new data to the end of the table, instead of seeking free space for insertion like in regular tables. This means that inserting data into AO tables only requires append operations to the file, thus achieving higher insertion efficiency. ::: For the above query statement: -- If you use heap table storage, Cloudberry Database executes this query by finding 10 tuples with `val` greater than `100` through IndexScan. It only needs to read about 10 tuples from the index and the table.
+- If you use heap table storage, Apache Cloudberry executes this query by finding 10 tuples with `val` greater than `100` through IndexScan. It only needs to read about 10 tuples from the index and the table. - If you use the AO table storage method, and assume that the `tbl` table has 1 billion rows of tuples, we specify through the `LIMIT` clause that only 10 tuples are to be returned: - - Cloudberry Database supports using IndexScan operations to scan AO tables, which can greatly reduce the amount of data to be scanned and greatly improve the efficiency of scanning. It is a better scanning method than SeqScan and BitmapScan. SeqScan or BitmapScan scans 100 million times more data than IndexScan. + - Apache Cloudberry supports using IndexScan operations to scan AO tables, which can greatly reduce the amount of data to be scanned and greatly improve the efficiency of scanning. It is a better scanning method than SeqScan and BitmapScan. SeqScan or BitmapScan scans 100 million times more data than IndexScan. ## Applicable scenarios diff --git a/docs/performance/use-runtimefilter-to-optimize-queries.md b/docs/performance/use-runtimefilter-to-optimize-queries.md index 76ccdb0769..9ef35bbe27 100644 --- a/docs/performance/use-runtimefilter-to-optimize-queries.md +++ b/docs/performance/use-runtimefilter-to-optimize-queries.md @@ -4,18 +4,18 @@ title: Use RuntimeFilter to Optimize Join Queries # Use RuntimeFilter to Optimize Join Queries -When performing join queries on large tables, the SQL optimizer of Cloudberry Database often uses the HashJoin operator. HashJoin builds a hash table based on the join key to match the join key, which might cause a memory access bottleneck and disk bottleneck. RuntimeFilter is an optimization technique that generates filters in real-time during HashJoin operations, which can pre-filter data before executing HashJoin to speed up HashJoin operations. In some scenarios, the RuntimeFilter optimization can double execution efficiency. 
+When performing join queries on large tables, the SQL optimizer of Apache Cloudberry often uses the HashJoin operator. HashJoin builds a hash table based on the join key to match the join key, which might cause a memory access bottleneck and disk bottleneck. RuntimeFilter is an optimization technique that generates filters in real-time during HashJoin operations, which can pre-filter data before executing HashJoin to speed up HashJoin operations. In some scenarios, the RuntimeFilter optimization can double execution efficiency. -HashJoin is commonly used for joining a smaller table with a larger table. In Cloudberry Database, when performing HashJoin, a hash table is typically built based on the smaller table to be joined. Then, by iterating over the tuples in the larger table, it finds matches in the hash table to perform the join. The smaller table used to build the hash table is called the inner table, while the other table used for iterative matching is called the outer table. +HashJoin is commonly used for joining a smaller table with a larger table. In Apache Cloudberry, when performing HashJoin, a hash table is typically built based on the smaller table to be joined. Then, by iterating over the tuples in the larger table, it finds matches in the hash table to perform the join. The smaller table used to build the hash table is called the inner table, while the other table used for iterative matching is called the outer table. The main bottlenecks in executing the HashJoin operator are: - Memory Access: For each tuple in the outer table, a matching tuple in the hash table needs to be found, involving one or more memory accesses. - Disk I/O: If the inner table is too large to fit in memory, it must be processed in batches using disk storage, causing significant disk I/O. -With RuntimeFilter optimization enabled, Cloudberry Database builds a RuntimeFilter alongside the hash table. This means filtering the tuples of the larger table before executing HashJoin. 
During the process, a Bloom Filter is used to implement the RuntimeFilter, occupying much less memory space than the hash table. When the Bloom Filter can be fully stored in the L3 cache, its filtering efficiency is twice that of HashJoin, greatly reducing memory access costs. +With RuntimeFilter optimization enabled, Apache Cloudberry builds a RuntimeFilter alongside the hash table. This means filtering the tuples of the larger table before executing HashJoin. During the process, a Bloom Filter is used to implement the RuntimeFilter, occupying much less memory space than the hash table. When the Bloom Filter can be fully stored in the L3 cache, its filtering efficiency is twice that of HashJoin, greatly reducing memory access costs. -The optimization will determine whether to generate a RuntimeFilter operator based on the filtering rate of the connection conditions of HashJoin and the size of the inner table. If the data volume deviates too much from the estimated result during the actual execution, Cloudberry Database will also stop using RuntimeFilter in time. +The optimization will determine whether to generate a RuntimeFilter operator based on the filtering rate of the connection conditions of HashJoin and the size of the inner table. If the data volume deviates too much from the estimated result during the actual execution, Apache Cloudberry will also stop using RuntimeFilter in time. ## Applicable scenarios @@ -24,7 +24,7 @@ If your scenario meets all the following conditions, you can consider using Runt - The size of the inner table in a single segment is within 16 million rows. - The original HashJoin join key has a selection rate lower than 60%, which means the result set meeting the Hash join condition is less than 60% of the outer table. This can also be understood as a filter rate greater than 40%. -In the above scenarios, the size of the Bloom Filter built by Cloudberry Database through RuntimeFilter is within 2 MB. 
It can be fully stored in the L3 cache, enabling the filtration of 40% of the tuples in the outer table with minimal overhead, thus yielding a positive impact. In certain scenarios, if the selectivity of the HashJoin join key is below 10%, the efficiency of execution can be doubled by optimizing with RuntimeFilter. +In the above scenarios, the size of the Bloom Filter built by Apache Cloudberry through RuntimeFilter is within 2 MB. It can be fully stored in the L3 cache, enabling the filtration of 40% of the tuples in the outer table with minimal overhead, thus yielding a positive impact. In certain scenarios, if the selectivity of the HashJoin join key is below 10%, the efficiency of execution can be doubled by optimizing with RuntimeFilter. ## Limitations diff --git a/docs/performance/use-unique-index-on-ao-tables.md b/docs/performance/use-unique-index-on-ao-tables.md index 5b424ccb7d..3a1d065576 100644 --- a/docs/performance/use-unique-index-on-ao-tables.md +++ b/docs/performance/use-unique-index-on-ao-tables.md @@ -4,11 +4,11 @@ title: Create Unique Index on AO Table # Create Unique Index on AO Table (New in v1.5.0) -Starting from v1.5.0, you can create a unique index on an Append-Optimized (AO) or Append-Optimized Column Store (AOCS) table in Cloudberry Database. With a unique index, Cloudberry Database checks the unique constraint when data is inserted into the AO table to ensure the uniqueness of the data. At the same time, the database optimizes specific queries with the optimizer to improve the query performance. However, this also brings some overhead for maintaining a unique index, especially when inserting data. +Starting from v1.5.0, you can create a unique index on an Append-Optimized (AO) or Append-Optimized Column Store (AOCS) table in Apache Cloudberry. With a unique index, Apache Cloudberry checks the unique constraint when data is inserted into the AO table to ensure the uniqueness of the data. 
At the same time, the database optimizes specific queries with the optimizer to improve the query performance. However, this also brings some overhead for maintaining a unique index, especially when inserting data. ## How it works -For scenarios where a unique index exists in an AO table, when a tuple is inserted into the AO table, Cloudberry Database inserts a placeholder into the auxiliary index structure BlockDirectory of the AO table, which blocks the insertion of the same key and achieves a unique index. +For scenarios where a unique index exists in an AO table, when a tuple is inserted into the AO table, Apache Cloudberry inserts a placeholder into the auxiliary index structure BlockDirectory of the AO table, which blocks the insertion of the same key and achieves a unique index. ## How to add a unique index on an AO table diff --git a/docs/security/client-auth.md b/docs/security/client-auth.md index ff9f2bf8df..badd1ec776 100644 --- a/docs/security/client-auth.md +++ b/docs/security/client-auth.md @@ -6,13 +6,13 @@ title: Configure Client Authentication This topic explains how to configure client connections and authentication for Apache Cloudberry. -When a Cloudberry Database system is first initialized, the system contains one predefined *superuser* role. This role will have the same name as the operating system user who initialized the Cloudberry Database system. This role is referred to as `gpadmin`. By default, the system is configured to only allow local connections to the database from the `gpadmin` role. If you want to allow any other roles to connect, or if you want to allow connections from remote hosts, you have to configure Cloudberry Database to allow such connections. This section explains how to configure client connections and authentication to Cloudberry Database. +When an Apache Cloudberry system is first initialized, the system contains one predefined *superuser* role.
This role will have the same name as the operating system user who initialized the Apache Cloudberry system. This role is referred to as `gpadmin`. By default, the system is configured to only allow local connections to the database from the `gpadmin` role. If you want to allow any other roles to connect, or if you want to allow connections from remote hosts, you have to configure Apache Cloudberry to allow such connections. This section explains how to configure client connections and authentication to Apache Cloudberry. -## Allowing Connections to Cloudberry Database +## Allowing Connections to Apache Cloudberry Client access and authentication is controlled by a configuration file named `pg_hba.conf` (the standard PostgreSQL host-based authentication file). For detailed information about this file, see [The pg_hba.conf File](https://www.postgresql.org/docs/14/auth-pg-hba-conf.html) in the PostgreSQL documentation. -In Cloudberry Database, the `pg_hba.conf` file of the coordinator instance controls client access and authentication to your Cloudberry system. The segments also have `pg_hba.conf` files, but these are already correctly configured to only allow client connections from the coordinator host. The segments never accept outside client connections, so there is no need to alter the `pg_hba.conf` file on segments. +In Apache Cloudberry, the `pg_hba.conf` file of the coordinator instance controls client access and authentication to your Apache Cloudberry system. The segments also have `pg_hba.conf` files, but these are already correctly configured to only allow client connections from the coordinator host. The segments never accept outside client connections, so there is no need to alter the `pg_hba.conf` file on segments. The general format of the `pg_hba.conf` file is a set of records, one per line. Blank lines are ignored, as is any text after a \# comment character. A record is made up of a number of fields which are separated by spaces and/or tabs.
Fields can contain white space if the field value is quoted. Records cannot be continued across lines. @@ -36,7 +36,7 @@ Matches connection attempts using UNIX-domain sockets. Without a record of this **`host`** -Matches connection attempts made using TCP/IP. Remote TCP/IP connections will not be possible unless the server is started with an appropriate value for the `listen_addresses` server configuration parameter. Cloudberry Database by default allows connections from all hosts (`'*'`). +Matches connection attempts made using TCP/IP. Remote TCP/IP connections will not be possible unless the server is started with an appropriate value for the `listen_addresses` server configuration parameter. Apache Cloudberry by default allows connections from all hosts (`'*'`). **`hostssl`** @@ -107,10 +107,10 @@ For a more secure system, remove records for remote connections that use `trust` ## Editing the pg_hba.conf File -Initially, the `pg_hba.conf` file is set up with generous permissions for the gpadmin user and no database access for other Cloudberry Database roles. You will need to edit the `pg_hba.conf` file to enable users' access to databases and to secure the gpadmin user. Consider removing entries that have `trust` authentication, since they allow anyone with access to the server to connect with any role they choose. For local (UNIX socket) connections, use `ident` authentication, which requires the operating system user to match the role specified. For local and remote TCP connections, `ident` authentication requires the client's host to run an indent service. You could install an ident service on the coordinator host and then use `ident` authentication for local TCP connections, for example `127.0.0.1/28`. Using `ident` authentication for remote TCP connections is less secure because it requires you to trust the integrity of the ident service on the client's host. 
+Initially, the `pg_hba.conf` file is set up with generous permissions for the gpadmin user and no database access for other Apache Cloudberry roles. You will need to edit the `pg_hba.conf` file to enable users' access to databases and to secure the gpadmin user. Consider removing entries that have `trust` authentication, since they allow anyone with access to the server to connect with any role they choose. For local (UNIX socket) connections, use `ident` authentication, which requires the operating system user to match the role specified. For local and remote TCP connections, `ident` authentication requires the client's host to run an ident service. You could install an ident service on the coordinator host and then use `ident` authentication for local TCP connections, for example `127.0.0.1/28`. Using `ident` authentication for remote TCP connections is less secure because it requires you to trust the integrity of the ident service on the client's host. :::info -Cloudberry Command Center provides an interface for editing the `pg_hba.conf` file. It verifies entries before you save them, keeps a version history of the file so that you can reload a previous version of the file, and reloads the file into Cloudberry Database. +Cloudberry Command Center provides an interface for editing the `pg_hba.conf` file. It verifies entries before you save them, keeps a version history of the file so that you can reload a previous version of the file, and reloads the file into Apache Cloudberry. ::: This example shows how to edit the `pg_hba.conf` file on the coordinator host to allow remote client access to all databases from all roles using encrypted password authentication. @@ -219,7 +219,7 @@ Authentication method: ### OpenSSL Configuration -You can make changes to the OpenSSL configuration by updating the `openssl.cnf` file under your OpenSSL installation directory, or the file referenced by `$OPENSSL_CONF`, if present, and then restarting the Cloudberry Database server.
+You can make changes to the OpenSSL configuration by updating the `openssl.cnf` file under your OpenSSL installation directory, or the file referenced by `$OPENSSL_CONF`, if present, and then restarting the Apache Cloudberry server. ### Creating a Self-Signed Certificate @@ -274,14 +274,14 @@ The following Server settings need to be specified in the `postgresql.conf` con It is possible to have authentication without encryption overhead by using `NULL-SHA` or `NULL-MD5` ciphers. However, a man-in-the-middle could read and pass communications between client and server. Also, encryption overhead is minimal compared to the overhead of authentication. For these reasons, NULL ciphers should not be used. ::: -The default location for the following SSL server files is the Cloudberry Database coordinator data directory (`$COORDINATOR_DATA_DIRECTORY`): +The default location for the following SSL server files is the Apache Cloudberry coordinator data directory (`$COORDINATOR_DATA_DIRECTORY`): - `server.crt` - Server certificate. - `server.key` - Server private key. - `root.crt` - Trusted certificate authorities. - `root.crl` - Certificates revoked by certificate authorities. -If Cloudberry Database coordinator mirroring is enabled with SSL client authentication, the SSL server files *should not be placed* in the default directory `$COORDINATOR_DATA_DIRECTORY`. If a `gpinitstandby` operation is performed, the contents of `$COORDINATOR_DATA_DIRECTORY` is copied from the coordinator to the standby coordinator and the incorrect SSL key, and cert files (the coordinator files, and not the standby coordinator files) will prevent standby coordinator start up. +If Apache Cloudberry coordinator mirroring is enabled with SSL client authentication, the SSL server files *should not be placed* in the default directory `$COORDINATOR_DATA_DIRECTORY`. 
If a `gpinitstandby` operation is performed, the contents of `$COORDINATOR_DATA_DIRECTORY` is copied from the coordinator to the standby coordinator and the incorrect SSL key, and cert files (the coordinator files, and not the standby coordinator files) will prevent standby coordinator start up. You can specify a different directory for the location of the SSL server files with the `postgresql.conf` parameters `sslcert`, `sslkey`, `sslrootcert`, and `sslcrl`. @@ -337,9 +337,9 @@ psql "sslmode=verify-ca host=localhost dbname=postgres" ## Limiting Concurrent Connections -Cloudberry Database allocates some resources on a per-connection basis, so setting the maximum number of connections allowed is recommended. +Apache Cloudberry allocates some resources on a per-connection basis, so setting the maximum number of connections allowed is recommended. -To limit the number of active concurrent sessions to your Cloudberry Database system, you can configure the `max_connections` server configuration parameter. This is a *local* parameter, meaning that you must set it in the `postgresql.conf` file of the coordinator, the standby coordinator, and each segment instance (primary and mirror). The recommended value of `max_connections` on segments is 5-10 times the value on the coordinator. +To limit the number of active concurrent sessions to your Apache Cloudberry system, you can configure the `max_connections` server configuration parameter. This is a *local* parameter, meaning that you must set it in the `postgresql.conf` file of the coordinator, the standby coordinator, and each segment instance (primary and mirror). The recommended value of `max_connections` on segments is 5-10 times the value on the coordinator. When you set `max_connections`, you must also set the dependent parameter `max_prepared_transactions`. 
This value must be at least as large as the value of `max_connections` on the coordinator, and segment instances should be set to the same value as the coordinator. @@ -360,11 +360,11 @@ For example: ``` -The following steps set the parameter values with the Cloudberry Database utility `gpconfig`. +The following steps set the parameter values with the Apache Cloudberry utility `gpconfig`. ### To change the number of allowed connections -1. Log into the Cloudberry Database coordinator host as the Cloudberry Database administrator and source the file `$GPHOME/greenplum_path.sh`. +1. Log into the Apache Cloudberry coordinator host as the Apache Cloudberry administrator and source the file `$GPHOME/greenplum_path.sh`. 2. Set the value of the `max_connections` parameter. This `gpconfig` command sets the value on the segments to 1000 and the value on the coordinator to 200. ```shell @@ -381,7 +381,7 @@ The following steps set the parameter values with the Cloudberry Database utilit The value of `max_prepared_transactions` must be greater than or equal to `max_connections` on the coordinator. -4. Stop and restart your Cloudberry Database system. +4. Stop and restart your Apache Cloudberry system. ```shell $ gpstop -r @@ -394,32 +394,32 @@ The following steps set the parameter values with the Cloudberry Database utilit ``` :::info -Raising the values of these parameters may cause Cloudberry Database to request more shared memory. To mitigate this effect, consider decreasing other memory-related parameters such as `gp_cached_segworkers_threshold`. +Raising the values of these parameters may cause Apache Cloudberry to request more shared memory. To mitigate this effect, consider decreasing other memory-related parameters such as `gp_cached_segworkers_threshold`. ::: ## Encrypting Client/Server Connections -Enable SSL for client connections to Cloudberry Database to encrypt the data passed over the network between the client and the database. 
+Enable SSL for client connections to Apache Cloudberry to encrypt the data passed over the network between the client and the database. -Cloudberry Database has native support for SSL connections between the client and the coordinator server. SSL connections prevent third parties from snooping on the packets, and also prevent man-in-the-middle attacks. SSL should be used whenever the client connection goes through an insecure link, and must be used whenever client certificate authentication is used. +Apache Cloudberry has native support for SSL connections between the client and the coordinator server. SSL connections prevent third parties from snooping on the packets, and also prevent man-in-the-middle attacks. SSL should be used whenever the client connection goes through an insecure link, and must be used whenever client certificate authentication is used. -Enabling Cloudberry Database in SSL mode requires the following items. +Enabling Apache Cloudberry in SSL mode requires the following items. - OpenSSL installed on both the client and the coordinator server hosts (coordinator and standby coordinator). - The SSL files server.key (server private key) and server.crt (server certificate) should be correctly generated for the coordinator host and standby coordinator host. - - The private key should not be protected with a passphrase. The server does not prompt for a passphrase for the private key, and Cloudberry Database start up fails with an error if one is required. + - The private key should not be protected with a passphrase. The server does not prompt for a passphrase for the private key, and Apache Cloudberry start up fails with an error if one is required. - On a production system, there should be a key and certificate pair for the coordinator host and a pair for the standby coordinator host with a subject CN (Common Name) for the coordinator host and standby coordinator host. 
A self-signed certificate can be used for testing, but a certificate signed by a certificate authority (CA) should be used in production, so the client can verify the identity of the server. Either a global or local CA can be used. If all the clients are local to the organization, a local CA is recommended. -- Ensure that Cloudberry Database can access server.key and server.crt, and any additional authentication files such as `root.crt` (for trusted certificate authorities). When starting in SSL mode, the Cloudberry Database coordinator looks for server.key and server.crt. As the default, Cloudberry Database does not start if the files are not in the coordinator data directory (`$COORDINATOR_DATA_DIRECTORY`). Also, if you use other SSL authentication files such as `root.crt` (trusted certificate authorities), the files must be on the coordinator host. +- Ensure that Apache Cloudberry can access server.key and server.crt, and any additional authentication files such as `root.crt` (for trusted certificate authorities). When starting in SSL mode, the Apache Cloudberry coordinator looks for server.key and server.crt. As the default, Apache Cloudberry does not start if the files are not in the coordinator data directory (`$COORDINATOR_DATA_DIRECTORY`). Also, if you use other SSL authentication files such as `root.crt` (trusted certificate authorities), the files must be on the coordinator host. - If Cloudberry Database coordinator mirroring is enabled with SSL client authentication, SSL authentication files must be on both the coordinator host and standby coordinator host and *should not be placed* in the default directory `$COORDINATOR_DATA_DIRECTORY`. When coordinator mirroring is enabled, an `initstandby` operation copies the contents of the `$COORDINATOR_DATA_DIRECTORY` from the coordinator to the standby coordinator and the incorrect SSL key, and cert files (the coordinator files, and not the standby coordinator files) will prevent standby coordinator start up. 
+ If Apache Cloudberry coordinator mirroring is enabled with SSL client authentication, SSL authentication files must be on both the coordinator host and standby coordinator host and *should not be placed* in the default directory `$COORDINATOR_DATA_DIRECTORY`. When coordinator mirroring is enabled, an `initstandby` operation copies the contents of the `$COORDINATOR_DATA_DIRECTORY` from the coordinator to the standby coordinator and the incorrect SSL key, and cert files (the coordinator files, and not the standby coordinator files) will prevent standby coordinator start up. You can specify a different directory for the location of the SSL server files with the `postgresql.conf` parameters `sslcert`, `sslkey`, `sslrootcert`, and `sslcrl`. -Cloudberry Database can be started with SSL enabled by setting the server configuration parameter `ssl=on` in the `postgresql.conf` file on the coordinator and standby coordinator hosts. This `gpconfig` command sets the parameter: +Apache Cloudberry can be started with SSL enabled by setting the server configuration parameter `ssl=on` in the `postgresql.conf` file on the coordinator and standby coordinator hosts. This `gpconfig` command sets the parameter: ```shell gpconfig -c ssl -m on -v off @@ -439,7 +439,7 @@ Enter the information requested by the prompts. Be sure to enter the local host The program will generate a key that is passphrase protected, and does not accept a passphrase that is less than four characters long. 
-To use this certificate with Cloudberry Database, remove the passphrase with the following commands: +To use this certificate with Apache Cloudberry, remove the passphrase with the following commands: ``` # openssl rsa -in privkey.pem -out server.key diff --git a/docs/security/manage-roles-and-privileges.md b/docs/security/manage-roles-and-privileges.md index 457eabbda8..1389f7fbca 100644 --- a/docs/security/manage-roles-and-privileges.md +++ b/docs/security/manage-roles-and-privileges.md @@ -2,26 +2,26 @@ title: Manage Roles and Privileges --- -# Manage Roles and Privileges in Cloudberry Database +# Manage Roles and Privileges in Apache Cloudberry -The Cloudberry Database authorization mechanism stores roles and privileges to access database objects in the database and is administered using SQL statements or command-line utilities. +The Apache Cloudberry authorization mechanism stores roles and privileges to access database objects in the database and is administered using SQL statements or command-line utilities. -Cloudberry Database manages database access privileges using *roles*. The concept of roles subsumes the concepts of *users* and *groups*. A role can be a database user, a group, or both. Roles can own database objects (for example, tables) and can assign privileges on those objects to other roles to control access to the objects. Roles can be members of other roles, thus a member role can inherit the object privileges of its parent role. +Apache Cloudberry manages database access privileges using *roles*. The concept of roles subsumes the concepts of *users* and *groups*. A role can be a database user, a group, or both. Roles can own database objects (for example, tables) and can assign privileges on those objects to other roles to control access to the objects. Roles can be members of other roles, thus a member role can inherit the object privileges of its parent role. 
-Every Cloudberry Database system contains a set of database roles (users and groups). Those roles are separate from the users and groups managed by the operating system on which the server runs. However, for convenience you may want to maintain a relationship between operating system user names and Cloudberry Database role names, since many of the client applications use the current operating system user name as the default. +Every Apache Cloudberry system contains a set of database roles (users and groups). Those roles are separate from the users and groups managed by the operating system on which the server runs. However, for convenience you may want to maintain a relationship between operating system user names and Apache Cloudberry role names, since many of the client applications use the current operating system user name as the default. -In Cloudberry Database, users log in and connect through the coordinator instance, which then verifies their role and access privileges. The coordinator then issues commands to the segment instances behind the scenes as the currently logged-in role. +In Apache Cloudberry, users log in and connect through the coordinator instance, which then verifies their role and access privileges. The coordinator then issues commands to the segment instances behind the scenes as the currently logged-in role. Roles are defined at the system level, meaning they are valid for all databases in the system. -In order to bootstrap the Cloudberry Database system, a freshly initialized system always contains one predefined *superuser* role (also referred to as the system user). This role will have the same name as the operating system user that initialized the Cloudberry Database system. Customarily, this role is named `gpadmin`. In order to create more roles you first have to connect as this initial role. 
+In order to bootstrap the Apache Cloudberry system, a freshly initialized system always contains one predefined *superuser* role (also referred to as the system user). This role will have the same name as the operating system user that initialized the Apache Cloudberry system. Customarily, this role is named `gpadmin`. In order to create more roles you first have to connect as this initial role. ## Security best practices for roles and privileges -- **Secure the gpadmin system user.** Cloudberry Database requires a UNIX user ID to install and initialize the Cloudberry Database system. This system user is referred to as `gpadmin` in the Cloudberry Database documentation. This `gpadmin` user is the default database superuser in Cloudberry Database, as well as the file system owner of the Cloudberry Database installation and its underlying data files. This default administrator account is fundamental to the design of Cloudberry Database. The system cannot run without it, and there is no way to limit the access of this gpadmin user ID. Use roles to manage who has access to the database for specific purposes. You should only use the `gpadmin` account for system maintenance tasks such as expansion and upgrade. Anyone who logs on to a Cloudberry Database host as this user ID can read, alter or delete any data, including system catalog data and database access rights. Therefore, it is very important to secure the gpadmin user ID and only provide access to essential system administrators. Administrators should only log in to Cloudberry Database as `gpadmin` when performing certain system maintenance tasks (such as upgrade or expansion). Database users should never log on as `gpadmin`, and ETL or production workloads should never run as `gpadmin`. -- **Assign a distinct role to each user that logs in.** For logging and auditing purposes, each user that is allowed to log in to Cloudberry Database should be given their own database role. 
For applications or web services, consider creating a distinct role for each application or service. See [Create New Roles (Users)](#create-new-roles-users). +- **Secure the gpadmin system user.** Apache Cloudberry requires a UNIX user ID to install and initialize the Apache Cloudberry system. This system user is referred to as `gpadmin` in the Apache Cloudberry documentation. This `gpadmin` user is the default database superuser in Apache Cloudberry, as well as the file system owner of the Apache Cloudberry installation and its underlying data files. This default administrator account is fundamental to the design of Apache Cloudberry. The system cannot run without it, and there is no way to limit the access of this gpadmin user ID. Use roles to manage who has access to the database for specific purposes. You should only use the `gpadmin` account for system maintenance tasks such as expansion and upgrade. Anyone who logs on to an Apache Cloudberry host as this user ID can read, alter or delete any data, including system catalog data and database access rights. Therefore, it is very important to secure the gpadmin user ID and only provide access to essential system administrators. Administrators should only log in to Apache Cloudberry as `gpadmin` when performing certain system maintenance tasks (such as upgrade or expansion). Database users should never log on as `gpadmin`, and ETL or production workloads should never run as `gpadmin`. +- **Assign a distinct role to each user that logs in.** For logging and auditing purposes, each user that is allowed to log in to Apache Cloudberry should be given their own database role. For applications or web services, consider creating a distinct role for each application or service. See [Create New Roles (Users)](#create-new-roles-users). - **Use groups to manage access privileges.** See [Role membership](#role-membership).
-- **Limit users who have the SUPERUSER role attribute.** Roles that are superusers bypass all access privilege checks in Cloudberry Database, as well as resource queuing. Only system administrators should be given superuser rights. See [Altering Role Attributes](#alter-role-attributes). +- **Limit users who have the SUPERUSER role attribute.** Roles that are superusers bypass all access privilege checks in Apache Cloudberry, as well as resource queuing. Only system administrators should be given superuser rights. See [Altering Role Attributes](#alter-role-attributes). ## Create new roles (users) @@ -65,7 +65,7 @@ A role can also have role-specific defaults for many of the server configuration ## Role membership -It is frequently convenient to group users together to ease management of object privileges: that way, privileges can be granted to, or revoked from, a group as a whole. In Cloudberry Database, this is done by creating a role that represents the group, and then granting membership in the group role to individual user roles. +It is frequently convenient to group users together to ease management of object privileges: that way, privileges can be granted to, or revoked from, a group as a whole. In Apache Cloudberry, this is done by creating a role that represents the group, and then granting membership in the group role to individual user roles. Use the `CREATE ROLE` SQL command to create a new group role. For example: @@ -96,7 +96,7 @@ The role attributes `LOGIN`, `SUPERUSER`, `CREATEDB`, `CREATEROLE`, `CREATEEXTTA ## Manage object privileges -When an object (table, view, sequence, database, function, language, schema, or tablespace) is created, it is assigned an owner. The owner is normally the role that ran the creation statement. For most kinds of objects, the initial state is that only the owner (or a superuser) can do anything with the object. To allow other roles to use it, privileges must be granted. 
Cloudberry Database supports the following privileges for each object type: +When an object (table, view, sequence, database, function, language, schema, or tablespace) is created, it is assigned an owner. The owner is normally the role that ran the creation statement. For most kinds of objects, the initial state is that only the owner (or a superuser) can do anything with the object. To allow other roles to use it, privileges must be granted. Apache Cloudberry supports the following privileges for each object type: | Object Type | privileges | | :-----------------------------| :--------------------------------------------------------------------------------------| @@ -145,7 +145,7 @@ You can also use the `DROP OWNED` and `REASSIGN OWNED` commands for managing obj ## Encrypt data -Cloudberry Database is installed with an optional module of encryption/decryption functions called `pgcrypto`. The `pgcrypto` functions allow database administrators to store certain columns of data in encrypted form. This adds an extra layer of protection for sensitive data, as data stored in Cloudberry Database in encrypted form cannot be read by anyone who does not have the encryption key, nor can it be read directly from the disks. +Apache Cloudberry is installed with an optional module of encryption/decryption functions called `pgcrypto`. The `pgcrypto` functions allow database administrators to store certain columns of data in encrypted form. This adds an extra layer of protection for sensitive data, as data stored in Apache Cloudberry in encrypted form cannot be read by anyone who does not have the encryption key, nor can it be read directly from the disks. :::info The `pgcrypto` functions run inside the database server, which means that all the data and passwords move between `pgcrypto` and the client application in clear-text. 
diff --git a/docs/security/protect-passwords.md b/docs/security/protect-passwords.md index a73a6f68a4..7c822768ac 100644 --- a/docs/security/protect-passwords.md +++ b/docs/security/protect-passwords.md @@ -2,9 +2,9 @@ title: Protect Passwords --- -# Protect passwords in Cloudberry Database +# Protect passwords in Apache Cloudberry -In its default configuration, Cloudberry Database saves MD5 or SCRAM-SHA-256 hashes of login users' passwords in the `pg_authid` system catalog rather than saving clear text passwords. Anyone who is able to view the `pg_authid` table can see hash strings, but no passwords. This also ensures that passwords are obscured when the database is dumped to backup files. +In its default configuration, Apache Cloudberry saves MD5 or SCRAM-SHA-256 hashes of login users' passwords in the `pg_authid` system catalog rather than saving clear text passwords. Anyone who is able to view the `pg_authid` table can see hash strings, but no passwords. This also ensures that passwords are obscured when the database is dumped to backup files. The hash function runs when the password is set by using any of the following commands: diff --git a/docs/security/set-password-profile.md b/docs/security/set-password-profile.md index a5a08816c0..5fd7ace988 100644 --- a/docs/security/set-password-profile.md +++ b/docs/security/set-password-profile.md @@ -2,12 +2,12 @@ title: Set Password Profile --- -# Set password policy in Cloudberry Database (New in v1.5.0) +# Set password policy in Apache Cloudberry (New in v1.5.0) -Profile refers to the password policy configuration, which is used to control the password security policy of users in Cloudberry Database. You can bind a profile to one or more users to control the password security policy of database users. Profile defines the rules for user management and password reuse. 
With Profile, the database administrator can use SQL to force some constraints, such as locking accounts after login failures or controlling the number of password reuses. +Profile refers to the password policy configuration, which is used to control the password security policy of users in Apache Cloudberry. You can bind a profile to one or more users to control the password security policy of database users. Profile defines the rules for user management and password reuse. With Profile, the database administrator can use SQL to force some constraints, such as locking accounts after login failures or controlling the number of password reuses. :::info -- In general, Profile includes password policy and user resource usage restrictions. Profile in Cloudberry Database only supports password policy. "Profile" mentioned in this document refers to password policy configuration. +- In general, Profile includes password policy and user resource usage restrictions. Profile in Apache Cloudberry only supports password policy. "Profile" mentioned in this document refers to password policy configuration. - Only superusers can create or modify Profile policies, and superusers are not restricted by any Profile policies. Profile policies will take effect only when regular users are allowed to use Profile. ::: @@ -24,7 +24,7 @@ gpstop -ra ## Implementation principle -Similar to the Autovacuum mechanism, Profile introduces the Login Monitor Launcher and Login Monitor Worker processes. When user login verification fails, Cloudberry Database will send a signal to the postmaster. After receiving the signal, the postmaster will send a signal to the launcher process. After receiving the signal, the launcher process will notify the postmaster to launch a worker process to perform the metadata write-back operation, and notify the user process and the launcher process after completion. 
+Similar to the Autovacuum mechanism, Profile introduces the Login Monitor Launcher and Login Monitor Worker processes. When user login verification fails, Apache Cloudberry will send a signal to the postmaster. After receiving the signal, the postmaster will send a signal to the launcher process. After receiving the signal, the launcher process will notify the postmaster to launch a worker process to perform the metadata write-back operation, and notify the user process and the launcher process after completion. ## Set password policies using SQL @@ -126,7 +126,7 @@ ALTER USER user ACCOUNT ## Check password policy information in system tables -After applying the password configuration policy, Cloudberry Database will update metadata: add two system tables `pg_profile` and `pg_password_history`, and add some fields to the system tables/views `pg_authid` and `pg_roles`. For example: +After applying the password configuration policy, Apache Cloudberry will update metadata: add two system tables `pg_profile` and `pg_password_history`, and add some fields to the system tables/views `pg_authid` and `pg_roles`. For example: - **pg_catalog.pg_roles**: In `pg_roles`, the `rolprofile`, `rolaccountstatus`, and `rolfailedlogins` fields are added to record database users who use Profile, the account status, and the number of failed logins. @@ -261,7 +261,7 @@ After applying the password configuration policy, Cloudberry Database will updat ## Default password policy -When you create a user, Cloudberry Database applies the default Profile to the user by default if no specific password policy is specified. The default Profile is the default password policy during system initialization. The default Profile in Cloudberry Database is the `pg_default` row in the `pg_profile` table. The `pg_default` row defines default values for the Profile parameters, and only superusers can modify these parameters. 
+When you create a user, Apache Cloudberry applies the default Profile to the user by default if no specific password policy is specified. The default Profile is the default password policy during system initialization. The default Profile in Apache Cloudberry is the `pg_default` row in the `pg_profile` table. The `pg_default` row defines default values for the Profile parameters, and only superusers can modify these parameters. If a user sets a parameter with the default value `-1`, the parameter will get its value from `pg_default`. The default values of `pg_default` are as follows. Refer to [Scenario 3](#scenario-3-use-default-profile) for how to use the default Profile. @@ -444,7 +444,7 @@ If `PASSWORD_REUSE_MAX` is set to `0`, the password can never be changed. If set ### Scenario 3: Use DEFAULT PROFILE -If you do not explicitly specify a parameter value when creating a profile, the parameter value in the `pg_profile` table is `-1` by default, which means that Cloudberry Database will obtain the value of this parameter from `pg_default`. +If you do not explicitly specify a parameter value when creating a profile, the parameter value in the `pg_profile` table is `-1` by default, which means that Apache Cloudberry will obtain the value of this parameter from `pg_default`. Take `FAILED_LOGIN_ATTEMPTS` as an example: diff --git a/docs/security/transparent-data-encryption.md b/docs/security/transparent-data-encryption.md index 301ff3ca10..bfe43b278e 100644 --- a/docs/security/transparent-data-encryption.md +++ b/docs/security/transparent-data-encryption.md @@ -4,7 +4,7 @@ title: Transparent Data Encryption # Transparent Data Encryption -To meet the requirements for protecting user data security, Cloudberry Database supports Transparent Data Encryption (TDE). +To meet the requirements for protecting user data security, Apache Cloudberry supports Transparent Data Encryption (TDE). 
TDE is a technology used to encrypt database data files: @@ -32,7 +32,7 @@ Encryption algorithms are divided into the following types: - Symmetric encryption: The same key is used for both encryption and decryption. - Asymmetric encryption: Public key for encryption, private key for decryption, suitable for one-to-many and many-to-one encryption needs. -Block encryption algorithms in symmetric encryption are the mainstream choice, offering better performance than stream encryption and asymmetric encryption. Cloudberry Database supports two block encryption algorithms: AES and SM4. +Block encryption algorithms in symmetric encryption are the mainstream choice, offering better performance than stream encryption and asymmetric encryption. Apache Cloudberry supports two block encryption algorithms: AES and SM4. #### AES encryption algorithm @@ -56,10 +56,10 @@ More ISO/IEC encryption algorithms include: Before using the TDE feature, ensure the following conditions are met: -- Install OpenSSL: OpenSSL is expected to be installed on the Cloudberry Database node. Typically, Linux distributions come with OpenSSL pre-installed. -- Cloudberry Database version: Make sure your Cloudberry Database version is not less than v1.6.0, which is when TDE support was introduced. +- Install OpenSSL: OpenSSL is expected to be installed on the Apache Cloudberry node. Typically, Linux distributions come with OpenSSL pre-installed. +- Apache Cloudberry version: Make sure your Apache Cloudberry version is not less than v1.6.0, which is when TDE support was introduced. -When deploying Cloudberry Database, you can enable the TDE feature through settings, making all subsequent data encryption operations completely transparent to users. To enable TDE during database initialization, use the `gpinitsystem` command with the `-T` parameter. Cloudberry Database supports two encryption algorithms: AES and SM4. 
Here are examples of enabling TDE: +When deploying Apache Cloudberry, you can enable the TDE feature through settings, making all subsequent data encryption operations completely transparent to users. To enable TDE during database initialization, use the `gpinitsystem` command with the `-T` parameter. Apache Cloudberry supports two encryption algorithms: AES and SM4. Here are examples of enabling TDE: - Using the AES256 encryption algorithm: diff --git a/docs/sql-stmts/abort.md b/docs/sql-stmts/abort.md index 189c6ed4c6..9d1f9aa795 100644 --- a/docs/sql-stmts/abort.md +++ b/docs/sql-stmts/abort.md @@ -43,7 +43,7 @@ ABORT; ## Compatibility -This command is a Cloudberry Database extension present for historical reasons. `ROLLBACK` is the equivalent standard SQL command. +This command is an Apache Cloudberry extension present for historical reasons. `ROLLBACK` is the equivalent standard SQL command. ## See also diff --git a/docs/sql-stmts/alter-database.md b/docs/sql-stmts/alter-database.md index 923ee8f41a..32340c9ba2 100644 --- a/docs/sql-stmts/alter-database.md +++ b/docs/sql-stmts/alter-database.md @@ -41,7 +41,7 @@ The third form changes the owner of the database. To alter the owner, you must o The fourth form changes the default tablespace of the database. Only the database owner or a superuser can do this; you must also have create privilege for the new tablespace. This command physically moves any tables or indexes in the database's old default tablespace to the new tablespace. The new default tablespace must be empty for this database, and no one can be connected to the database. Note that tables and indexes in non-default tablespaces are not affected. -The remaining forms change the session default for a configuration parameter for a Cloudberry Database. Whenever a new session is subsequently started in that database, the specified value becomes the session default value.
The database-specific default overrides whatever setting is present in the server configuration file (`postgresql.conf`). Only the database owner or a superuser can change the session defaults for a database. Certain parameters cannot be set this way, or can only be set by a superuser. +The remaining forms change the session default for a configuration parameter for an Apache Cloudberry database. Whenever a new session is subsequently started in that database, the specified value becomes the session default value. The database-specific default overrides whatever setting is present in the server configuration file (`postgresql.conf`). Only the database owner or a superuser can change the session defaults for a database. Certain parameters cannot be set this way, or can only be set by a superuser. ## Parameters @@ -55,7 +55,7 @@ If `false`, then no one can connect to this database. **`connlimit`** -The maximum number of concurrent connections allowed to this database on the coordinator. The default is `-1`, no limit. Cloudberry Database superusers are exempt from this limit. +The maximum number of concurrent connections allowed to this database on the coordinator. The default is `-1`, no limit. Apache Cloudberry superusers are exempt from this limit. **`istemplate`** @@ -101,7 +101,7 @@ ALTER DATABASE mydatabase SET search_path TO myschema, public, pg_catalog; ## Compatibility -The `ALTER DATABASE` statement is a Cloudberry Database extension. +The `ALTER DATABASE` statement is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/alter-default-privileges.md b/docs/sql-stmts/alter-default-privileges.md index be72297742..a6d9491cb5 100644 --- a/docs/sql-stmts/alter-default-privileges.md +++ b/docs/sql-stmts/alter-default-privileges.md @@ -74,7 +74,7 @@ REVOKE [ GRANT OPTION FOR ] ## Description -`ALTER DEFAULT PRIVILEGES` allows you to set the privileges that will be applied to objects created in the future.
(It does not affect privileges assigned to already-existing objects.) Currently, only the privileges for schemas, tables (including views and foreign tables), sequences, functions, and types (including domains) can be altered. For this command, functions include aggregates and procedures. The words `FUNCTIONS` and `ROUTINES` are equivalent in this command. (`ROUTINES` is preferred going forward as the standard term for functions and procedures taken together. In earlier Cloudberry Database releases, only the word `FUNCTIONS` was allowed. It is not possible to set default privileges for functions and procedures separately.) +`ALTER DEFAULT PRIVILEGES` allows you to set the privileges that will be applied to objects created in the future. (It does not affect privileges assigned to already-existing objects.) Currently, only the privileges for schemas, tables (including views and foreign tables), sequences, functions, and types (including domains) can be altered. For this command, functions include aggregates and procedures. The words `FUNCTIONS` and `ROUTINES` are equivalent in this command. (`ROUTINES` is preferred going forward as the standard term for functions and procedures taken together. In earlier Apache Cloudberry releases, only the word `FUNCTIONS` was allowed. It is not possible to set default privileges for functions and procedures separately.) You can change default privileges only for objects that will be created by yourself or by roles that you are a member of. The privileges can be set globally (i.e., for all objects created in the current database), or just for objects created in specified schemas. 
diff --git a/docs/sql-stmts/alter-domain.md b/docs/sql-stmts/alter-domain.md index 0c78f4b335..5a5f82c541 100644 --- a/docs/sql-stmts/alter-domain.md +++ b/docs/sql-stmts/alter-domain.md @@ -132,7 +132,7 @@ ALTER DOMAIN zipcode SET SCHEMA customers; ## Compatibility -`ALTER DOMAIN` conforms to the SQL standard, except for the `OWNER`, `RENAME`, `SET SCHEMA`, and `VALIDATE CONSTRAINT` variants, which are Cloudberry Database extensions. The `NOT VALID` clause of the `ADD CONSTRAINT` variant is also a Cloudberry Database extension. +`ALTER DOMAIN` conforms to the SQL standard, except for the `OWNER`, `RENAME`, `SET SCHEMA`, and `VALIDATE CONSTRAINT` variants, which are Apache Cloudberry extensions. The `NOT VALID` clause of the `ADD CONSTRAINT` variant is also an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/alter-extension.md b/docs/sql-stmts/alter-extension.md index b5e9d6a015..05230a99d0 100644 --- a/docs/sql-stmts/alter-extension.md +++ b/docs/sql-stmts/alter-extension.md @@ -158,7 +158,7 @@ ALTER EXTENSION hstore ADD FUNCTION populate_record(anyelement, hstore); ## Compatibility -`ALTER EXTENSION` is a Cloudberry Database extension. +`ALTER EXTENSION` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/alter-external-table.md b/docs/sql-stmts/alter-external-table.md index f362fce709..2a45e1144e 100644 --- a/docs/sql-stmts/alter-external-table.md +++ b/docs/sql-stmts/alter-external-table.md @@ -26,7 +26,7 @@ where action is one of: `ALTER EXTERNAL TABLE` changes the definition of an existing external table. These are the supported `ALTER EXTERNAL TABLE` actions: - **ADD COLUMN** — Adds a new column to the external table definition. -- **DROP COLUMN** — Drops a column from the external table definition. If you drop readable external table columns, it only changes the table definition in Cloudberry Database.
The `CASCADE` keyword is required if anything outside the table depends on the column, such as a view that references the column. +- **DROP COLUMN** — Drops a column from the external table definition. If you drop readable external table columns, it only changes the table definition in Apache Cloudberry. The `CASCADE` keyword is required if anything outside the table depends on the column, such as a view that references the column. - **ALTER COLUMN TYPE** — Changes the data type of a table column. - **OWNER** — Changes the owner of the external table to the specified user. @@ -95,7 +95,7 @@ ALTER EXTERNAL TABLE ext_leads ALTER COLUMN acct_code TYPE integer; ## Compatibility -`ALTER EXTERNAL TABLE` is a Cloudberry Database extension. There is no `ALTER EXTERNAL TABLE` statement in the SQL standard or regular PostgreSQL. +`ALTER EXTERNAL TABLE` is an Apache Cloudberry extension. There is no `ALTER EXTERNAL TABLE` statement in the SQL standard or regular PostgreSQL. ## See also diff --git a/docs/sql-stmts/alter-foreign-data-wrapper.md b/docs/sql-stmts/alter-foreign-data-wrapper.md index 20bbfb1b8d..868cf73bc8 100644 --- a/docs/sql-stmts/alter-foreign-data-wrapper.md +++ b/docs/sql-stmts/alter-foreign-data-wrapper.md @@ -20,7 +20,7 @@ ALTER FOREIGN DATA WRAPPER RENAME TO ## Description -`ALTER FOREIGN DATA WRAPPER` changes the definition of a foreign-data wrapper. The first form of the command changes the support functions or generic options of the foreign-data wrapper. Cloudberry Database requires at least one clause. The second and third forms of the command change the owner or name of the foreign-data wrapper. +`ALTER FOREIGN DATA WRAPPER` changes the definition of a foreign-data wrapper. The first form of the command changes the support functions or generic options of the foreign-data wrapper. Apache Cloudberry requires at least one clause. The second and third forms of the command change the owner or name of the foreign-data wrapper.
Only superusers can alter foreign-data wrappers. Additionally, only superusers can own foreign-data wrappers @@ -44,7 +44,7 @@ Specifies that the foreign-data wrapper should no longer have a handler function Specifies a new validator function for the foreign-data wrapper. -Note that it is possible that pre-existing options of the foreign-data wrapper, or of dependent servers, user mappings, or foreign tables, may become invalid when you change the validator function. Cloudberry Database does not check for this. You must make sure that these options are correct before using the modified foreign-data wrapper. However, Cloudberry Database will check any options specified in this `ALTER FOREIGN DATA WRAPPER` command using the new validator. +Note that it is possible that pre-existing options of the foreign-data wrapper, or of dependent servers, user mappings, or foreign tables, may become invalid when you change the validator function. Apache Cloudberry does not check for this. You must make sure that these options are correct before using the modified foreign-data wrapper. However, Apache Cloudberry will check any options specified in this `ALTER FOREIGN DATA WRAPPER` command using the new validator. **`NO VALIDATOR`** @@ -52,7 +52,7 @@ Specifies that the foreign-data wrapper should no longer have a validator functi **`OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ] )`** -Change the foreign-data wrapper's options. `ADD`, `SET`, and `DROP` specify the action to perform. If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique. Cloudberry Database validates names and values using the foreign-data wrapper's validator function, if any. +Change the foreign-data wrapper's options. `ADD`, `SET`, and `DROP` specify the action to perform. If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique. 
Apache Cloudberry validates names and values using the foreign-data wrapper's validator function, if any. **`OWNER TO new_owner`** @@ -78,7 +78,7 @@ ALTER FOREIGN DATA WRAPPER dbi VALIDATOR bob.myvalidator; ## Compatibility -`ALTER FOREIGN DATA WRAPPER` conforms to ISO/IEC 9075-9 (SQL/MED), with the exception that the `HANDLER`, `VALIDATOR`, `OWNER TO`, and `RENAME TO` clauses are Cloudberry Database extensions. +`ALTER FOREIGN DATA WRAPPER` conforms to ISO/IEC 9075-9 (SQL/MED), with the exception that the `HANDLER`, `VALIDATOR`, `OWNER TO`, and `RENAME TO` clauses are Apache Cloudberry extensions. ## See also diff --git a/docs/sql-stmts/alter-foreign-table.md b/docs/sql-stmts/alter-foreign-table.md index 27ad19223a..4ce6be914e 100644 --- a/docs/sql-stmts/alter-foreign-table.md +++ b/docs/sql-stmts/alter-foreign-table.md @@ -55,15 +55,15 @@ This form adds a new column to the foreign table, using the same syntax as [CREA **`DROP COLUMN [ IF EXISTS ]`** -This form drops a column from a foreign table. You must specify `CASCADE` if any objects outside of the table depend on the column; for example, views. If you specify `IF EXISTS` and the column does not exist, no error is thrown. In this case, Cloudberry Database issues a notice instead. +This form drops a column from a foreign table. You must specify `CASCADE` if any objects outside of the table depend on the column; for example, views. If you specify `IF EXISTS` and the column does not exist, no error is thrown. In this case, Apache Cloudberry issues a notice instead. **`IF EXISTS`** -If you specify `IF EXISTS` and the foreign table does not exist, no error is thrown. Cloudberry Database issues a notice instead. +If you specify `IF EXISTS` and the foreign table does not exist, no error is thrown. Apache Cloudberry issues a notice instead. **`SET DATA TYPE`** -This form changes the type of a column of a foreign table. 
Again, this has no effect on any underlying storage: this action simply changes the type that Cloudberry Database believes the column to have. +This form changes the type of a column of a foreign table. Again, this has no effect on any underlying storage: this action simply changes the type that Apache Cloudberry believes the column to have. **`SET/DROP DEFAULT`** @@ -130,9 +130,9 @@ This form moves the foreign table into another schema. **`OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ] )`** -Change options for the foreign table. `ADD`, `SET`, and `DROP` specify the action to perform. If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique (although it's OK for a table option and a column option to have the same name). Cloudberry Database also validates names and values using the server's foreign-data wrapper. +Change options for the foreign table. `ADD`, `SET`, and `DROP` specify the action to perform. If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique (although it's OK for a table option and a column option to have the same name). Apache Cloudberry also validates names and values using the server's foreign-data wrapper. -You can combine all of the actions except `RENAME` and `SET SCHEMA` into a list of multiple alterations for Cloudberry Database to apply in parallel. For example, it is possible to add several columns and/or alter the type of several columns in a single command. +You can combine all of the actions except `RENAME` and `SET SCHEMA` into a list of multiple alterations for Apache Cloudberry to apply in parallel. For example, it is possible to add several columns and/or alter the type of several columns in a single command. If the command is written as `ALTER FOREIGN TABLE IF EXISTS ...` and the foreign table does not exist, no error is thrown. A notice is issued in this case. 
@@ -225,7 +225,7 @@ ALTER FOREIGN TABLE myschema.distributors ## Compatibility -The forms `ADD`, `DROP`, and `SET DATA TYPE` conform with the SQL standard. The other forms are Cloudberry Database extensions of the SQL standard. The ability to specify more than one manipulation in a single `ALTER FOREIGN TABLE` command is also a Cloudberry Database extension. +The forms `ADD`, `DROP`, and `SET DATA TYPE` conform with the SQL standard. The other forms are Apache Cloudberry extensions of the SQL standard. The ability to specify more than one manipulation in a single `ALTER FOREIGN TABLE` command is also an Apache Cloudberry extension. You can use `ALTER FOREIGN TABLE ... DROP COLUMN` to drop the only column of a foreign table, leaving a zero-column table. This is an extension of SQL, which disallows zero-column foreign tables. diff --git a/docs/sql-stmts/alter-function.md b/docs/sql-stmts/alter-function.md index 1226c00844..b662ec7bd0 100644 --- a/docs/sql-stmts/alter-function.md +++ b/docs/sql-stmts/alter-function.md @@ -113,13 +113,13 @@ Change whether the function is considered leakproof or not. See [CREATE FUNCTION The `EXECUTE ON` attributes specify where (coordinator or segment instance) a function runs when it is invoked during the query execution process. -`EXECUTE ON ANY` (the default) indicates that the function can be run on the coordinator, or any segment instance, and it returns the same result regardless of where it is run. Cloudberry Database determines where the function runs. +`EXECUTE ON ANY` (the default) indicates that the function can be run on the coordinator, or any segment instance, and it returns the same result regardless of where it is run. Apache Cloudberry determines where the function runs. `EXECUTE ON COORDINATOR` indicates that the function must run only on the coordinator instance. `EXECUTE ON ALL SEGMENTS` indicates that the function must run on all primary segment instances, but not the coordinator, for each invocation. 
The overall result of the function is the `UNION ALL` of the results from all segment instances. -`EXECUTE ON INITPLAN` indicates that the function contains an SQL command that dispatches queries to the segment instances and requires special processing on the coordinator instance by Cloudberry Database when possible. +`EXECUTE ON INITPLAN` indicates that the function contains an SQL command that dispatches queries to the segment instances and requires special processing on the coordinator instance by Apache Cloudberry when possible. For more information about the `EXECUTE ON` attributes, see [CREATE FUNCTION](/docs/sql-stmts/create-function.md). @@ -149,7 +149,7 @@ Ignored for conformance with the SQL standard. ## Notes -Cloudberry Database has limitations on the use of functions defined as `STABLE` or `VOLATILE`. See [CREATE FUNCTION](/docs/sql-stmts/create-function.md) for more information. +Apache Cloudberry has limitations on the use of functions defined as `STABLE` or `VOLATILE`. See [CREATE FUNCTION](/docs/sql-stmts/create-function.md) for more information. ## Examples @@ -193,7 +193,7 @@ The function will now execute with whatever search path is used by its caller. ## Compatibility -This statement is partially compatible with the `ALTER FUNCTION` statement in the SQL standard. The standard allows more properties of a function to be modified, but does not provide the ability to rename a function, make a function a security definer, attach configuration parameter values to a function, or change the owner, schema, or volatility of a function. The standard also requires the `RESTRICT` key word, which is optional in Cloudberry Database. +This statement is partially compatible with the `ALTER FUNCTION` statement in the SQL standard. 
The standard allows more properties of a function to be modified, but does not provide the ability to rename a function, make a function a security definer, attach configuration parameter values to a function, or change the owner, schema, or volatility of a function. The standard also requires the `RESTRICT` key word, which is optional in Apache Cloudberry. ## See also diff --git a/docs/sql-stmts/alter-index.md b/docs/sql-stmts/alter-index.md index 2de5cc7dea..5d0a64b253 100644 --- a/docs/sql-stmts/alter-index.md +++ b/docs/sql-stmts/alter-index.md @@ -59,7 +59,7 @@ This form sets the per-column statistics-gathering target for subsequent `ANALYZ **`IF EXISTS`** -Do not throw an error if the index does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the index does not exist. Apache Cloudberry issues a notice in this case. **`column_number`** @@ -123,7 +123,7 @@ ALTER INDEX coord_idx ALTER COLUMN 3 SET STATISTICS 1000; ## Compatibility -`ALTER INDEX` is a Cloudberry Database extension to the SQL standard. +`ALTER INDEX` is an Apache Cloudberry extension to the SQL standard. ## See also diff --git a/docs/sql-stmts/alter-materialized-view.md b/docs/sql-stmts/alter-materialized-view.md index a201b09c39..a6fa62e535 100644 --- a/docs/sql-stmts/alter-materialized-view.md +++ b/docs/sql-stmts/alter-materialized-view.md @@ -85,7 +85,7 @@ ALTER MATERIALIZED VIEW foo RENAME TO bar; ## Compatibility -`ALTER MATERIALIZED VIEW` is a Cloudberry Database extension of the SQL standard. +`ALTER MATERIALIZED VIEW` is an Apache Cloudberry extension of the SQL standard. 
## See also diff --git a/docs/sql-stmts/alter-operator-family.md b/docs/sql-stmts/alter-operator-family.md index cf56251e2c..6934b379bf 100644 --- a/docs/sql-stmts/alter-operator-family.md +++ b/docs/sql-stmts/alter-operator-family.md @@ -102,7 +102,7 @@ Because the index machinery does not check access permissions on functions befor The operators should not be defined by SQL functions. A SQL function is likely to be inlined into the calling query, which will prevent the optimizer from recognizing that the query matches an index. -Before Cloudberry Database 6.0, the `OPERATOR` clause could include a `RECHECK` option. This option is no longer supported. Cloudberry Database now determines whether an index operator is "lossy" on-the-fly at run time. This allows more efficient handling of cases where an operator might or might not be lossy. +Before Apache Cloudberry 6.0, the `OPERATOR` clause could include a `RECHECK` option. This option is no longer supported. Apache Cloudberry now determines whether an index operator is "lossy" on-the-fly at run time. This allows more efficient handling of cases where an operator might or might not be lossy. ## Examples diff --git a/docs/sql-stmts/alter-policy.md b/docs/sql-stmts/alter-policy.md index e96bf2a0cb..ce81a06b3f 100644 --- a/docs/sql-stmts/alter-policy.md +++ b/docs/sql-stmts/alter-policy.md @@ -53,7 +53,7 @@ The `WITH CHECK` expression for the policy. See [CREATE POLICY](/docs/sql-stmts/ ## Compatibility -`ALTER POLICY` is a Cloudberry Database extension to the SQL standard. +`ALTER POLICY` is an Apache Cloudberry extension to the SQL standard. ## See also diff --git a/docs/sql-stmts/alter-procedure.md b/docs/sql-stmts/alter-procedure.md index 2e1083df24..9b839b133c 100644 --- a/docs/sql-stmts/alter-procedure.md +++ b/docs/sql-stmts/alter-procedure.md @@ -92,7 +92,7 @@ Ignored for conformance with the SQL standard. ## Notes -Cloudberry Database has limitations on the use of functions defined as `STABLE` or `VOLATILE`. 
See [CREATE FUNCTION](/docs/sql-stmts/create-function.md) for more information. +Apache Cloudberry has limitations on the use of functions defined as `STABLE` or `VOLATILE`. See [CREATE FUNCTION](/docs/sql-stmts/create-function.md) for more information. ## Examples @@ -136,7 +136,7 @@ The procedure will now execute with whatever search path is used by its caller. ## Compatibility -This statement is partially compatible with the `ALTER PROCEDURE` statement in the SQL standard. The standard allows more properties of a procedure to be modified, but does not provide the ability to rename a procedure, make a procedure a security definer, attach configuration parameter values to a procedure, or change the owner, schema, or volatility of a procedure. The standard also requires the `RESTRICT` key word, which is optional in Cloudberry Database. +This statement is partially compatible with the `ALTER PROCEDURE` statement in the SQL standard. The standard allows more properties of a procedure to be modified, but does not provide the ability to rename a procedure, make a procedure a security definer, attach configuration parameter values to a procedure, or change the owner, schema, or volatility of a procedure. The standard also requires the `RESTRICT` key word, which is optional in Apache Cloudberry. ## See also diff --git a/docs/sql-stmts/alter-resource-group.md b/docs/sql-stmts/alter-resource-group.md index a7c97a99a6..bf82af1db8 100644 --- a/docs/sql-stmts/alter-resource-group.md +++ b/docs/sql-stmts/alter-resource-group.md @@ -30,7 +30,7 @@ You can set or reset the concurrency limit of a resource group that you create f When you alter the CPU resource management mode or limit of a resource group, the new mode or limit is immediately applied. 
-When you alter a memory limit of a resource group that you create for roles, the new resource limit is immediately applied if current resource usage is less than or equal to the new value and there are no running transactions in the resource group. If the current resource usage exceeds the new memory limit value, or if there are running transactions in other resource groups that hold some of the resource, then Cloudberry Database defers assigning the new limit until resource usage falls within the range of the new value. +When you alter a memory limit of a resource group that you create for roles, the new resource limit is immediately applied if current resource usage is less than or equal to the new value and there are no running transactions in the resource group. If the current resource usage exceeds the new memory limit value, or if there are running transactions in other resource groups that hold some of the resource, then Apache Cloudberry defers assigning the new limit until resource usage falls within the range of the new value. When you increase the memory limit of a resource group that you create for external components, the new resource limit is phased in as system memory resources become available. If you decrease the memory limit of a resource group that you create for external components, the behavior is component-specific. For example, if you decrease the memory limit of a resource group that you create for a PL/Container runtime, queries in a running container may fail with an out of memory error. @@ -66,7 +66,7 @@ The scheduling priority of the current group. The value range is `1-500`, the de Specify cores as a comma-separated list of single core numbers or core number intervals. Define the coordinator host cores first, followed by segment host cores, and separate the two with a semicolon. You must enclose the full core configuration in single quotes. 
For example, '1;1,3-4' configures core 1 for the coordinator host, and cores 1, 3, and 4 for the segment hosts. -> **Note** You can configure `CPUSET` for a resource group only after you have enabled resource group-based resource management for your Cloudberry Database cluster. +> **Note** You can configure `CPUSET` for a resource group only after you have enabled resource group-based resource management for your Apache Cloudberry cluster. **`MEMORY_LIMIT integer`** @@ -120,7 +120,7 @@ ALTER RESOURCE GROUP rgroup5 SET CPUSET '1;1'; ## Compatibility -The `ALTER RESOURCE GROUP` statement is a Cloudberry Database extension. This command does not exist in standard PostgreSQL. +The `ALTER RESOURCE GROUP` statement is an Apache Cloudberry extension. This command does not exist in standard PostgreSQL. ## See also diff --git a/docs/sql-stmts/alter-resource-queue.md b/docs/sql-stmts/alter-resource-queue.md index 4f31011177..9f379baab3 100644 --- a/docs/sql-stmts/alter-resource-queue.md +++ b/docs/sql-stmts/alter-resource-queue.md @@ -77,9 +77,9 @@ Sets the priority of queries associated with a resource queue. Queries or statem ## Notes -GPORCA and the Postgres planner utilize different query costing models and may compute different costs for the same query. The Cloudberry Database resource queue resource management scheme neither differentiates nor aligns costs between GPORCA and the Postgres Planner; it uses the literal cost value returned from the optimizer to throttle queries. +GPORCA and the Postgres planner utilize different query costing models and may compute different costs for the same query. The Apache Cloudberry resource queue resource management scheme neither differentiates nor aligns costs between GPORCA and the Postgres Planner; it uses the literal cost value returned from the optimizer to throttle queries. 
-When resource queue-based resource management is active, use the `MEMORY_LIMIT` and `ACTIVE_STATEMENTS` limits for resource queues rather than configuring cost-based limits. Even when using GPORCA, Cloudberry Database may fall back to using the Postgres Planner for certain queries, so using cost-based limits can lead to unexpected results. +When resource queue-based resource management is active, use the `MEMORY_LIMIT` and `ACTIVE_STATEMENTS` limits for resource queues rather than configuring cost-based limits. Even when using GPORCA, Apache Cloudberry may fall back to using the Postgres Planner for certain queries, so using cost-based limits can lead to unexpected results. ## Examples @@ -123,7 +123,7 @@ ALTER RESOURCE QUEUE myqueue WITHOUT (MAX_COST, MEMORY_LIMIT); ## Compatibility -The `ALTER RESOURCE QUEUE` statement is a Cloudberry Database extension. This command does not exist in standard PostgreSQL. +The `ALTER RESOURCE QUEUE` statement is an Apache Cloudberry extension. This command does not exist in standard PostgreSQL. ## See also diff --git a/docs/sql-stmts/alter-role.md b/docs/sql-stmts/alter-role.md index 877a1501be..209469e3bb 100644 --- a/docs/sql-stmts/alter-role.md +++ b/docs/sql-stmts/alter-role.md @@ -54,7 +54,7 @@ where `` can be: ## Description -`ALTER ROLE` changes the attributes of a Cloudberry Database role. There are several variants of this command. +`ALTER ROLE` changes the attributes of an Apache Cloudberry role. There are several variants of this command. **`WITH option`** @@ -228,7 +228,7 @@ ALTER ROLE parttime_user RESOURCE GROUP rg_light; ## Compatibility -The `ALTER ROLE` statement is a Cloudberry Database extension. +The `ALTER ROLE` statement is an Apache Cloudberry extension. 
## See also diff --git a/docs/sql-stmts/alter-routine.md b/docs/sql-stmts/alter-routine.md index 9a46a65415..dbea3aed7e 100644 --- a/docs/sql-stmts/alter-routine.md +++ b/docs/sql-stmts/alter-routine.md @@ -55,7 +55,7 @@ This command will work independent of whether `foo` is an aggregate, function, o ## Compatibility -This statement is partially compatible with the `ALTER ROUTINE` statement in the SQL standard. Refer to [ALTER FUNCTION](/docs/sql-stmts/alter-function.md) and [ALTER PROCEDURE](/docs/sql-stmts/alter-procedure.md) for more details. Allowing routine names to refer to aggregate functions is a Cloudberry Database extension. +This statement is partially compatible with the `ALTER ROUTINE` statement in the SQL standard. Refer to [ALTER FUNCTION](/docs/sql-stmts/alter-function.md) and [ALTER PROCEDURE](/docs/sql-stmts/alter-procedure.md) for more details. Allowing routine names to refer to aggregate functions is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/alter-rule.md b/docs/sql-stmts/alter-rule.md index 8de57866b1..2ff99ed02a 100644 --- a/docs/sql-stmts/alter-rule.md +++ b/docs/sql-stmts/alter-rule.md @@ -42,7 +42,7 @@ ALTER RULE notify_all ON emp RENAME TO notify_me; ## Compatibility -`ALTER RULE` is an Apache Cloudberry extension, as is the entire query rewrite system. +`ALTER RULE` is an Apache Cloudberry extension, as is the entire query rewrite system. ## See also diff --git a/docs/sql-stmts/alter-sequence.md b/docs/sql-stmts/alter-sequence.md index df03e0df4e..1d5efeed86 100644 --- a/docs/sql-stmts/alter-sequence.md +++ b/docs/sql-stmts/alter-sequence.md @@ -42,13 +42,13 @@ The name (optionally schema-qualified) of a sequence to be altered. **`IF EXISTS`** -Do not throw an error if the sequence does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the sequence does not exist. Apache Cloudberry issues a notice in this case. 
**`data_type`** The optional clause `AS data_type` changes the data type of the sequence. Valid types are `smallint`, `integer`, and `bigint`. -Changing the data type automatically changes the minimum and maximum values of the sequence if and only if the previous minimum and maximum values were the minimum or maximum value of the old data type (in other words, if the sequence had been created using `NO MINVALUE` or `NO MAXVALUE`, implicitly or explicitly). Otherwise, the minimum and maximum values are preserved, unless new values are given as part of the same command. If the minimum and maximum values do not fit into the new data type, Cloudberry Database generates an error. +Changing the data type automatically changes the minimum and maximum values of the sequence if and only if the previous minimum and maximum values were the minimum or maximum value of the old data type (in other words, if the sequence had been created using `NO MINVALUE` or `NO MAXVALUE`, implicitly or explicitly). Otherwise, the minimum and maximum values are preserved, unless new values are given as part of the same command. If the minimum and maximum values do not fit into the new data type, Apache Cloudberry generates an error. **`increment`** @@ -78,7 +78,7 @@ In contrast to a `setval()` call, a `RESTART` operation on a sequence is transac The clause `CACHE cache` enables sequence numbers to be preallocated and stored in memory for faster access. The minimum value is 1 (only one value can be generated at a time, i.e., no cache). If unspecified, the old cache value will be maintained. -> **Note** When operating with a cache of sequence numbers (`cache > 1`), Cloudberry Database may discard some cached sequence values. If you require consecutive values, you must explicitly set `CACHE 1` when you create or alter the sequence. +> **Note** When operating with a cache of sequence numbers (`cache > 1`), Apache Cloudberry may discard some cached sequence values. 
If you require consecutive values, you must explicitly set `CACHE 1` when you create or alter the sequence. **`CYCLE`** @@ -123,7 +123,7 @@ ALTER SEQUENCE serial RESTART WITH 105; ## Compatibility -`ALTER SEQUENCE` conforms to the SQL standard, except for the `AS`, `START WITH`, `OWNED BY`, `OWNER TO`, `RENAME TO`, and `SET SCHEMA` clauses, which are Cloudberry Database extensions. +`ALTER SEQUENCE` conforms to the SQL standard, except for the `AS`, `START WITH`, `OWNED BY`, `OWNER TO`, `RENAME TO`, and `SET SCHEMA` clauses, which are Apache Cloudberry extensions. ## See also diff --git a/docs/sql-stmts/alter-server.md b/docs/sql-stmts/alter-server.md index 30eea3a7c6..ef3b652bc2 100644 --- a/docs/sql-stmts/alter-server.md +++ b/docs/sql-stmts/alter-server.md @@ -19,7 +19,7 @@ ALTER SERVER RENAME TO ## Description -`ALTER SERVER` changes the definition of a foreign server. The first form of the command changes the version string or the generic options of the server. Cloudberry Database requires at least one clause. The second and third forms of the command change the owner or the name of the server. +`ALTER SERVER` changes the definition of a foreign server. The first form of the command changes the version string or the generic options of the server. Apache Cloudberry requires at least one clause. The second and third forms of the command change the owner or the name of the server. To alter the server, you must be the owner of the server. To alter the owner you must: @@ -41,7 +41,7 @@ The new server version. **`OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ] )`** -Change the server's options. `ADD`, `SET`, and `DROP` specify the action to perform. If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique. Cloudberry Database validates names and values using the server's foreign-data wrapper library. +Change the server's options. `ADD`, `SET`, and `DROP` specify the action to perform. 
If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique. Apache Cloudberry validates names and values using the server's foreign-data wrapper library. **`new_owner`** @@ -67,7 +67,7 @@ ALTER SERVER foo VERSION '9.1' OPTIONS (SET host 'baz'); ## Compatibility -`ALTER SERVER` conforms to ISO/IEC 9075-9 (SQL/MED). The `OWNER TO` and `RENAME` forms are Cloudberry Database extensions. +`ALTER SERVER` conforms to ISO/IEC 9075-9 (SQL/MED). The `OWNER TO` and `RENAME` forms are Apache Cloudberry extensions. ## See also diff --git a/docs/sql-stmts/alter-table.md b/docs/sql-stmts/alter-table.md index 591095b074..a705f7c68b 100644 --- a/docs/sql-stmts/alter-table.md +++ b/docs/sql-stmts/alter-table.md @@ -204,7 +204,7 @@ and is: ## Description -`ALTER TABLE` changes the definition of an existing table. There are several subforms described below. Note that the lock level required may differ for each subform. An `ACCESS EXCLUSIVE` lock is acquired unless explicitly noted. When multiple subcommands are provided, Cloudberry Database acquires the strictest lock required by any subcommand. +`ALTER TABLE` changes the definition of an existing table. There are several subforms described below. Note that the lock level required may differ for each subform. An `ACCESS EXCLUSIVE` lock is acquired unless explicitly noted. When multiple subcommands are provided, Apache Cloudberry acquires the strictest lock required by any subcommand. **`ADD COLUMN [ IF NOT EXISTS ]`** @@ -212,13 +212,13 @@ Adds a new column to the table, using the same syntax as [CREATE TABLE](/docs/sq **`DROP COLUMN [ IF EXISTS ]`** -Drops a column from a table. Note that if you drop table columns that are being used as the Cloudberry Database distribution key, the distribution policy for the table will be changed to `DISTRIBUTED RANDOMLY`. Indexes and table constraints involving the column are automatically dropped as well. 
Multivariate statistics referencing the dropped column will also be removed if the removal of the column would cause the statistics to contain data for only a single column. You need to specify `CASCADE` if anything outside of the table depends on the column, such as views. If `IF EXISTS` is specified and the column does not exist, no error is thrown; Cloudberry Database issues a notice instead. +Drops a column from a table. Note that if you drop table columns that are being used as the Apache Cloudberry distribution key, the distribution policy for the table will be changed to `DISTRIBUTED RANDOMLY`. Indexes and table constraints involving the column are automatically dropped as well. Multivariate statistics referencing the dropped column will also be removed if the removal of the column would cause the statistics to contain data for only a single column. You need to specify `CASCADE` if anything outside of the table depends on the column, such as views. If `IF EXISTS` is specified and the column does not exist, no error is thrown; Apache Cloudberry issues a notice instead. **`SET DATA TYPE`** This form changes the data type of a column of a table. Note that you cannot alter column data types that are being used as distribution or partitioning keys. Indexes and simple table constraints involving the column will be automatically converted to use the new column type by reparsing the originally supplied expression. The optional `COLLATE` clause specifies a collation for the new column; if omitted, the collation is the default for the new column type. The optional `USING` clause specifies how to compute the new column value from the old. If omitted, the default conversion is the same as an assignment cast from old data type to new. A `USING` clause must be provided if there is no implicit or assignment cast from old to new type. -> **Note** The Cloudberry Query Optimizer (GPORCA) supports collation only when all columns in the query use the same collation. 
If columns in the query use different collations, then Cloudberry Database uses the Postgres Planner. +> **Note** The Cloudberry Query Optimizer (GPORCA) supports collation only when all columns in the query use the same collation. If columns in the query use different collations, then Apache Cloudberry uses the Postgres Planner. Changing a column data type may or may not require a table rewrite. For information about table rewrites performed by `ALTER TABLE`, see [Notes](#notes). @@ -232,7 +232,7 @@ Sets or removes the default value for a column. Default values apply only in sub Changes whether a column is marked to allow null values or to reject null values. -`SET NOT NULL` may only be applied to a column provided none of the records in the table contain a `NULL` value for the column. This is typically checked during the `ALTER TABLE` by scanning the entire table; however, if a valid `CHECK` constraint is found which proves no `NULL` can exist, then Cloudberry Database skips the table scan. +`SET NOT NULL` may only be applied to a column provided none of the records in the table contain a `NULL` value for the column. This is typically checked during the `ALTER TABLE` by scanning the entire table; however, if a valid `CHECK` constraint is found which proves no `NULL` can exist, then Apache Cloudberry skips the table scan. If this table is a partition, you cannot `DROP NOT NULL` on a column if it is marked `NOT NULL` in the parent table. To drop the `NOT NULL` constraint from all the partitions, perform `DROP NOT NULL` on the parent table. Even if there is no `NOT NULL` constraint on the parent, such a constraint can still be added to individual partitions, if desired; that is, the children can disallow nulls even if the parent allows them, but not the other way around. 
@@ -242,7 +242,7 @@ If this table is a partition, you cannot `DROP NOT NULL` on a column if it is ma These forms change whether a column is an identity column or change the generation attribute of an existing identity column. See [CREATE TABLE](/docs/sql-stmts/create-table.md) for details. -If `DROP IDENTITY IF EXISTS` is specified and the column is not an identity column, no error is thrown. In this case Cloudberry Database issues a notice instead. +If `DROP IDENTITY IF EXISTS` is specified and the column is not an identity column, no error is thrown. In this case Apache Cloudberry issues a notice instead. **`SET sequence_option`**
**`RESTART`** @@ -276,7 +276,7 @@ This form sets column encoding options for append-optimized, column-oriented tab Adds a new constraint to a table using the same syntax as [CREATE TABLE](/docs/sql-stmts/create-table.md). The `NOT VALID` option is currently allowed only for foreign key and `CHECK` constraints. -Normally, this form causes a scan of the table to verify that all existing rows in the table satisfy the new constraint. If the constraint is marked `NOT VALID`, Cloudberry Database skips the potentially-lengthy initial check to verify that all rows in the table satisfy the constraint. The constraint will still be enforced against subsequent inserts or updates (that is, they'll fail unless there is a matching row in the referenced table, in the case of foreign keys; and they'll fail unless the new row matches the specified check constraints). But the database will not assume that the constraint holds for all rows in the table, until it is validated by using the `VALIDATE CONSTRAINT` option. See the [Notes](#notes) for more information about using the `NOT VALID` option. +Normally, this form causes a scan of the table to verify that all existing rows in the table satisfy the new constraint. If the constraint is marked `NOT VALID`, Apache Cloudberry skips the potentially-lengthy initial check to verify that all rows in the table satisfy the constraint. The constraint will still be enforced against subsequent inserts or updates (that is, they'll fail unless there is a matching row in the referenced table, in the case of foreign keys; and they'll fail unless the new row matches the specified check constraints). But the database will not assume that the constraint holds for all rows in the table, until it is validated by using the `VALIDATE CONSTRAINT` option. See the [Notes](#notes) for more information about using the `NOT VALID` option. Most forms of `ADD ` require an `ACCESS EXCLUSIVE` lock. 
@@ -290,7 +290,7 @@ The index cannot have expression columns nor be a partial index. Also, it must b If `PRIMARY KEY` is specified, and the index's columns are not already marked `NOT NULL`, then this command attempts to `ALTER COLUMN SET NOT NULL` against each such column. That requires a full table scan to verify the column(s) contain no nulls. In all other cases, this is a fast operation. -If a constraint name is provided then Cloudberry Database renames the index to match the constraint name. Otherwise the constraint will be named the same as the index. +If a constraint name is provided then Apache Cloudberry renames the index to match the constraint name. Otherwise the constraint will be named the same as the index. After this command is executed, the index is "owned" by the constraint, in the same way as if the index had been built by a regular `ADD PRIMARY KEY` or `ADD UNIQUE` command. In particular, dropping the constraint will make the index disappear too. @@ -298,7 +298,7 @@ This form is not currently supported on partitioned tables. **`ALTER CONSTRAINT`** -This form alters the attributes of a constraint that was previously created. Currently only foreign key constraints may be altered, which Cloudberry Database will accept, but not enforce. +This form alters the attributes of a constraint that was previously created. Currently only foreign key constraints may be altered, which Apache Cloudberry will accept, but not enforce. **`VALIDATE CONSTRAINT`** @@ -308,12 +308,12 @@ This command acquires a `SHARE UPDATE EXCLUSIVE` lock. **`DROP CONSTRAINT [IF EXISTS]`** -Drops the specified constraint on a table, along with any index underlying the constraint. If `IF EXISTS` is specified and the constraint does not exist, no error is thrown. Cloudberry Database issues a notice in this case instead. +Drops the specified constraint on a table, along with any index underlying the constraint. 
If `IF EXISTS` is specified and the constraint does not exist, no error is thrown. Apache Cloudberry issues a notice in this case instead. **`DISABLE ROW LEVEL SECURITY`**
**`ENABLE ROW LEVEL SECURITY`** -These forms control the application of row security policies belonging to the table. If enabled and no policies exist for the table, then Cloudberry Database applies a default-deny policy. Note that policies can exist for a table even if row level security is disabled - in this case, the policies will NOT be applied and the policies will be ignored. See also [CREATE POLICY](/docs/sql-stmts/create-policy.md). +These forms control the application of row security policies belonging to the table. If enabled and no policies exist for the table, then Apache Cloudberry applies a default-deny policy. Note that policies can exist for a table even if row level security is disabled - in this case, the policies will NOT be applied and the policies will be ignored. See also [CREATE POLICY](/docs/sql-stmts/create-policy.md). **`NO FORCE ROW LEVEL SECURITY`**
**`FORCE ROW LEVEL SECURITY`** @@ -354,7 +354,7 @@ This form changes the table from unlogged to logged or vice-versa. It cannot be This form changes one or more table-level options. See [Storage Parameters](/docs/sql-stmts/create-table.md#storage-parameters) in the `CREATE TABLE` reference for details on the available parameters. Note that for heap tables, the table contents will not be modified immediately by this command; depending on the parameter, you may need to rewrite the table to get the desired effects. That can be done with [VACUUM FULL](/docs/sql-stmts/vacuum.md), [CLUSTER](/docs/sql-stmts/cluster.md) or one of the forms of `ALTER TABLE` that forces a table rewrite, see [Notes](#notes). For append-optimized column-oriented tables, changing a storage parameter always results in a table rewrite. For planner-related parameters, changes take effect from the next time the table is locked, so currently executing queries are not affected. -Cloudberry Database takes a `SHARE UPDATE EXCLUSIVE` lock when setting `fillfactor`, toast and autovacuum storage parameters, and the planner parameter `parallel_workers`. +Apache Cloudberry takes a `SHARE UPDATE EXCLUSIVE` lock when setting `fillfactor`, toast and autovacuum storage parameters, and the planner parameter `parallel_workers`. **`RESET ( storage_parameter [, ... ] )`** @@ -402,7 +402,7 @@ Moves the table into another schema. Associated indexes, constraints, and sequen Changes the distribution policy of a table. Changing a hash distribution policy, or changing to or from a replicated policy, will cause the table data to be physically redistributed on disk, which can be resource intensive. If you declare the same hash distribution policy or change from hash to random distribution, data will not be redistributed unless you declare `SET WITH (reorganize=true)`. 
-While Cloudberry Database permits changing the distribution policy of a writable external table, the operation never results in physical redistribution of the external data. +While Apache Cloudberry permits changing the distribution policy of a writable external table, the operation never results in physical redistribution of the external data. **`ATTACH PARTITION partition_name { FOR VALUES partition_bound_spec | DEFAULT }`** @@ -410,7 +410,7 @@ This form of the *modern partitioning syntax* attaches an existing table (which A partition using `FOR VALUES` uses the same syntax for partition_bound_spec> as [CREATE TABLE](/docs/sql-stmts/create-table.md). The partition bound specification must correspond to the partitioning strategy and partition key of the target table. The table to be attached must have all the same columns as the target table and no more; moreover, the column types must also match. Also, it must have all of the `NOT NULL` and `CHECK` constraints of the target table. Currently `FOREIGN KEY` constraints are not considered. `UNIQUE` and `PRIMARY KEY` constraints from the parent table will be created in the partition, if they don't already exist. If any of the `CHECK` constraints of the table being attached are marked `NO INHERIT`, the command will fail; such constraints must be recreated without the `NO INHERIT` clause. -If the new partition is a regular table, Cloudberry Database performs a full table scan to check that existing rows in the table do not violate the partition constraint. It is possible to avoid this scan by adding a valid `CHECK` constraint to the table that allows only rows satisfying the desired partition constraint before running this command. The `CHECK` constraint will be used to determine that the table need not be scanned to validate the partition constraint. This does not work, however, if any of the partition keys is an expression and the partition does not accept `NULL` values. 
If attaching a list partition that will not accept `NULL` values, also add a `NOT NULL` constraint to the partition key column, unless it's an expression. +If the new partition is a regular table, Apache Cloudberry performs a full table scan to check that existing rows in the table do not violate the partition constraint. It is possible to avoid this scan by adding a valid `CHECK` constraint to the table that allows only rows satisfying the desired partition constraint before running this command. The `CHECK` constraint will be used to determine that the table need not be scanned to validate the partition constraint. This does not work, however, if any of the partition keys is an expression and the partition does not accept `NULL` values. If attaching a list partition that will not accept `NULL` values, also add a `NOT NULL` constraint to the partition key column, unless it's an expression. If the new partition is a foreign table, nothing is done to verify that all of the rows in the foreign table obey the partition constraint. (See the discussion in [CREATE FOREIGN TABLE](/docs/sql-stmts/create-foreign-table.md) about constraints on the foreign table.) @@ -440,7 +440,7 @@ You must own the table to use `ALTER TABLE`. To change the schema or tablespace **`IF EXISTS`** -Do not throw an error if the table does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the table does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -450,7 +450,7 @@ The name (possibly schema-qualified) of an existing table to alter. If `ONLY` is **`column_name`** -Name of a new or existing column. Note that Cloudberry Database distribution key columns must be treated with special care. Altering or dropping these columns can change the distribution policy for the table. +Name of a new or existing column. Note that Apache Cloudberry distribution key columns must be treated with special care. 
Altering or dropping these columns can change the distribution policy for the table. **`new_column_name`** @@ -466,7 +466,7 @@ Data type of the new column, or new data type for an existing column. If changin **`table_constraint`** -New table constraint for the table. Note that foreign key constraints are currently not supported in Cloudberry Database. Also a table is only allowed one unique constraint and the uniqueness must be within the Cloudberry Database distribution key. +New table constraint for the table. Note that foreign key constraints are currently not supported in Apache Cloudberry. Also a table is only allowed one unique constraint and the uniqueness must be within the Apache Cloudberry distribution key. **`constraint_name`** @@ -476,7 +476,7 @@ Name of an existing constraint to drop. The `ENCODING` clause is valid only for append-optimized, column-oriented tables. -When you add a column to an append-optimized, column-oriented table, Cloudberry Database sets each data compression parameter for the column (`compresstype`, `compresslevel`, and `blocksize`) based on the following setting, in order of preference. +When you add a column to an append-optimized, column-oriented table, Apache Cloudberry sets each data compression parameter for the column (`compresstype`, `compresslevel`, and `blocksize`) based on the following setting, in order of preference. 1. The compression parameter setting specified in the `ALTER TABLE` command `ENCODING` clause. 2. The table's data compression setting specified in the `WITH` clause when the table was created. @@ -531,7 +531,7 @@ The partition bound specification for a new partition. Refer to [CREATE TABLE](/ **`access_method`** -The method to use for accessing the table. Refer to Choosing the Storage Model for more information on the table storage models and access methods available in Cloudberry Database. 
Set to `heap` to access the table as a heap-storage table, `ao_row` to access the table as an append-optimized table with row-oriented storage (AO), or `ao_column` to access the table as an append-optimized table with column-oriented storage (AO/CO). +The method to use for accessing the table. Refer to Choosing the Storage Model for more information on the table storage models and access methods available in Apache Cloudberry. Set to `heap` to access the table as a heap-storage table, `ao_row` to access the table as an append-optimized table with row-oriented storage (AO), or `ao_column` to access the table as an append-optimized table with column-oriented storage (AO/CO). > **Note:** > @@ -644,7 +644,7 @@ Adding a `CHECK` or `NOT NULL` constraint requires scanning the table to verify Similarly, when attaching a new partition it may be scanned to verify that existing rows meet the partition constraint. -Cloudberry Database provides the option to specify multiple changes in a single `ALTER TABLE` so that multiple table scans or rewrites can be combined into a single pass over the table. +Apache Cloudberry provides the option to specify multiple changes in a single `ALTER TABLE` so that multiple table scans or rewrites can be combined into a single pass over the table. Scanning a large table to verify a new check constraint can take a long time, and other updates to the table are locked out until the `ALTER TABLE ADD CONSTRAINT` command is committed. The main purpose of the `NOT VALID` constraint option is to reduce the impact of adding a constraint on concurrent updates. With `NOT VALID`, the `ADD CONSTRAINT` command does not scan the table and can be committed immediately. After that, a `VALIDATE CONSTRAINT` command can be issued to verify that existing rows satisfy the constraint. 
The validation step does not need to lock out concurrent updates, since it knows that other transactions will be enforcing the constraint for rows that they insert or update; only pre-existing rows need to be checked. Hence, validation acquires only a `SHARE UPDATE EXCLUSIVE` lock on the table being altered. In addition to improving concurrency, it can be useful to use `NOT VALID` and `VALIDATE CONSTRAINT` in cases where the table is known to contain pre-existing violations. Once the constraint is in place, no new violations can be inserted, and you can correct the existing problems until `VALIDATE CONSTRAINT` finally succeeds. @@ -662,7 +662,7 @@ This table lists the `ALTER TABLE` operations that require a table rewrite when > **Important** The forms of `ALTER TABLE` that perform a table rewrite are not MVCC-safe. After a table rewrite, the table will appear empty to concurrent transactions if they are using a snapshot taken before the rewrite occurred. See [MVCC Caveats](https://www.postgresql.org/docs/12/mvcc-caveats.html) for more details. -Take special care when altering or dropping columns that are part of the Cloudberry Database distribution key as this can change the distribution policy for the table. +Take special care when altering or dropping columns that are part of the Apache Cloudberry distribution key as this can change the distribution policy for the table. The `USING` option of `SET DATA TYPE` can actually specify any expression involving the old values of the row; that is, it can refer to other columns as well as the one being converted. This allows very general conversions to be done with the `SET DATA TYPE` syntax. Because of this flexibility, the `USING` expression is not applied to the column's default value (if any); the result might not be a constant expression as required for a default. 
This means that when there is no implicit or assignment cast from old to new type, `SET DATA TYPE` might fail to convert the default even though a `USING` clause is supplied. In such cases, drop the default with `DROP DEFAULT`, perform the `ALTER TYPE`, and then use `SET DEFAULT` to add a suitable new default. Similar considerations apply to indexes and constraints involving the column. @@ -672,9 +672,9 @@ A recursive `DROP COLUMN` operation will remove a descendant table's column only The actions for identity columns (`ADD GENERATED`, `SET` etc., `DROP IDENTITY`), as well as the actions `CLUSTER`, `OWNER`, and `TABLESPACE` never recurse to descendant tables; that is, they always act as though `ONLY` were specified. Adding a constraint recurses only for `CHECK` constraints that are not marked `NO INHERIT`. -Cloudberry Database does not currently support foreign key constraints. For a unique constraint to be enforced in Cloudberry Database, the table must be hash-distributed (not `DISTRIBUTED RANDOMLY`), and all of the distribution key columns must be the same as the initial columns of the unique constraint columns. +Apache Cloudberry does not currently support foreign key constraints. For a unique constraint to be enforced in Apache Cloudberry, the table must be hash-distributed (not `DISTRIBUTED RANDOMLY`), and all of the distribution key columns must be the same as the initial columns of the unique constraint columns. -Cloudberry Database does not permit changing any part of a system catalog table. +Apache Cloudberry does not permit changing any part of a system catalog table. Refer to [CREATE TABLE](/docs/sql-stmts/create-table.md) for a further description of valid parameters. 
@@ -992,7 +992,7 @@ In the previous command, the two `ALTER PARTITION` clauses identify which `regio ## Compatibility -The forms `ADD` (without `USING INDEX`), `DROP [COLUMN]`, `DROP IDENTITY`, `RESTART`, `SET DEFAULT`, `SET DATA TYPE` (without `USING`), `SET GENERATED`, and `SET ` conform with the SQL standard. The other forms are Cloudberry Database extensions of the SQL standard. Also, the ability to specify more than one manipulation in a single `ALTER TABLE` command is an extension. +The forms `ADD` (without `USING INDEX`), `DROP [COLUMN]`, `DROP IDENTITY`, `RESTART`, `SET DEFAULT`, `SET DATA TYPE` (without `USING`), `SET GENERATED`, and `SET ` conform with the SQL standard. The other forms are Apache Cloudberry extensions of the SQL standard. Also, the ability to specify more than one manipulation in a single `ALTER TABLE` command is an extension. `ALTER TABLE DROP COLUMN` can be used to drop the only column of a table, leaving a zero-column table. This is an extension of SQL, which disallows zero-column tables. diff --git a/docs/sql-stmts/alter-trigger.md b/docs/sql-stmts/alter-trigger.md index 98f166f92b..6f6be6fffe 100644 --- a/docs/sql-stmts/alter-trigger.md +++ b/docs/sql-stmts/alter-trigger.md @@ -34,7 +34,7 @@ The new name for the trigger. The ability to temporarily activate or deactivate a trigger is provided by [ALTER TABLE](/docs/sql-stmts/alter-table.md), not by `ALTER TRIGGER`, because `ALTER TRIGGER` has no convenient way to express the option of activating or deactivating all of a table's triggers at once. -Note that Cloudberry Database has limited support of triggers in this release. See [CREATE TRIGGER](/docs/sql-stmts/create-trigger.md) for more information. +Note that Apache Cloudberry has limited support of triggers in this release. See [CREATE TRIGGER](/docs/sql-stmts/create-trigger.md) for more information. 
## Examples

@@ -46,7 +46,7 @@ ALTER TRIGGER emp_stamp ON emp RENAME TO emp_track_chgs;

## Compatibility

-`ALTER TRIGGER` is a Cloudberry Database extension of the SQL standard.
+`ALTER TRIGGER` is an Apache Cloudberry extension of the SQL standard.

## See also

diff --git a/docs/sql-stmts/alter-type.md b/docs/sql-stmts/alter-type.md
index 6073022115..40a9e334ae 100644
--- a/docs/sql-stmts/alter-type.md
+++ b/docs/sql-stmts/alter-type.md
@@ -41,7 +41,7 @@ Adds a new attribute to a composite type, using the same syntax as [CREATE TYPE]

**`DROP ATTRIBUTE [ IF EXISTS ]`**

-Drops an attribute from a composite type. If `IF EXISTS` is specified and the attribute does not exist, no error is thrown. In this case Cloudberry Database issues a notice instead.
+Drops an attribute from a composite type. If `IF EXISTS` is specified and the attribute does not exist, no error is thrown. In this case Apache Cloudberry issues a notice instead.

**`SET DATA TYPE`**

@@ -63,7 +63,7 @@ Moves the type into another schema.

Adds a new value to an enum type. The new value's place in the enum's ordering can be specified as being `BEFORE` or `AFTER` one of the existing values. Otherwise, the new item is added at the end of the list of values.

-If `IF NOT EXISTS` is specified, it is not an error if the type already contains the new value; Cloudberry Database issues a notice but takes no other action. Otherwise, an error will occur if the new value is already present.
+If `IF NOT EXISTS` is specified, it is not an error if the type already contains the new value; Apache Cloudberry issues a notice but takes no other action. Otherwise, an error will occur if the new value is already present.

**`RENAME VALUE`**

@@ -73,7 +73,7 @@ The `ADD ATTRIBUTE`, `DROP ATTRIBUTE`, and `ALTER ATTRIBUTE` actions can be comb

You can change the name, the owner, and the schema of a type. You can also add or update storage options for a scalar type. 
-> **Note** Cloudberry Database does not support adding storage options for row or composite types. +> **Note** Apache Cloudberry does not support adding storage options for row or composite types. You must own the type to use `ALTER TYPE`. To change the schema of a type, you must also have `CREATE` privilege on the new schema. To alter the owner, you must also be a direct or indirect member of the new owning role, and that role must have `CREATE` privilege on the type's schema. (These restrictions enforce that altering the owner does not do anything that could be done by dropping and recreating the type. However, a superuser can alter ownership of any type.) To add an attribute or alter an attribute type, you must also have `USAGE` privilege on the data type. @@ -191,7 +191,7 @@ ALTER TYPE colors RENAME VALUE 'purple' TO 'mauve'; ## Compatibility -The variants to add and drop attributes are part of the SQL standard; the other variants are Cloudberry Database extensions. +The variants to add and drop attributes are part of the SQL standard; the other variants are Apache Cloudberry extensions. ## See also diff --git a/docs/sql-stmts/alter-user-mapping.md b/docs/sql-stmts/alter-user-mapping.md index 7b1af29d57..e76192debb 100644 --- a/docs/sql-stmts/alter-user-mapping.md +++ b/docs/sql-stmts/alter-user-mapping.md @@ -32,7 +32,7 @@ Server name of the user mapping. **`OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ] )`** -Change options for the user mapping. The new options override any previously specified options. `ADD`, `SET`, and `DROP` specify the action to perform. If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique. Cloudberry Database validates names and values using the server's foreign-data wrapper. +Change options for the user mapping. The new options override any previously specified options. `ADD`, `SET`, and `DROP` specify the action to perform. 
If no operation is explicitly specified, the default operation is `ADD`. Option names must be unique. Apache Cloudberry validates names and values using the server's foreign-data wrapper.

## Examples

@@ -44,7 +44,7 @@ ALTER USER MAPPING FOR bob SERVER foo OPTIONS (SET password 'public');

## Compatibility

-`ALTER USER MAPPING` conforms to ISO/IEC 9075-9 (SQL/MED). There is a subtle syntax issue: The standard omits the `FOR` key word. Since both `CREATE USER MAPPING` and `DROP USER MAPPING` use `FOR` in analogous positions, Cloudberry Database diverges from the standard here in the interest of consistency and interoperability.
+`ALTER USER MAPPING` conforms to ISO/IEC 9075-9 (SQL/MED). There is a subtle syntax issue: The standard omits the `FOR` key word. Since both `CREATE USER MAPPING` and `DROP USER MAPPING` use `FOR` in analogous positions, Apache Cloudberry diverges from the standard here in the interest of consistency and interoperability.

## See also

diff --git a/docs/sql-stmts/alter-user.md b/docs/sql-stmts/alter-user.md
index ca5bf2237f..ce7604defb 100644
--- a/docs/sql-stmts/alter-user.md
+++ b/docs/sql-stmts/alter-user.md
@@ -56,7 +56,7 @@ ALTER USER { | ALL } [ IN DATABASE ] RESET

## Compatibility

-The `ALTER USER` statement is a Cloudberry Database extension. The SQL standard leaves the definition of users to the implementation.
+The `ALTER USER` statement is an Apache Cloudberry extension. The SQL standard leaves the definition of users to the implementation.

## See also

diff --git a/docs/sql-stmts/alter-view.md b/docs/sql-stmts/alter-view.md
index fe86002ce1..10fb28ca67 100644
--- a/docs/sql-stmts/alter-view.md
+++ b/docs/sql-stmts/alter-view.md
@@ -38,7 +38,7 @@ The name (optionally schema-qualified) of an existing view.

**`IF EXISTS`**

-Do not throw an error if the view does not exist. Cloudberry Database issues a notice in this case.
+Do not throw an error if the view does not exist. Apache Cloudberry issues a notice in this case. 
**`SET/DROP DEFAULT`**

@@ -93,7 +93,7 @@ INSERT INTO a_view(id) VALUES(2); -- ts will receive the current time

## Compatibility

-`ALTER VIEW` is a Cloudberry Database extension of the SQL standard.
+`ALTER VIEW` is an Apache Cloudberry extension of the SQL standard.

## See also

diff --git a/docs/sql-stmts/analyze.md b/docs/sql-stmts/analyze.md
index 17277fe6d7..72ff4f3511 100644
--- a/docs/sql-stmts/analyze.md
+++ b/docs/sql-stmts/analyze.md
@@ -18,7 +18,7 @@ ANALYZE [VERBOSE] [SKIP_LOCKED] ROOTPARTITION {ALL |

## Description

-`ANALYZE` collects statistics about the contents of tables in the database, and stores the results in the system table *pg_statistic*. Subsequently, Cloudberry Database uses these statistics to help determine the most efficient execution plans for queries. For information about the table statistics that are collected, see [Notes](#notes).
+`ANALYZE` collects statistics about the contents of tables in the database, and stores the results in the system table *pg_statistic*. Subsequently, Apache Cloudberry uses these statistics to help determine the most efficient execution plans for queries. For information about the table statistics that are collected, see [Notes](#notes).

With no parameter, `ANALYZE` collects statistics for every table in the current database. You can specify a table name to collect statistics for a single table. You can specify a set of column names in a specific table, in which case the statistics only for those columns from that table are collected.

@@ -29,9 +29,9 @@ For partitioned tables, `ANALYZE` collects additional statistics, HyperLogLog (H

- When aggregating NDV estimates across multiple leaf partitions, HLL statistics generate a more accurate NDV estimates than the standard table statistics.
- When updating HLL statistics, `ANALYZE` operations are required only on leaf partitions that have changed. 
For example, `ANALYZE` is required if the leaf partition data has changed, or if the leaf partition has been exchanged with another table. For more information about updating partitioned table statistics, see [Notes](#notes). -> **Important** If you intend to run queries on partitioned tables with GPORCA enabled (the default), then you must collect statistics on the root partition of the partitioned table with the `ANALYZE` or `ANALYZE ROOTPARTITION` command. For information about collecting statistics on partitioned tables and when the `ROOTPARTITION` keyword is required, see [Notes](#notes). For information about GPORCA, see Overview of GPORCA in the *Cloudberry Database Administrator Guide*. +> **Important** If you intend to run queries on partitioned tables with GPORCA enabled (the default), then you must collect statistics on the root partition of the partitioned table with the `ANALYZE` or `ANALYZE ROOTPARTITION` command. For information about collecting statistics on partitioned tables and when the `ROOTPARTITION` keyword is required, see [Notes](#notes). For information about GPORCA, see Overview of GPORCA in the *Apache Cloudberry Administrator Guide*. -> **Note** You can also use the Cloudberry Database utility `analyzedb` to update table statistics. The `analyzedb` utility can update statistics for multiple tables concurrently. The utility can also check table statistics and update statistics only if the statistics are not current or do not exist. For information about the utility, see the *Cloudberry Database Utility Guide*. +> **Note** You can also use the Apache Cloudberry utility `analyzedb` to update table statistics. The `analyzedb` utility can update statistics for multiple tables concurrently. The utility can also check table statistics and update statistics only if the statistics are not current or do not exist. For information about the utility, see the *Apache Cloudberry Utility Guide*. 
## Parameters @@ -53,7 +53,7 @@ For information about when the `ROOTPARTITION` keyword is required, see [Notes]( When you specify `ROOTPARTITION`, you must specify either `ALL` or the name of a partitioned table. -If you specify `ALL` with `ROOTPARTITION`, Cloudberry Database collects statistics for the root partition of all partitioned tables in the database. If there are no partitioned tables in the database, a message stating that there are no partitioned tables is returned. For tables that are not partitioned tables, statistics are not collected. +If you specify `ALL` with `ROOTPARTITION`, Apache Cloudberry collects statistics for the root partition of all partitioned tables in the database. If there are no partitioned tables in the database, a message stating that there are no partitioned tables is returned. For tables that are not partitioned tables, statistics are not collected. If you specify a table name with `ROOTPARTITION` and the table is not a partitioned table, no statistics are collected for the table and a warning message is returned. @@ -97,7 +97,7 @@ The name of a specific column to analyze. Defaults to all columns. Foreign tables are analyzed only when explicitly selected. Not all foreign data wrappers support `ANALYZE`. If the table's wrapper does not support `ANALYZE`, the command prints a warning and does nothing. -It is a good idea to run `ANALYZE` periodically, or just after making major changes in the contents of a table. Accurate statistics helps Cloudberry Database choose the most appropriate query plan, and thereby improve the speed of query processing. A common strategy for read-mostly databases is to run [VACUUM](/docs/sql-stmts/vacuum.md) and `ANALYZE` once a day during a low-usage time of day. (This will not be sufficient if there is heavy update activity.) 
You can check for tables with missing statistics using the `gp_stats_missing` view, which is in the `gp_toolkit` schema: +It is a good idea to run `ANALYZE` periodically, or just after making major changes in the contents of a table. Accurate statistics helps Apache Cloudberry choose the most appropriate query plan, and thereby improve the speed of query processing. A common strategy for read-mostly databases is to run [VACUUM](/docs/sql-stmts/vacuum.md) and `ANALYZE` once a day during a low-usage time of day. (This will not be sufficient if there is heavy update activity.) You can check for tables with missing statistics using the `gp_stats_missing` view, which is in the `gp_toolkit` schema: ```sql SELECT * from gp_toolkit.gp_stats_missing; @@ -109,9 +109,9 @@ If you run `ANALYZE` on a table that does not contain data, statistics are not c For a partitioned table, specifying which portion of the table to analyze, the root partition or sub-partitions (leaf partitions) can be useful if the partitioned table has a large number of partitions that have been analyzed and only a few leaf partitions have changed. -> **Note** When you create a partitioned table with the `CREATE TABLE` command, Cloudberry Database creates the table that you specify (the root partition or parent table), and also creates a hierarchy of tables based on the partition hierarchy that you specified (the child tables). +> **Note** When you create a partitioned table with the `CREATE TABLE` command, Apache Cloudberry creates the table that you specify (the root partition or parent table), and also creates a hierarchy of tables based on the partition hierarchy that you specified (the child tables). -- When you run `ANALYZE` on the root partitioned table, statistics are collected for all the leaf partitions. Leaf partitions are the lowest-level tables in the hierarchy of child tables created by Cloudberry Database for use by the partitioned table. 
+- When you run `ANALYZE` on the root partitioned table, statistics are collected for all the leaf partitions. Leaf partitions are the lowest-level tables in the hierarchy of child tables created by Apache Cloudberry for use by the partitioned table. - When you run `ANALYZE` on a leaf partition, statistics are collected only for that leaf partition and the root partition. If data in the leaf partition has changed (for example, you made significant updates to the leaf partition data or you exchanged the leaf partition), then you can run ANALYZE on the leaf partition to collect table statistics. By default, if all other leaf partitions have statistics, the command updates the root partition statistics. For example, if you collected statistics on a partitioned table with a large number partitions and then updated data in only a few leaf partitions, you can run `ANALYZE` only on those partitions to update statistics on the partitions and the statistics on the root partition. @@ -126,7 +126,7 @@ For a partitioned table that contains a leaf partition that has been exchanged t - If `ANALYZE` or `ANALYZE ROOTPARTITION` is run on the root partition, external table partitions are not sampled and root table statistics do not include external table partition. - If the `VERBOSE` clause is specified, an informational message is displayed: `skipping external table`. -The Cloudberry Database server configuration parameter optimizer_analyze_root_partition determines when statistics are collected on a root partitioned table. If the parameter is `on` (the default), the `ROOTPARTITION` keyword is not required to collect statistics on the root partition when you run `ANALYZE`. Root partition statistics are collected when you run `ANALYZE` on the root partition, or when you run `ANALYZE` on a leaf partition of the partitioned table and the other leaf partitions have statistics. If the parameter is `off`, you must run `ANALZYE ROOTPARTITION` to collect root partition statistics. 
+The Apache Cloudberry server configuration parameter optimizer_analyze_root_partition determines when statistics are collected on a root partitioned table. If the parameter is `on` (the default), the `ROOTPARTITION` keyword is not required to collect statistics on the root partition when you run `ANALYZE`. Root partition statistics are collected when you run `ANALYZE` on the root partition, or when you run `ANALYZE` on a leaf partition of the partitioned table and the other leaf partitions have statistics. If the parameter is `off`, you must run `ANALYZE ROOTPARTITION` to collect root partition statistics.

The statistics collected by `ANALYZE` usually include a list of some of the most common values in each column and a histogram showing the approximate data distribution in each column. One or both of these may be omitted if `ANALYZE` deems them uninteresting (for example, in a unique-key column, there are no common values) or if the column data type does not support the appropriate operators.

@@ -136,7 +136,7 @@ The largest statistics target among the columns being analyzed determines the nu

One of the values estimated by `ANALYZE` is the number of distinct values that appear in each column. Because only a subset of the rows are examined, this estimate can sometimes be quite inaccurate, even with the largest possible statistics target. If this inaccuracy leads to bad query plans, a more accurate value can be determined manually and then installed with `ALTER TABLE ... ALTER COLUMN ... SET STATISTICS DISTINCT` (see [ALTER TABLE](/docs/sql-stmts/alter-table.md)).

-When Cloudberry Database performs an `ANALYZE` operation to collect statistics for a table and detects that all the sampled table data pages are empty (do not contain valid data), Cloudberry Database displays a message that a `VACUUM FULL` operation should be performed. If the sampled pages are empty, the table statistics will be inaccurate. 
Pages become empty after a large number of changes to the table, for example deleting a large number of rows. A `VACUUM FULL` operation removes the empty pages and allows an `ANALYZE` operation to collect accurate statistics. +When Apache Cloudberry performs an `ANALYZE` operation to collect statistics for a table and detects that all the sampled table data pages are empty (do not contain valid data), Apache Cloudberry displays a message that a `VACUUM FULL` operation should be performed. If the sampled pages are empty, the table statistics will be inaccurate. Pages become empty after a large number of changes to the table, for example deleting a large number of rows. A `VACUUM FULL` operation removes the empty pages and allows an `ANALYZE` operation to collect accurate statistics. If there are no statistics for the table, the server configuration parameter gp_enable_relsize_collection controls whether the Postgres Planner uses a default statistics file or estimates the size of a table using the `pg_relation_size` function. By default, the Postgres Planner uses the default statistics file to estimate the number of rows if statistics are not available. diff --git a/docs/sql-stmts/begin.md b/docs/sql-stmts/begin.md index e519b7c430..91d10e2535 100644 --- a/docs/sql-stmts/begin.md +++ b/docs/sql-stmts/begin.md @@ -20,7 +20,7 @@ BEGIN [WORK | TRANSACTION] [] ## Description -`BEGIN` initiates a transaction block, that is, all statements after a `BEGIN` command will be run in a single transaction until an explicit [COMMIT](/docs/sql-stmts/commit.md) or [ROLLBACK](/docs/sql-stmts/rollback.md) is given. By default (without `BEGIN`), Cloudberry Database runs transactions in "autocommit" mode, that is, each statement is run in its own transaction and a commit is implicitly performed at the end of the statement (if execution was successful, otherwise a rollback is done). 
+`BEGIN` initiates a transaction block, that is, all statements after a `BEGIN` command will be run in a single transaction until an explicit [COMMIT](/docs/sql-stmts/commit.md) or [ROLLBACK](/docs/sql-stmts/rollback.md) is given. By default (without `BEGIN`), Apache Cloudberry runs transactions in "autocommit" mode, that is, each statement is run in its own transaction and a commit is implicitly performed at the end of the statement (if execution was successful, otherwise a rollback is done). Statements are run more quickly in a transaction block, because transaction start/commit requires significant CPU and disk activity. Execution of multiple statements inside a transaction is also useful to ensure consistency when making several related changes: other sessions will be unable to see the intermediate states wherein not all the related updates have been done. @@ -55,9 +55,9 @@ BEGIN; ## Compatibility -`BEGIN` is a Cloudberry Database language extension. It is equivalent to the SQL-standard command [START TRANSACTION](/docs/sql-stmts/start-transaction.md), whose reference page contains additional compatibility information. +`BEGIN` is an Apache Cloudberry language extension. It is equivalent to the SQL-standard command [START TRANSACTION](/docs/sql-stmts/start-transaction.md), whose reference page contains additional compatibility information. -The `DEFERRABLE` transaction_mode is a Cloudberry Database language extension. +The `DEFERRABLE` transaction_mode is an Apache Cloudberry language extension. Incidentally, the `BEGIN` key word is used for a different purpose in embedded SQL. You are advised to be careful about the transaction semantics when porting database applications. diff --git a/docs/sql-stmts/checkpoint.md b/docs/sql-stmts/checkpoint.md index b545872855..2809ef1e8a 100644 --- a/docs/sql-stmts/checkpoint.md +++ b/docs/sql-stmts/checkpoint.md @@ -24,4 +24,4 @@ Only superusers may call `CHECKPOINT`.
## Compatibility -The `CHECKPOINT` command is a Cloudberry Database extension. +The `CHECKPOINT` command is an Apache Cloudberry extension. diff --git a/docs/sql-stmts/close.md b/docs/sql-stmts/close.md index 4c01ae92f6..7770ceae30 100644 --- a/docs/sql-stmts/close.md +++ b/docs/sql-stmts/close.md @@ -30,7 +30,7 @@ Close all open cursors. ## Notes -Cloudberry Database does not have an explicit `OPEN` cursor statement. A cursor is considered open when it is declared. Use the [DECLARE](/docs/sql-stmts/declare.md) statement to declare (and open) a cursor. +Apache Cloudberry does not have an explicit `OPEN` cursor statement. A cursor is considered open when it is declared. Use the [DECLARE](/docs/sql-stmts/declare.md) statement to declare (and open) a cursor. You can see all available cursors by querying the `pg_cursors` system view. @@ -46,7 +46,7 @@ CLOSE portala; ## Compatibility -`CLOSE` is fully conforming with the SQL standard. `CLOSE ALL` is a Cloudberry Database extension. +`CLOSE` is fully conforming with the SQL standard. `CLOSE ALL` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/cluster.md b/docs/sql-stmts/cluster.md index 324db03065..b46fe220b6 100644 --- a/docs/sql-stmts/cluster.md +++ b/docs/sql-stmts/cluster.md @@ -18,15 +18,15 @@ CLUSTER [VERBOSE] ## Description -`CLUSTER` instructs Cloudberry Database to order the table specified by `` based on the index specified by ``. The index must already have been defined on ``. +`CLUSTER` instructs Apache Cloudberry to order the table specified by `` based on the index specified by ``. The index must already have been defined on ``. -Cloudberry Database supports `CLUSTER` operations on append-optimized tables only for B-tree indexes. +Apache Cloudberry supports `CLUSTER` operations on append-optimized tables only for B-tree indexes. -> **Note** Cloudberry Database 7 does not support `CLUSTER`ing a partitioned table.
+> **Note** Apache Cloudberry does not support `CLUSTER`ing a partitioned table. When a table is clustered, it is physically reordered on disk based on the index information. Clustering is a one-time operation: when the table is subsequently updated, the changes are not clustered. That is, no attempt is made to store new or updated rows according to their index order. If you wish, you can periodically recluster by issuing the command again. Setting the table's `FILLFACTOR` storage parameter to less than 100% can aid in preserving cluster ordering during updates, because updated rows are kept on the same page if enough space is available there. -When a table is clustered using this command, Cloudberry Database remembers on which index it was clustered. The form `CLUSTER ` reclusters the table on the same index that it was clustered before. You can use the `CLUSTER` or `SET WITHOUT CLUSTER` forms of [ALTER TABLE](/docs/sql-stmts/alter-table.md) to set the index to use for future cluster operations, or to clear any previous setting. `CLUSTER` without any parameter reclusters all previously clustered tables in the current database that the calling user owns, or all tables if called by a superuser. This form of `CLUSTER` cannot be run inside a transaction block. +When a table is clustered using this command, Apache Cloudberry remembers on which index it was clustered. The form `CLUSTER ` reclusters the table on the same index that it was clustered before. You can use the `CLUSTER` or `SET WITHOUT CLUSTER` forms of [ALTER TABLE](/docs/sql-stmts/alter-table.md) to set the index to use for future cluster operations, or to clear any previous setting. `CLUSTER` without any parameter reclusters all previously clustered tables in the current database that the calling user owns, or all tables if called by a superuser. This form of `CLUSTER` cannot be run inside a transaction block. When a table is being clustered, an `ACCESS EXCLUSIVE` lock is acquired on it. 
This prevents any other database operations (both reads and writes) from operating on the table until the `CLUSTER` is finished. diff --git a/docs/sql-stmts/comment.md b/docs/sql-stmts/comment.md index f51bb9c9ee..4535b83b6d 100644 --- a/docs/sql-stmts/comment.md +++ b/docs/sql-stmts/comment.md @@ -65,7 +65,7 @@ COMMENT ON `COMMENT` stores a comment about a database object. Only one comment string is stored for each object, so to modify a comment, issue a new `COMMENT` command for the same object. To remove a comment, specify `NULL` in place of the text string. Comments are automatically dropped when the object is dropped. -Cloudberry Database acquires a `SHARE UPDATE EXCLUSIVE` lock on the object to be commented. +Apache Cloudberry acquires a `SHARE UPDATE EXCLUSIVE` lock on the object to be commented. For most kinds of object, only the object's owner can set the comment. Roles don't have owners, so the rule for `COMMENT ON ROLE` is that you must be superuser to comment on a superuser role, or have the `CREATEROLE` privilege to comment on non-superuser roles. Likewise, access methods don't have owners either; you must be superuser to comment on an access method. Of course, a superuser can comment on anything. @@ -87,7 +87,7 @@ You can view comments using the `psql` meta-commands `\dd`, `\d+`, and `\l+`. Ot The name of the object to be commented. Names of tables, aggregates, collations, conversions, domains, foreign tables, functions, indexes, operators, operator classes, operator families, procedures, routines, sequences, statistics, text search objects, types, views, and materialized views can be schema-qualified. When commenting on a column, relation_name must refer to a table, view, materialized view, composite type, or foreign table. -> **Note** Cloudberry Database does not support triggers. +> **Note** Apache Cloudberry does not support triggers. **`table_name`**
**`domain_name`** @@ -121,7 +121,7 @@ The data type(s) of the operator's arguments (optionally schema-qualified). Spec **`PROCEDURAL`** -Cloudberry Database ignores this noise word. +Apache Cloudberry ignores this noise word. **`type_name`** diff --git a/docs/sql-stmts/commit.md b/docs/sql-stmts/commit.md index 18e89ee12a..807c22a263 100644 --- a/docs/sql-stmts/commit.md +++ b/docs/sql-stmts/commit.md @@ -43,7 +43,7 @@ COMMIT; ## Compatibility -The command `COMMIT` conforms to the SQL standard. The form `COMMIT TRANSACTION` is a Cloudberry Database extension. +The command `COMMIT` conforms to the SQL standard. The form `COMMIT TRANSACTION` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/copy.md b/docs/sql-stmts/copy.md index 14cadea92a..e868af9564 100644 --- a/docs/sql-stmts/copy.md +++ b/docs/sql-stmts/copy.md @@ -40,11 +40,11 @@ IGNORE EXTERNAL PARTITIONS ## Description -`COPY` moves data between Cloudberry Database tables and standard file-system files. `COPY TO` copies the contents of a table to a file (or multiple files based on the segment ID if copying `ON SEGMENT`), while `COPY FROM` copies data from a file to a table (appending the data to whatever is in the table already). `COPY TO` can also copy the results of a `SELECT` query. +`COPY` moves data between Apache Cloudberry tables and standard file-system files. `COPY TO` copies the contents of a table to a file (or multiple files based on the segment ID if copying `ON SEGMENT`), while `COPY FROM` copies data from a file to a table (appending the data to whatever is in the table already). `COPY TO` can also copy the results of a `SELECT` query. If a list of columns is specified, `COPY` will only copy the data in the specified columns to or from the file. If there are any columns in the table that are not in the column list, `COPY FROM` will insert the default values for those columns.
-`COPY` with a file name instructs the Cloudberry Database coordinator host to directly read from or write to a file. The file must be accessible to the coordinator host and the name must be specified from the viewpoint of the coordinator host. +`COPY` with a file name instructs the Apache Cloudberry coordinator host to directly read from or write to a file. The file must be accessible to the coordinator host and the name must be specified from the viewpoint of the coordinator host. When `COPY` is used with the `ON SEGMENT` clause, the `COPY TO` causes segments to create individual segment-oriented files, which remain on the segment hosts. The filename argument for `ON SEGMENT` takes the string literal `` (required) and uses either the absolute path or the `` string literal. When the `COPY` operation is run, the segment IDs and the paths of the segment data directories are substituted for the string literal values. @@ -58,7 +58,7 @@ When `PROGRAM` is specified, the server runs the given command and reads from th When `STDIN` or `STDOUT` is specified, data is transmitted via the connection between the client and the coordinator. `STDIN` and `STDOUT` cannot be used with the `ON SEGMENT` clause. -If `SEGMENT REJECT LIMIT` is used, then a `COPY FROM` operation will operate in single row error isolation mode. In this release, single row error isolation mode only applies to rows in the input file with format errors — for example, extra or missing attributes, attributes of a wrong data type, or invalid client encoding sequences. Constraint errors such as violation of a `NOT NULL`, `CHECK`, or `UNIQUE` constraint will still be handled in 'all-or-nothing' input mode. The user can specify the number of error rows acceptable (on a per-segment basis), after which the entire `COPY FROM` operation will be cancelled and no rows will be loaded. The count of error rows is per-segment, not per entire load operation. 
If the per-segment reject limit is not reached, then all rows not containing an error will be loaded and any error rows discarded. To keep error rows for further examination, specify the `LOG ERRORS` clause to capture error log information. The error information and the row is stored internally in Cloudberry Database. +If `SEGMENT REJECT LIMIT` is used, then a `COPY FROM` operation will operate in single row error isolation mode. In this release, single row error isolation mode only applies to rows in the input file with format errors — for example, extra or missing attributes, attributes of a wrong data type, or invalid client encoding sequences. Constraint errors such as violation of a `NOT NULL`, `CHECK`, or `UNIQUE` constraint will still be handled in 'all-or-nothing' input mode. The user can specify the number of error rows acceptable (on a per-segment basis), after which the entire `COPY FROM` operation will be cancelled and no rows will be loaded. The count of error rows is per-segment, not per entire load operation. If the per-segment reject limit is not reached, then all rows not containing an error will be loaded and any error rows discarded. To keep error rows for further examination, specify the `LOG ERRORS` clause to capture error log information. The error information and the row is stored internally in Apache Cloudberry. **Outputs** @@ -96,11 +96,11 @@ The path name of the input or output file. An input file name can be an absolute **`PROGRAM 'command'`** -Specify a command to run. In `COPY FROM`, the input is read from standard output of the command, and in `COPY TO`, the output is written to the standard input of the command. The command must be specified from the viewpoint of the Cloudberry Database coordinator host system, and must be executable by the Cloudberry Database administrator user (`gpadmin`). +Specify a command to run. 
In `COPY FROM`, the input is read from standard output of the command, and in `COPY TO`, the output is written to the standard input of the command. The command must be specified from the viewpoint of the Apache Cloudberry coordinator host system, and must be executable by the Apache Cloudberry administrator user (`gpadmin`). The command is invoked by a shell. When passing arguments to the shell, strip or escape any special characters that have a special meaning for the shell. For security reasons, it is best to use a fixed command string, or at least avoid passing any user input in the string. -When `ON SEGMENT` is specified, the command must be executable on all Cloudberry Database primary segment hosts by the Cloudberry Database administrator user (`gpadmin`). The command is run by each Cloudberry segment instance. The `` is required in the command. +When `ON SEGMENT` is specified, the command must be executable on all Apache Cloudberry primary segment hosts by the Apache Cloudberry administrator user (`gpadmin`). The command is run by each Cloudberry segment instance. The `` is required in the command. See the `ON SEGMENT` clause for information about command syntax requirements and the data that is copied when the clause is specified. @@ -200,7 +200,7 @@ For a `COPY FROM...ON SEGMENT` command, the table distribution policy is checked **`NEWLINE`** -Specifies the newline used in your data files — `LF` (Line feed, 0x0A), `CR` (Carriage return, 0x0D), or `CRLF` (Carriage return plus line feed, 0x0D 0x0A). If not specified, a Cloudberry Database segment will detect the newline type by looking at the first row of data it receives and using the first newline type encountered. +Specifies the newline used in your data files — `LF` (Line feed, 0x0A), `CR` (Carriage return, 0x0D), or `CRLF` (Carriage return plus line feed, 0x0D 0x0A). 
If not specified, an Apache Cloudberry segment will detect the newline type by looking at the first row of data it receives and using the first newline type encountered. **`CSV`** @@ -214,23 +214,23 @@ In `COPY FROM` more for both `TEXT` and `CSV`, specifying `FILL MISSING FIELDS` This is an optional clause that can precede a `SEGMENT REJECT LIMIT` clause to capture error log information about rows with formatting errors. -Error log information is stored internally and is accessed with the Cloudberry Database built-in SQL function `gp_read_error_log()`. +Error log information is stored internally and is accessed with the Apache Cloudberry built-in SQL function `gp_read_error_log()`. See [Notes](#notes) for information about the error log information and built-in functions for viewing and managing error log information. **`SEGMENT REJECT LIMIT count [ROWS | PERCENT]`** -Runs a `COPY FROM` operation in single row error isolation mode. If the input rows have format errors they will be discarded provided that the reject limit count is not reached on any Cloudberry Database segment instance during the load operation. The reject limit count can be specified as number of rows (the default) or percentage of total rows (1-100). If `PERCENT` is used, each segment starts calculating the bad row percentage only after the number of rows specified by the parameter `gp_reject_percent_threshold` has been processed. The default for `gp_reject_percent_threshold` is 300 rows. Constraint errors such as violation of a `NOT NULL`, `CHECK`, or `UNIQUE` constraint will still be handled in 'all-or-nothing' input mode. If the limit is not reached, all good rows will be loaded and any error rows discarded. +Runs a `COPY FROM` operation in single row error isolation mode. If the input rows have format errors they will be discarded provided that the reject limit count is not reached on any Apache Cloudberry segment instance during the load operation.
The reject limit count can be specified as number of rows (the default) or percentage of total rows (1-100). If `PERCENT` is used, each segment starts calculating the bad row percentage only after the number of rows specified by the parameter `gp_reject_percent_threshold` has been processed. The default for `gp_reject_percent_threshold` is 300 rows. Constraint errors such as violation of a `NOT NULL`, `CHECK`, or `UNIQUE` constraint will still be handled in 'all-or-nothing' input mode. If the limit is not reached, all good rows will be loaded and any error rows discarded. -> **Note** Cloudberry Database limits the initial number of rows that can contain formatting errors if the `SEGMENT REJECT LIMIT` is not triggered first or is not specified. If the first 1000 rows are rejected, the `COPY` operation is stopped and rolled back. +> **Note** Apache Cloudberry limits the initial number of rows that can contain formatting errors if the `SEGMENT REJECT LIMIT` is not triggered first or is not specified. If the first 1000 rows are rejected, the `COPY` operation is stopped and rolled back. -The limit for the number of initial rejected rows can be changed with the Cloudberry Database server configuration parameter `gp_initial_bad_row_limit`. See Server Configuration Parameters for information about the parameter. +The limit for the number of initial rejected rows can be changed with the Apache Cloudberry server configuration parameter `gp_initial_bad_row_limit`. See Server Configuration Parameters for information about the parameter. **`IGNORE EXTERNAL PARTITIONS`** When copying data from partitioned tables, data are not copied from leaf partitions that are external tables. A message is added to the log file when data are not copied. -If this clause is not specified and Cloudberry Database attempts to copy data from a leaf partition that is an external table, an error is returned. 
+If this clause is not specified and Apache Cloudberry attempts to copy data from a leaf partition that is an external table, an error is returned. See the next section "Notes" for information about specifying an SQL query to copy data from leaf partitions that are external tables. @@ -246,15 +246,15 @@ Similarly, to copy data from a partitioned table with a leaf partition that is a COPY (SELECT * from my_sales ) TO stdout ``` -The `BINARY` keyword causes all data to be stored/read as binary format rather than as text. It is somewhat faster than the normal text mode, but a binary-format file is less portable across machine architectures and Cloudberry Database versions. Also, you cannot run `COPY FROM` in single row error isolation mode if the data is in binary format. +The `BINARY` keyword causes all data to be stored/read as binary format rather than as text. It is somewhat faster than the normal text mode, but a binary-format file is less portable across machine architectures and Apache Cloudberry versions. Also, you cannot run `COPY FROM` in single row error isolation mode if the data is in binary format. You must have `SELECT` privilege on the table whose values are read by `COPY TO`, and `INSERT` privilege on the table into which values are inserted by `COPY FROM`. It is sufficient to have column privileges on the columns listed in the command. -Files named in a `COPY` command are read or written directly by the database server, not by the client application. Therefore, they must reside on or be accessible to the Cloudberry Database coordinator host machine, not the client. They must be accessible to and readable or writable by the Cloudberry Database system user (the user ID the server runs as), not the client. Only database superusers are permitted to name files with `COPY`, because this allows reading or writing any file that the server has privileges to access. 
+Files named in a `COPY` command are read or written directly by the database server, not by the client application. Therefore, they must reside on or be accessible to the Apache Cloudberry coordinator host machine, not the client. They must be accessible to and readable or writable by the Apache Cloudberry system user (the user ID the server runs as), not the client. Only database superusers are permitted to name files with `COPY`, because this allows reading or writing any file that the server has privileges to access. `COPY FROM` will invoke any triggers and check constraints on the destination table. However, it will not invoke rewrite rules. Note that in this release, violations of constraints are not evaluated for single row error isolation mode. -`COPY` input and output is affected by `DateStyle`. To ensure portability to other Cloudberry Database installations that might use non-default `DateStyle` settings, `DateStyle` should be set to ISO before using `COPY TO`. It is also a good idea to avoid dumping data with IntervalStyle set to `sql_standard`, because negative interval values might be misinterpreted by a server that has a different setting for IntervalStyle. +`COPY` input and output is affected by `DateStyle`. To ensure portability to other Apache Cloudberry installations that might use non-default `DateStyle` settings, `DateStyle` should be set to ISO before using `COPY TO`. It is also a good idea to avoid dumping data with IntervalStyle set to `sql_standard`, because negative interval values might be misinterpreted by a server that has a different setting for IntervalStyle. Input data is interpreted according to `ENCODING` option or the current client encoding, and output data is encoded in `ENCODING` or the current client encoding, even if the data does not pass through the client but is read from or written to a file directly by the server. 
@@ -270,7 +270,7 @@ Data from a table that is generated by a `COPY TO...ON SEGMENT` command can be u > **Note** If you run `COPY FROM...ON SEGMENT` and the server configuration parameter `gp_enable_segment_copy_checking` is `false`, manual redistribution of table data might be required. See the `ALTER TABLE` clause `WITH REORGANIZE`. -When you specify the `LOG ERRORS` clause, Cloudberry Database captures errors that occur while reading the external table data. You can view and manage the captured error log data. +When you specify the `LOG ERRORS` clause, Apache Cloudberry captures errors that occur while reading the external table data. You can view and manage the captured error log data. - Use the built-in SQL function `gp_read_error_log('table_name')`. It requires `SELECT` privilege on table_name. This example displays the error log information for data loaded into table `ext_expenses` with a `COPY` command: @@ -292,7 +292,7 @@ When you specify the `LOG ERRORS` clause, Cloudberry Database captures errors th Specify the `*` wildcard character to delete error log information for existing tables in the current database. Specify the string `*.*` to delete all database error log information, including error log information that was not deleted due to previous database issues. If \* is specified, database owner privilege is required. If `*.*` is specified, operating system super-user privilege is required. -When a Cloudberry Database user who is not a superuser runs a `COPY` command, the command can be controlled by a resource queue. The resource queue must be configured with the `ACTIVE_STATEMENTS` parameter that specifies a maximum limit on the number of queries that can be run by roles assigned to that queue. Cloudberry Database does not apply a cost value or memory value to a `COPY` command, resource queues with only cost or memory limits do not affect the running of `COPY` commands. 
+When an Apache Cloudberry user who is not a superuser runs a `COPY` command, the command can be controlled by a resource queue. The resource queue must be configured with the `ACTIVE_STATEMENTS` parameter that specifies a maximum limit on the number of queries that can be run by roles assigned to that queue. Apache Cloudberry does not apply a cost value or memory value to a `COPY` command, resource queues with only cost or memory limits do not affect the running of `COPY` commands. A non-superuser can run only these types of `COPY` commands: @@ -334,15 +334,15 @@ The following characters must be preceded by the escape character if they appear **CSV Format** -This format option is used for importing and exporting the Comma Separated Value (CSV) file format used by many other programs, such as spreadsheets. Instead of the escaping rules used by Cloudberry Database standard text format, it produces and recognizes the common CSV escaping mechanism. +This format option is used for importing and exporting the Comma Separated Value (CSV) file format used by many other programs, such as spreadsheets. Instead of the escaping rules used by Apache Cloudberry standard text format, it produces and recognizes the common CSV escaping mechanism. The values in each record are separated by the `DELIMITER` character. If the value contains the delimiter character, the `QUOTE` character, the `ESCAPE` character (which is double quote by default), the `NULL` string, a carriage return, or line feed character, then the whole value is prefixed and suffixed by the `QUOTE` character. You can also use `FORCE_QUOTE` to force quotes when outputting non-`NULL` values in specific columns. -The CSV format has no standard way to distinguish a `NULL` value from an empty string. Cloudberry Database `COPY` handles this by quoting. A `NULL` is output as the `NULL` parameter string and is not quoted, while a non-`NULL` value matching the `NULL` string is quoted.
For example, with the default settings, a `NULL` is written as an unquoted empty string, while an empty string data value is written with double quotes (`""`). Reading values follows similar rules. You can use `FORCE_NOT_NULL` to prevent `NULL` input comparisons for specific columns. You can also use `FORCE_NULL` to convert quoted null string data values to `NULL`. +The CSV format has no standard way to distinguish a `NULL` value from an empty string. Apache Cloudberry `COPY` handles this by quoting. A `NULL` is output as the `NULL` parameter string and is not quoted, while a non-`NULL` value matching the `NULL` string is quoted. For example, with the default settings, a `NULL` is written as an unquoted empty string, while an empty string data value is written with double quotes (`""`). Reading values follows similar rules. You can use `FORCE_NOT_NULL` to prevent `NULL` input comparisons for specific columns. You can also use `FORCE_NULL` to convert quoted null string data values to `NULL`. Because backslash is not a special character in the `CSV` format, `\.`, the end-of-data marker, could also appear as a data value. To avoid any misinterpretation, a `\.` data value appearing as a lone entry on a line is automatically quoted on output, and on input, if quoted, is not interpreted as the end-of-data marker. If you are loading a file created by another application that has a single unquoted column and might have a value of `\.`, you might need to quote that value in the input file. -> **Note** In `CSV` format, all characters are significant. A quoted value surrounded by white space, or any characters other than `DELIMITER`, will include those characters. This can cause errors if you import data from a system that pads CSV lines with white space out to some fixed width. If such a situation arises you might need to preprocess the CSV file to remove the trailing white space, before importing the data into Cloudberry Database. 
+> **Note** In `CSV` format, all characters are significant. A quoted value surrounded by white space, or any characters other than `DELIMITER`, will include those characters. This can cause errors if you import data from a system that pads CSV lines with white space out to some fixed width. If such a situation arises you might need to preprocess the CSV file to remove the trailing white space, before importing the data into Apache Cloudberry. `CSV` format will both recognize and produce CSV files with quoted values containing embedded carriage returns and line feeds. Thus the files are not strictly one line per table row like text-format files @@ -350,7 +350,7 @@ Because backslash is not a special character in the `CSV` format, `\.`, the end- **Binary Format** -The `binary` format option causes all data to be stored/read as binary format rather than as text. It is somewhat faster than the text and `CSV` formats, but a binary-format file is less portable across machine architectures and Cloudberry Database versions. Also, the binary format is very data type specific; for example it will not work to output binary data from a `smallint` column and read it into an `integer` column, even though that would work fine in text format. +The `binary` format option causes all data to be stored/read as binary format rather than as text. It is somewhat faster than the text and `CSV` formats, but a binary-format file is less portable across machine architectures and Apache Cloudberry versions. Also, the binary format is very data type specific; for example it will not work to output binary data from a `smallint` column and read it into an `integer` column, even though that would work fine in text format. The binary file format consists of a file header, zero or more tuples containing the row data, and a file trailer. Headers and data are in network byte order. 
@@ -447,7 +447,7 @@ This example uses a `SELECT` statement to copy data to files on each segment: COPY (SELECT * FROM testtbl) TO '/tmp/mytst' ON SEGMENT; ``` -This example copies the data from the `lineitem` table and uses the `PROGRAM` clause to add the data to the `/tmp/lineitem_program.csv` file with `cat` utility. The file is placed on the Cloudberry Database coordinator. +This example copies the data from the `lineitem` table and uses the `PROGRAM` clause to add the data to the `/tmp/lineitem_program.csv` file with `cat` utility. The file is placed on the Apache Cloudberry coordinator. ```sql COPY LINEITEM TO PROGRAM 'cat > /tmp/lineitem.csv' CSV; @@ -469,7 +469,7 @@ COPY LINEITEM_4 FROM PROGRAM 'cat /tmp/lineitem_program.csv' ON SEGMENT C There is no `COPY` statement in the SQL standard. -The following syntax was used in earlier versions of Cloudberry Database and is still supported: +The following syntax was used in earlier versions of Apache Cloudberry and is still supported: ```sql COPY [( [, ...])] FROM {'' | PROGRAM '' | STDIN} diff --git a/docs/sql-stmts/create-access-method.md b/docs/sql-stmts/create-access-method.md index 0cb64a4cd4..ced1d94ac9 100644 --- a/docs/sql-stmts/create-access-method.md +++ b/docs/sql-stmts/create-access-method.md @@ -47,7 +47,7 @@ CREATE ACCESS METHOD heptree TYPE INDEX HANDLER heptree_handler; ## Compatibility -`CREATE ACCESS METHOD` is a Cloudberry Database extension. +`CREATE ACCESS METHOD` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/create-aggregate.md b/docs/sql-stmts/create-aggregate.md index 9c21e2ff2f..638191af3a 100644 --- a/docs/sql-stmts/create-aggregate.md +++ b/docs/sql-stmts/create-aggregate.md @@ -76,7 +76,7 @@ CREATE [ OR REPLACE ] AGGREGATE ( [ ] [ ] next-internal-state ffunc( internal-state ) ---> aggregate-value ``` -Cloudberry Database creates a temporary variable of data type state_data_type to hold the current internal state of the aggregate function.
At each input row, the aggregate argument value(s) are calculated and the state transition function is invoked with the current state value and the new argument value(s) to calculate a new internal state value. After all the rows have been processed, the final function is invoked once to calculate the aggregate's return value. If there is no final function then the ending state value is returned as-is. +Apache Cloudberry creates a temporary variable of data type state_data_type to hold the current internal state of the aggregate function. At each input row, the aggregate argument value(s) are calculated and the state transition function is invoked with the current state value and the new argument value(s) to calculate a new internal state value. After all the rows have been processed, the final function is invoked once to calculate the aggregate's return value. If there is no final function then the ending state value is returned as-is. An aggregate function can provide an initial condition, that is, an initial value for the internal state value. This is specified and stored in the database as a value of type `text`, but it must be a valid external representation of a constant of the state value data type. If it is not supplied then the state value starts out null. @@ -187,7 +187,7 @@ The combinefunc must be declared as taking two arguments of the state_data_type For aggregate functions whose state_data_type is `internal`, the combinefunc must not be strict. In this case the combinefunc must ensure that null states are handled correctly and that the state being returned is properly stored in the aggregate memory context. -In Cloudberry Database, if the result of the aggregate function is computed in a segmented fashion, the combine function is invoked on the individual internal states in order to combine them into an ending internal state. 
+In Apache Cloudberry, if the result of the aggregate function is computed in a segmented fashion, the combine function is invoked on the individual internal states in order to combine them into an ending internal state. Note that this function is also called in hash aggregate mode within a segment. Therefore, if you call this aggregate function without a combine function, hash aggregate is never chosen. Since hash aggregate is efficient, consider defining a combine function whenever possible. @@ -257,12 +257,12 @@ The parameters of `CREATE AGGREGATE` can be written in any order, not just the o The ordinary functions used to define a new aggregate function must be defined first. -If the value of the Cloudberry Database server configuration parameter `gp_enable_multiphase_agg` is `off`, only single-level aggregation is performed by the Postgres Planner. There is no equivalent parameter for the Pivotal Query Optimizer. +If the value of the Apache Cloudberry server configuration parameter `gp_enable_multiphase_agg` is `off`, only single-level aggregation is performed by the Postgres Planner. There is no equivalent parameter for the Pivotal Query Optimizer. -Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Cloudberry Database array (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. +Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Apache Cloudberry array (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. -In previous versions of Cloudberry Database, there was a concept of ordered aggregates. Since version 6, any aggregate can be called as an ordered aggregate, using the syntax: +In previous versions of Apache Cloudberry, there was a concept of ordered aggregates. 
Since version 6, any aggregate can be called as an ordered aggregate, using the syntax: ```sql name ( arg [ , ... ] [ORDER BY sortspec [ , ...]] ) @@ -270,7 +270,7 @@ name ( arg [ , ... ] [ORDER BY sortspec [ , ...]] ) The `ORDERED` keyword is accepted for backwards compatibility, but is ignored. -In previous versions of Cloudberry Database, the `COMBINEFUNC` option was called `PREFUNC`. It is still accepted for backwards compatibility, as a synonym for `COMBINEFUNC`. +In previous versions of Apache Cloudberry, the `COMBINEFUNC` option was called `PREFUNC`. It is still accepted for backwards compatibility, as a synonym for `COMBINEFUNC`. ## Example @@ -323,7 +323,7 @@ Refer to [User-Defined Aggregates](https://www.postgresql.org/docs/12/xaggr.html ## Compatibility -`CREATE AGGREGATE` is a Cloudberry Database language extension. The SQL standard does not provide for user-defined aggregate functions. +`CREATE AGGREGATE` is an Apache Cloudberry language extension. The SQL standard does not provide for user-defined aggregate functions. ## See also diff --git a/docs/sql-stmts/create-cast.md b/docs/sql-stmts/create-cast.md index 7add5d650c..9336293099 100644 --- a/docs/sql-stmts/create-cast.md +++ b/docs/sql-stmts/create-cast.md @@ -60,7 +60,7 @@ SELECT CAST ( 2 AS numeric ) + 4.0; The catalogs also provide a cast from `numeric` to `integer`. If that cast were marked `AS IMPLICIT`, which it is not, then the parser would be faced with choosing between the above interpretation and the alternative of casting the `numeric` constant to `integer` and applying the `integer + integer` operator. Lacking any knowledge of which choice to prefer, the parser would give up and declare the query ambiguous. The fact that only one of the two casts is implicit is the way in which we teach the parser to prefer resolution of a mixed `numeric`-and-`integer` expression as `numeric`; the parser has no built-in knowledge about that. -It is wise to be conservative about marking casts as implicit.
An overabundance of implicit casting paths can cause Cloudberry Database to choose surprising interpretations of commands, or to be unable to resolve commands at all because there are multiple possible interpretations. A good general rule is to make a cast implicitly invokable only for information-preserving transformations between types in the same general type category. For example, the cast from `int2` to `int4` can reasonably be implicit, but the cast from `float8` to `int4` should probably be assignment-only. Cross-type-category casts, such as `text` to `int4`, are best made explicit-only. +It is wise to be conservative about marking casts as implicit. An overabundance of implicit casting paths can cause Apache Cloudberry to choose surprising interpretations of commands, or to be unable to resolve commands at all because there are multiple possible interpretations. A good general rule is to make a cast implicitly invokable only for information-preserving transformations between types in the same general type category. For example, the cast from `int2` to `int4` can reasonably be implicit, but the cast from `float8` to `int4` should probably be assignment-only. Cross-type-category casts, such as `text` to `int4`, are best made explicit-only. > **Note** Sometimes it is necessary for usability or standards-compliance reasons to provide multiple implicit casts among a set of types, resulting in ambiguity that cannot be avoided as described above. The parser uses a fallback heuristic based on type categories and preferred types that helps to provide desired behavior in such cases. See [CREATE TYPE](/docs/sql-stmts/create-type.md) for more information. @@ -78,7 +78,7 @@ The name of the target data type of the cast. **`funcname(argtype [, ...])`** -The function used to perform the cast. The function name may be schema-qualified. If it is not, Cloudberry Database looks for the function in the schema search path. 
The function's result data type must match the target type of the cast. +The function used to perform the cast. The function name may be schema-qualified. If it is not, Apache Cloudberry looks for the function in the schema search path. The function's result data type must match the target type of the cast. Cast implementation functions may have one to three arguments. The first argument type must be identical to or binary-coercible from the cast's source type. The second argument, if present, must be type `integer`; it receives the type modifier associated with the destination type, or `-1` if there is none. The third argument, if present, must be type `boolean`; it receives `true` if the cast is an explicit cast, `false` otherwise. The SQL specification demands different behaviors for explicit and implicit casts in some cases. This argument is supplied for functions that must implement such casts. It is not recommended that you design your own data types this way. @@ -108,15 +108,15 @@ Indicates that the cast may be invoked implicitly in any context. ## Notes -Note that in this release of Cloudberry Database, user-defined functions used in a user-defined cast must be defined as `IMMUTABLE`. Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Cloudberry Database array (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. +Note that in this release of Apache Cloudberry, user-defined functions used in a user-defined cast must be defined as `IMMUTABLE`. Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Apache Cloudberry array (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. Remember that if you want to be able to convert types both ways you need to declare casts both ways explicitly. 
-It is normally not necessary to create casts between user-defined types and the standard string types (`text`, `varchar`, and `char(*n*)`, as well as user-defined types that are defined to be in the string category). Cloudberry Database provides automatic I/O conversion casts for these. The automatic casts to string types are treated as assignment casts, while the automatic casts from string types are explicit-only. You can override this behavior by declaring your own cast to replace an automatic cast, but usually the only reason to do so is if you want the conversion to be more easily invokable than the standard assignment-only or explicit-only setting. Another possible reason is that you want the conversion to behave differently from the type's I/O function - think twice before doing this. (A small number of the built-in types do indeed have different behaviors for conversions, mostly because of requirements of the SQL standard.) +It is normally not necessary to create casts between user-defined types and the standard string types (`text`, `varchar`, and `char(*n*)`, as well as user-defined types that are defined to be in the string category). Apache Cloudberry provides automatic I/O conversion casts for these. The automatic casts to string types are treated as assignment casts, while the automatic casts from string types are explicit-only. You can override this behavior by declaring your own cast to replace an automatic cast, but usually the only reason to do so is if you want the conversion to be more easily invokable than the standard assignment-only or explicit-only setting. Another possible reason is that you want the conversion to behave differently from the type's I/O function - think twice before doing this. (A small number of the built-in types do indeed have different behaviors for conversions, mostly because of requirements of the SQL standard.) 
It is recommended that you follow the convention of naming cast implementation functions after the target data type, as the built-in cast implementation functions are named. Many users are used to being able to cast data types using a function-style notation, that is `typename(x)`. -There are two cases in which a function-call construct is treated as a cast request without having matched it to an actual function. If a function call `*name(x)*` does not exactly match any existing function, but `*name*` is the name of a data type and `pg_cast` provides a binary-coercible cast to this type from the type of `*x*`, then the call will be construed as a binary-coercible cast. Cloudberry Database makes this exception so that binary-coercible casts can be invoked using functional syntax, even though they lack any function. Likewise, if there is no `pg_cast` entry but the cast would be to or from a string type, the call is construed as an I/O conversion cast. This exception allows I/O conversion casts to be invoked using functional syntax. +There are two cases in which a function-call construct is treated as a cast request without having matched it to an actual function. If a function call `*name(x)*` does not exactly match any existing function, but `*name*` is the name of a data type and `pg_cast` provides a binary-coercible cast to this type from the type of `*x*`, then the call will be construed as a binary-coercible cast. Apache Cloudberry makes this exception so that binary-coercible casts can be invoked using functional syntax, even though they lack any function. Likewise, if there is no `pg_cast` entry but the cast would be to or from a string type, the call is construed as an I/O conversion cast. This exception allows I/O conversion casts to be invoked using functional syntax. 
There is an exception to the exception above: I/O conversion casts from composite types to string types cannot be invoked using functional syntax, but must be written in explicit cast syntax (either `CAST` or :: notation). This exception exists because after the introduction of automatically-provided I/O conversion casts, it was found to be too easy to accidentally invoke such a cast when you intended a function or column reference. @@ -130,7 +130,7 @@ CREATE CAST (bigint AS int4) WITH FUNCTION int4(bigint) AS ASSIGNMENT; ## Compatibility -The `CREATE CAST` command conforms to the SQL standard, except that SQL does not make provisions for binary-coercible types or extra arguments to implementation functions. `AS IMPLICIT` is a Cloudberry Database extension, too. +The `CREATE CAST` command conforms to the SQL standard, except that SQL does not make provisions for binary-coercible types or extra arguments to implementation functions. `AS IMPLICIT` is an Apache Cloudberry extension, too. ## See also diff --git a/docs/sql-stmts/create-collation.md b/docs/sql-stmts/create-collation.md index d92fa8a8a4..177549c531 100644 --- a/docs/sql-stmts/create-collation.md +++ b/docs/sql-stmts/create-collation.md @@ -54,7 +54,7 @@ Specifies the provider to use for locale services associated with this collation **`DETERMINISTIC`** -Specifies whether the collation should use deterministic comparisons. The default is `true`. A deterministic comparison considers strings that are not byte-wise equal to be unequal even if they are considered logically equal by the comparison. Cloudberry Database breaks ties using a byte-wise comparison. Comparison that is not deterministic can make the collation be, say, case- or accent-insensitive. For that, you need to choose an appropriate `LC_COLLATE` setting and set the collation to not deterministic here. +Specifies whether the collation should use deterministic comparisons. The default is `true`.
A deterministic comparison considers strings that are not byte-wise equal to be unequal even if they are considered logically equal by the comparison. Apache Cloudberry breaks ties using a byte-wise comparison. Comparison that is not deterministic can make the collation be, say, case- or accent-insensitive. For that, you need to choose an appropriate `LC_COLLATE` setting and set the collation to not deterministic here. Nondeterministic collations are only supported with the `icu` provider. @@ -74,7 +74,7 @@ The name of an existing collation to copy. The new collation will have the same Use [DROP COLLATION](/docs/sql-stmts/drop-collation.md) to remove user-defined collations. -See [Collation Support](https://www.postgresql.org/docs/12/collation.html) in the PostgreSQL documentation for more information about collation support in Cloudberry Database. +See [Collation Support](https://www.postgresql.org/docs/12/collation.html) in the PostgreSQL documentation for more information about collation support in Apache Cloudberry. When using the `libc` collation provider, the, locale must be applicable to the current database encoding. See [CREATE DATABASE](/docs/sql-stmts/create-database.md) for the precise rules. @@ -102,7 +102,7 @@ This can be convenient to be able to use operating-system-independent collation ## Compatibility -There is a `CREATE COLLATION` statement in the SQL standard, but it is limited to copying an existing collation. The syntax to create a new collation is a Cloudberry Database extension. +There is a `CREATE COLLATION` statement in the SQL standard, but it is limited to copying an existing collation. The syntax to create a new collation is an Apache Cloudberry extension.
## See also diff --git a/docs/sql-stmts/create-conversion.md b/docs/sql-stmts/create-conversion.md index 6ac0e2d5a5..0e7cae9b74 100644 --- a/docs/sql-stmts/create-conversion.md +++ b/docs/sql-stmts/create-conversion.md @@ -57,7 +57,7 @@ Use [DROP CONVERSION](/docs/sql-stmts/drop-conversion.md) to remove a user-defin The privileges required to create a conversion might change in a feature release. -Note that in this release of Cloudberry Database, user-defined functions used in a user-defined conversion must be defined as `IMMUTABLE`. Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Cloudberry Database array (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. +Note that in this release of Apache Cloudberry, user-defined functions used in a user-defined conversion must be defined as `IMMUTABLE`. Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Apache Cloudberry array (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. ## Examples @@ -70,7 +70,7 @@ CREATE CONVERSION myconv FOR 'UTF8' TO 'LATIN1' FROM myfunc; ## Compatibility -`CREATE CONVERSION` is a Cloudberry Database extension. There is no `CREATE CONVERSION` statement in the SQL standard, but there is a `CREATE TRANSLATION` statement that is very similar in purpose and syntax. +`CREATE CONVERSION` is an Apache Cloudberry extension. There is no `CREATE CONVERSION` statement in the SQL standard, but there is a `CREATE TRANSLATION` statement that is very similar in purpose and syntax.
## See also diff --git a/docs/sql-stmts/create-database.md b/docs/sql-stmts/create-database.md index de5768bce7..5e42d42ef6 100644 --- a/docs/sql-stmts/create-database.md +++ b/docs/sql-stmts/create-database.md @@ -26,7 +26,7 @@ To create a database, you must be a superuser or have the special `CREATEDB` pri The creator becomes the owner of the new database by default. Superusers can create databases owned by other users by using the `OWNER` clause. They can even create databases owned by users with no special privileges. Non-superusers with `CREATEDB` privilege can only create databases owned by themselves. -By default, the new database will be created by cloning the standard system database `template1`. A different template can be specified by writing `TEMPLATE name`. In particular, by writing `TEMPLATE template0`, you can create a clean database containing only the standard objects predefined by Cloudberry Database. This is useful if you wish to avoid copying any installation-local objects that may have been added to `template1`. +By default, the new database will be created by cloning the standard system database `template1`. A different template can be specified by writing `TEMPLATE name`. In particular, by writing `TEMPLATE template0`, you can create a clean database containing only the standard objects predefined by Apache Cloudberry. This is useful if you wish to avoid copying any installation-local objects that may have been added to `template1`. ## Parameters @@ -52,7 +52,7 @@ If `false`, then no one can connect to this database. The default is `true`, all **`connlimit`** -The maximum number of concurrent connections allowed to this database on the coordinator. The default is `-1`, no limit. Cloudberry Database superusers are exempt from this limit. +The maximum number of concurrent connections allowed to this database on the coordinator. The default is `-1`, no limit. Apache Cloudberry superusers are exempt from this limit. 
**`istemplate`** diff --git a/docs/sql-stmts/create-domain.md b/docs/sql-stmts/create-domain.md index 41ad137919..e871418a30 100644 --- a/docs/sql-stmts/create-domain.md +++ b/docs/sql-stmts/create-domain.md @@ -70,7 +70,7 @@ This clause is only intended for compatibility with nonstandard SQL databases. I Currently, `CHECK` expressions cannot contain subqueries nor refer to variables other than `VALUE`. -When a domain has multiple `CHECK` constraints, they will be tested in alphabetical order by name. (Cloudberry Database versions before 7.0 did not honor any particular firing order for `CHECK` constraints.) +When a domain has multiple `CHECK` constraints, they will be tested in alphabetical order by name. (Apache Cloudberry versions before 7.0 did not honor any particular firing order for `CHECK` constraints.) ## Notes @@ -84,9 +84,9 @@ The empty scalar sub-SELECT will produce a null value that is considered to be o It is very difficult to avoid such problems, because of SQL's general assumption that a null value is a valid value of every data type. Best practice therefore is to design a domain's constraints so that a null value is allowed, and then to apply column `NOT NULL` constraints to columns of the domain type as needed, rather than directly to the domain type. -Cloudberry Database assumes that `CHECK` constraints' conditions are immutable, that is, they will always give the same result for the same input value. This assumption is what justifies examining `CHECK` constraints only when a value is first converted to be of a domain type, and not at other times. (This is essentially the same as the treatment of table `CHECK` constraints.) +Apache Cloudberry assumes that `CHECK` constraints' conditions are immutable, that is, they will always give the same result for the same input value. This assumption is what justifies examining `CHECK` constraints only when a value is first converted to be of a domain type, and not at other times. 
(This is essentially the same as the treatment of table `CHECK` constraints.) -An example of a common way to break this assumption is to reference a user-defined function in a `CHECK` expression, and then change the behavior of that function. Cloudberry Database does not disallow that, but it will not notice if there are stored values of the domain type that now violate the `CHECK` constraint. That would cause a subsequent database dump and restore to fail. The recommended way to handle such a change is to drop the constraint (using `ALTER DOMAIN`), adjust the function definition, and re-add the constraint, thereby rechecking it against stored data. +An example of a common way to break this assumption is to reference a user-defined function in a `CHECK` expression, and then change the behavior of that function. Apache Cloudberry does not disallow that, but it will not notice if there are stored values of the domain type that now violate the `CHECK` constraint. That would cause a subsequent database dump and restore to fail. The recommended way to handle such a change is to drop the constraint (using `ALTER DOMAIN`), adjust the function definition, and re-add the constraint, thereby rechecking it against stored data. ## Examples diff --git a/docs/sql-stmts/create-extension.md b/docs/sql-stmts/create-extension.md index 2cf86cea52..2293b99a2a 100644 --- a/docs/sql-stmts/create-extension.md +++ b/docs/sql-stmts/create-extension.md @@ -4,7 +4,7 @@ title: CREATE EXTENSION # CREATE EXTENSION -Registers an extension in a Cloudberry Database. +Registers an extension in an Apache Cloudberry database. ## Synopsis @@ -28,13 +28,13 @@ Loading an extension requires the same privileges that would be required to crea **`IF NOT EXISTS`** -Do not throw an error if an extension with the same name already exists. Cloudberry Database issues a notice in this case.
Note that there is no guarantee that the existing extension is anything like the one that would have been created from the currently-available script file. +Do not throw an error if an extension with the same name already exists. Apache Cloudberry issues a notice in this case. Note that there is no guarantee that the existing extension is anything like the one that would have been created from the currently-available script file. **`extension_name`** -The name of the extension to be installed. Cloudberry Database will create the extension using details from the file `SHAREDIR/extension/.control`. +The name of the extension to be installed. Apache Cloudberry will create the extension using details from the file `SHAREDIR/extension/.control`. -`SHAREDIR` is the installation shared-data directory, for example `/usr/local/cloudberry-db/share/postgresql`. The command `pg_config --sharedir` displays the directory. +`SHAREDIR` is the installation shared-data directory, for example `/usr/local/cloudberry/share/postgresql`. The command `pg_config --sharedir` displays the directory. **`schema_name`** @@ -59,13 +59,13 @@ Automatically install any extensions that this extension depends on that are not ## Notes -Before you can use `CREATE EXTENSION` to load an extension into a database, the extension's supporting files must be installed. The supporting files must be installed in the same location on all Cloudberry Database hosts. For information about creating new extensions, see the PostgreSQL [Packaging Related Objects into an Extension](https://www.postgresql.org/docs/12/extend-extensions.html) documentation. +Before you can use `CREATE EXTENSION` to load an extension into a database, the extension's supporting files must be installed. The supporting files must be installed in the same location on all Apache Cloudberry hosts. 
For information about creating new extensions, see the PostgreSQL [Packaging Related Objects into an Extension](https://www.postgresql.org/docs/12/extend-extensions.html) documentation. The extensions currently available for loading can be identified from the pg_available_extensions or pg_available_extension_versions system views. > **Note:** Installing an extension as superuser requires trusting that the extension's author wrote the extension installation script in a secure fashion. It is not terribly difficult for a malicious user to create trojan-horse objects that will compromise later execution of a carelessly-written extension script, allowing that user to acquire superuser privileges. However, trojan-horse objects are only hazardous if they are in the `search_path` during script execution, meaning that they are in the extension's installation target schema or in the schema of some extension it depends on. Therefore, a good rule of thumb when dealing with extensions whose scripts have not been carefully vetted is to install them only into schemas for which `CREATE` privilege has not been and will not be granted to any untrusted users. Likewise for any extensions they depend on. -The extensions supplied with Cloudberry Database are believed to be secure against installation-time attacks of this sort, except for a few that depend on other extensions. As stated in the documentation for those extensions, they should be installed into secure schemas, or installed into the same schemas as the extensions they depend on, or both. +The extensions supplied with Apache Cloudberry are believed to be secure against installation-time attacks of this sort, except for a few that depend on other extensions. As stated in the documentation for those extensions, they should be installed into secure schemas, or installed into the same schemas as the extensions they depend on, or both.
## Examples @@ -84,7 +84,7 @@ CREATE EXTENSION hstore; ## Compatibility -`CREATE EXTENSION` is a Cloudberry Database extension. +`CREATE EXTENSION` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/create-external-table.md b/docs/sql-stmts/create-external-table.md index 6c856875cf..9bc4a0c7da 100644 --- a/docs/sql-stmts/create-external-table.md +++ b/docs/sql-stmts/create-external-table.md @@ -129,9 +129,9 @@ CREATE WRITABLE EXTERNAL WEB [TEMPORARY | TEMP] TABLE ## Description -`CREATE EXTERNAL TABLE` or `CREATE EXTERNAL WEB TABLE` creates a new readable external table definition in Cloudberry Database. Readable external tables are typically used for fast, parallel data loading. Once an external table is defined, you can query its data directly (and in parallel) using SQL commands. For example, you can select, join, or sort external table data. You can also create views for external tables. DML operations (`UPDATE`, `INSERT`, `DELETE`, or `TRUNCATE`) are not allowed on readable external tables, and you cannot create indexes on readable external tables. +`CREATE EXTERNAL TABLE` or `CREATE EXTERNAL WEB TABLE` creates a new readable external table definition in Apache Cloudberry. Readable external tables are typically used for fast, parallel data loading. Once an external table is defined, you can query its data directly (and in parallel) using SQL commands. For example, you can select, join, or sort external table data. You can also create views for external tables. DML operations (`UPDATE`, `INSERT`, `DELETE`, or `TRUNCATE`) are not allowed on readable external tables, and you cannot create indexes on readable external tables. -`CREATE WRITABLE EXTERNAL TABLE` or `CREATE WRITABLE EXTERNAL WEB TABLE` creates a new writable external table definition in Cloudberry Database. Writable external tables are typically used for unloading data from the database into a set of files or named pipes.
Writable external web tables can also be used to output data to an executable program. Once a writable external table is defined, data can be selected from database tables and inserted into the writable external table. Writable external tables only allow `INSERT` operations – `SELECT`, `UPDATE`, `DELETE` or `TRUNCATE` are not allowed. +`CREATE WRITABLE EXTERNAL TABLE` or `CREATE WRITABLE EXTERNAL WEB TABLE` creates a new writable external table definition in Apache Cloudberry. Writable external tables are typically used for unloading data from the database into a set of files or named pipes. Writable external web tables can also be used to output data to an executable program. Once a writable external table is defined, data can be selected from database tables and inserted into the writable external table. Writable external tables only allow `INSERT` operations – `SELECT`, `UPDATE`, `DELETE` or `TRUNCATE` are not allowed. The main difference between regular external tables and external web tables is their data sources. Regular readable external tables access static flat files, whereas external web tables access dynamic data sources – either on a web server or by running OS commands or scripts. @@ -139,17 +139,17 @@ The main difference between regular external tables and external web tables is t **`READABLE | WRITABLE`** -Specifies the type of external table, readable being the default. Readable external tables are used for loading data into Cloudberry Database. Writable external tables are used for unloading data. +Specifies the type of external table, readable being the default. Readable external tables are used for loading data into Apache Cloudberry. Writable external tables are used for unloading data. **`WEB`** -Creates a readable or writable external web table definition in Cloudberry Database. There are two forms of readable external web tables – those that access files via the `http://` protocol or those that access data by running OS commands. 
Writable external web tables output data to an executable program that can accept an input stream of data. External web tables are not rescannable during query execution. +Creates a readable or writable external web table definition in Apache Cloudberry. There are two forms of readable external web tables – those that access files via the `http://` protocol or those that access data by running OS commands. Writable external web tables output data to an executable program that can accept an input stream of data. External web tables are not rescannable during query execution. The `s3` protocol does not support external web tables. You can, however, create an external web table that runs a third-party tool to read data from or write data to S3 directly. **`TEMPORARY | TEMP`** -If specified, creates a temporary readable or writable external table definition in Cloudberry Database. Temporary external tables exist in a special schema; you cannot specify a schema name when you create the table. Temporary external tables are automatically dropped at the end of a session. +If specified, creates a temporary readable or writable external table definition in Apache Cloudberry. Temporary external tables exist in a special schema; you cannot specify a schema name when you create the table. Temporary external tables are automatically dropped at the end of a session. An existing permanent table with the same name is not visible to the current session while the temporary table exists, unless you reference the permanent table with its schema-qualified name. @@ -194,29 +194,29 @@ With the option `#transform=trans_name`, you can specify a transform to apply wh **`ON COORDINATOR`** -Restricts all table-related operations to the Cloudberry Database coordinator segment. Permitted only on readable and writable external tables created with the `s3` or custom protocols. The `gpfdist`, `gpfdists`, `pxf`, and `file` protocols do not support `ON COORDINATOR`. 
+Restricts all table-related operations to the Apache Cloudberry coordinator segment. Permitted only on readable and writable external tables created with the `s3` or custom protocols. The `gpfdist`, `gpfdists`, `pxf`, and `file` protocols do not support `ON COORDINATOR`. -> **Note** Be aware of potential resource impacts when reading from or writing to external tables you create with the `ON COORDINATOR` clause. You may encounter performance issues when you restrict table operations solely to the Cloudberry Database coordinator segment. +> **Note** Be aware of potential resource impacts when reading from or writing to external tables you create with the `ON COORDINATOR` clause. You may encounter performance issues when you restrict table operations solely to the Apache Cloudberry coordinator segment. **`EXECUTE 'command' [ON ...]`** Allowed for readable external web tables or writable external tables only. For readable external web tables, specifies the OS command to be run by the segment instances. The command can be a single OS command or a script. The `ON` clause is used to specify which segment instances will run the given command. -- `ON ALL` is the default. The command will be run by every active (primary) segment instance on all segment hosts in the Cloudberry Database system. If the command runs a script, that script must reside in the same location on all of the segment hosts and be executable by the Cloudberry superuser (`gpadmin`). +- `ON ALL` is the default. The command will be run by every active (primary) segment instance on all segment hosts in the Apache Cloudberry system. If the command runs a script, that script must reside in the same location on all of the segment hosts and be executable by the Cloudberry superuser (`gpadmin`). - `ON COORDINATOR` runs the command on the coordinator host only. > **Note** Logging is not supported for external web tables when the `ON COORDINATOR` clause is specified. 
-- `ON number` means the command will be run by the specified number of segments. The particular segments are chosen randomly at runtime by the Cloudberry Database system. If the command runs a script, that script must reside in the same location on all of the segment hosts and be executable by the Cloudberry superuser (`gpadmin`). +- `ON number` means the command will be run by the specified number of segments. The particular segments are chosen randomly at runtime by the Apache Cloudberry system. If the command runs a script, that script must reside in the same location on all of the segment hosts and be executable by the Cloudberry superuser (`gpadmin`). - `HOST` means the command will be run by one segment on each segment host (once per segment host), regardless of the number of active segment instances per host. - `HOST segment_hostname` means the command will be run by all active (primary) segment instances on the specified segment host. -- `SEGMENT segment_id` means the command will be run only once by the specified segment. You can determine a segment instance's ID by looking at the content number in the system catalog table gp_segment_configuration. The content ID of the Cloudberry Database coordinator is always `-1`. +- `SEGMENT segment_id` means the command will be run only once by the specified segment. You can determine a segment instance's ID by looking at the content number in the system catalog table gp_segment_configuration. The content ID of the Apache Cloudberry coordinator is always `-1`. For writable external tables, the command specified in the `EXECUTE` clause must be prepared to have data piped into it. Since all segments that have data to send will write their output to the specified command or program, the only available option for the `ON` clause is `ON ALL`. 
**`FORMAT 'TEXT | CSV' (options)`** -When the `FORMAT` clause identfies delimited text (`TEXT`) or comma separated values (`CSV`) format, formatting options are similar to those available with the PostgreSQL [COPY](/docs/sql-stmts/copy.md) command. If the data in the file does not use the default column delimiter, escape character, null string and so on, you must specify the additional formatting options so that the data in the external file is read correctly by Cloudberry Database. +When the `FORMAT` clause identifies delimited text (`TEXT`) or comma separated values (`CSV`) format, formatting options are similar to those available with the PostgreSQL [COPY](/docs/sql-stmts/copy.md) command. If the data in the file does not use the default column delimiter, escape character, null string and so on, you must specify the additional formatting options so that the data in the external file is read correctly by Apache Cloudberry. If you use the `pxf` protocol to access an external data source, refer to Accessing External Data with PXF for information about using PXF. @@ -248,7 +248,7 @@ Specifies the single character that is used for C escape sequences (such as `\n` **`NEWLINE`** -Specifies the newline used in your data files – `LF` (Line feed, 0x0A), `CR` (Carriage return, 0x0D), or `CRLF` (Carriage return plus line feed, 0x0D 0x0A). If not specified, a Cloudberry Database segment will detect the newline type by looking at the first row of data it receives and using the first newline type encountered. +Specifies the newline used in your data files – `LF` (Line feed, 0x0A), `CR` (Carriage return, 0x0D), or `CRLF` (Carriage return plus line feed, 0x0D 0x0A). If not specified, an Apache Cloudberry segment will detect the newline type by looking at the first row of data it receives and using the first newline type encountered. 
**`HEADER`** @@ -288,7 +288,7 @@ This is an optional clause that can precede a `SEGMENT REJECT LIMIT` clause to l The data is deleted when the external table is dropped unless you specify the keyword `PERSISTENTLY`. If the keyword is specified, the log data persists after the external table is dropped. -The error log data is accessed with the Cloudberry Database built-in SQL function `gp_read_error_log()`, or with the SQL function `gp_read_persistent_error_log()` if the `PERSISTENTLY` keyword is specified. +The error log data is accessed with the Apache Cloudberry built-in SQL function `gp_read_error_log()`, or with the SQL function `gp_read_persistent_error_log()` if the `PERSISTENTLY` keyword is specified. If you use the `PERSISTENTLY` keyword, you must install the functions that manage the persistent error log information. @@ -298,15 +298,15 @@ See [Notes](#notes) for information about the error log information and built-in Runs a `COPY FROM` operation in single row error isolation mode. If the input rows have format errors they will be discarded provided that the reject limit count is not reached on any Cloudberry segment instance during the load operation. The reject limit count can be specified as number of rows (the default) or percentage of total rows (1-100). If `PERCENT` is used, each segment starts calculating the bad row percentage only after the number of rows specified by the parameter `gp_reject_percent_threshold` has been processed. The default for `gp_reject_percent_threshold` is 300 rows. Constraint errors such as violation of a `NOT NULL`, `CHECK`, or `UNIQUE` constraint will still be handled in "all-or-nothing" input mode. If the limit is not reached, all good rows will be loaded and any error rows discarded. -> **Note** When reading an external table, Cloudberry Database limits the initial number of rows that can contain formatting errors if the `SEGMENT REJECT LIMIT` is not triggered first or is not specified. 
If the first 1000 rows are rejected, the `COPY` operation is stopped and rolled back. +> **Note** When reading an external table, Apache Cloudberry limits the initial number of rows that can contain formatting errors if the `SEGMENT REJECT LIMIT` is not triggered first or is not specified. If the first 1000 rows are rejected, the `COPY` operation is stopped and rolled back. -You can change the limit for the number of initial rejected rows with the Cloudberry Database server configuration parameter `gp_initial_bad_row_limit`. +You can change the limit for the number of initial rejected rows with the Apache Cloudberry server configuration parameter `gp_initial_bad_row_limit`. **`DISTRIBUTED BY ({column [opclass]}, [ ... ] )`** **`DISTRIBUTED RANDOMLY`** -Used to declare the Cloudberry Database distribution policy for a writable external table. By default, writable external tables are distributed randomly. If the source table you are exporting data from has a hash distribution policy, defining the same distribution key column(s) and operator class(es), `oplcass`, for the writable external table will improve unload performance by eliminating the need to move rows over the interconnect. When you issue an unload command such as `INSERT INTO wex_table SELECT * FROM source_table`, the rows that are unloaded can be sent directly from the segments to the output location if the two tables have the same hash distribution policy. +Used to declare the Apache Cloudberry distribution policy for a writable external table. By default, writable external tables are distributed randomly. If the source table you are exporting data from has a hash distribution policy, defining the same distribution key column(s) and operator class(es), `opclass`, for the writable external table will improve unload performance by eliminating the need to move rows over the interconnect. 
When you issue an unload command such as `INSERT INTO wex_table SELECT * FROM source_table`, the rows that are unloaded can be sent directly from the segments to the output location if the two tables have the same hash distribution policy. ## Examples @@ -385,7 +385,7 @@ INSERT INTO campaign_out SELECT * FROM campaign WHERE customer_id=123; ## Notes -When you specify the `LOG ERRORS` clause, Cloudberry Database captures errors that occur while reading the external table data. +When you specify the `LOG ERRORS` clause, Apache Cloudberry captures errors that occur while reading the external table data. You can view and manage the captured error log data. The functions to manage log data depend on whether the data is persistent (the `PERSISTENTLY` keyword is used with the `LOG ERRORS` clause). @@ -422,11 +422,11 @@ You can view and manage the captured error log data. The functions to manage log - The `gp_truncate_*` functions require owner privilege on the table. - You can use the `*` wildcard character to delete error log information for existing tables in the current database. Specify the string `*.*` to delete all database error log information, including error log information that was not deleted due to previous database issues. If `*` is specified, database owner privilege is required. If `*.*` is specified, operating system super-user privilege is required. Non-persistent and persistent error log data must be deleted with their respective `gp_truncate_*` functions. -When multiple Cloudberry Database external tables are defined with the `gpfdist`, `gpfdists`, or `file` protocol and access the same named pipe a Linux system, Cloudberry Database restricts access to the named pipe to a single reader. An error is returned if a second reader attempts to access the named pipe. 
+When multiple Apache Cloudberry external tables are defined with the `gpfdist`, `gpfdists`, or `file` protocol and access the same named pipe on a Linux system, Apache Cloudberry restricts access to the named pipe to a single reader. An error is returned if a second reader attempts to access the named pipe. ## Compatibility -`CREATE EXTERNAL TABLE` is a Cloudberry Database extension. The SQL standard makes no provisions for external tables. +`CREATE EXTERNAL TABLE` is an Apache Cloudberry extension. The SQL standard makes no provisions for external tables. ## See also diff --git a/docs/sql-stmts/create-foreign-data-wrapper.md b/docs/sql-stmts/create-foreign-data-wrapper.md index 3e4bfaa084..7238019496 100644 --- a/docs/sql-stmts/create-foreign-data-wrapper.md +++ b/docs/sql-stmts/create-foreign-data-wrapper.md @@ -31,13 +31,13 @@ The name of the foreign-data wrapper to create. The name must be unique within t **`HANDLER handler_function`** -The name of a previously registered function that Cloudberry Database calls to retrieve the execution functions for foreign tables. hander_function must take no arguments, and its return type must be `fdw_handler`. +The name of a previously registered function that Apache Cloudberry calls to retrieve the execution functions for foreign tables. handler_function must take no arguments, and its return type must be `fdw_handler`. It is possible to create a foreign-data wrapper with no handler function, but you can only declare, not access, foreign tables using such a wrapper. **`VALIDATOR validator_function`** -The name of a previously registered function that Cloudberry Database calls to check the generic options provided to the foreign-data wrapper. This function also checks the options for foreign servers, user mappings, and foreign tables that use the foreign-data wrapper. If no validator function or `NO VALIDATOR` is specified, Cloudberry Database does not check options at creation time. 
(Depending upon the implementation, foreign-data wrappers may ignore or reject invalid options at runtime.) +The name of a previously registered function that Apache Cloudberry calls to check the generic options provided to the foreign-data wrapper. This function also checks the options for foreign servers, user mappings, and foreign tables that use the foreign-data wrapper. If no validator function or `NO VALIDATOR` is specified, Apache Cloudberry does not check options at creation time. (Depending upon the implementation, foreign-data wrappers may ignore or reject invalid options at runtime.) validator_function must take two arguments: one of type `text[]`, which contains the array of options as stored in the system catalogs, and one of type `oid`, which identifies the OID of the system catalog containing the options. @@ -49,13 +49,13 @@ The options for the new foreign-data wrapper. Option names must be unique. The o **`mpp_execute { 'coordinator' | 'any' | 'all segments' }`** -A Cloudberry Database-specific option that identifies the host from which the foreign-data wrapper reads or writes data: +An Apache Cloudberry-specific option that identifies the host from which the foreign-data wrapper reads or writes data: - `coordinator` (the default)—Read or write data from the coordinator host. - `any`—Read data from either the coordinator host or any one segment, depending on which path costs less. - `all segments`—Read or write data from all segments. To support this option value, the foreign-data wrapper must have a policy that matches the segments to data. -> **Note** Cloudberry Database supports parallel writes to foreign tables only when you set `mpp_execute 'all segments'`. +> **Note** Apache Cloudberry supports parallel writes to foreign tables only when you set `mpp_execute 'all segments'`. Support for the foreign-data wrapper `mpp_execute` option, and the specific modes, is foreign-data wrapper-specific. 
@@ -87,7 +87,7 @@ CREATE FOREIGN DATA WRAPPER mywrapper OPTIONS (debug 'true'); ## Compatibility -`CREATE FOREIGN DATA WRAPPER` conforms to ISO/IEC 9075-9 (SQL/MED), with the exception that the `HANDLER` and `VALIDATOR` clauses are extensions, and the standard clauses `LIBRARY` and `LANGUAGE` are not implemented in Cloudberry Database. +`CREATE FOREIGN DATA WRAPPER` conforms to ISO/IEC 9075-9 (SQL/MED), with the exception that the `HANDLER` and `VALIDATOR` clauses are extensions, and the standard clauses `LIBRARY` and `LANGUAGE` are not implemented in Apache Cloudberry. Note, however, that the SQL/MED functionality as a whole is not yet conforming. diff --git a/docs/sql-stmts/create-foreign-table.md b/docs/sql-stmts/create-foreign-table.md index 51ef3a9094..6a758b58af 100644 --- a/docs/sql-stmts/create-foreign-table.md +++ b/docs/sql-stmts/create-foreign-table.md @@ -53,7 +53,7 @@ WITH ( MODULUS , REMAINDER ) `CREATE FOREIGN TABLE` creates a new foreign table in the current database. The user who creates the foreign table becomes its owner. -If you schema-qualify the table name (for example, `CREATE FOREIGN TABLE myschema.mytable ...`), Cloudberry Database creates the table in the specified schema. Otherwise, the foreign table is created in the current schema. The name of the foreign table must be distinct from the name of any other foreign table, table, sequence, index, view, or materialized view in the same schema. +If you schema-qualify the table name (for example, `CREATE FOREIGN TABLE myschema.mytable ...`), Apache Cloudberry creates the table in the specified schema. Otherwise, the foreign table is created in the current schema. The name of the foreign table must be distinct from the name of any other foreign table, table, sequence, index, view, or materialized view in the same schema. 
Because `CREATE FOREIGN TABLE` automatically creates a data type that represents the composite type corresponding to one row of the foreign table, foreign tables cannot have the same name as any existing data type in the same schema. @@ -65,7 +65,7 @@ To create a foreign table, you must have `USAGE` privilege on the foreign server **`IF NOT EXISTS`** -Do not throw an error if a relation with the same name already exists. Cloudberry Database issues a notice in this case. Note that there is no guarantee that the existing relation is anything like the one that would have been created. +Do not throw an error if a relation with the same name already exists. Apache Cloudberry issues a notice in this case. Note that there is no guarantee that the existing relation is anything like the one that would have been created. **`table_name`** @@ -113,9 +113,9 @@ Currently, `CHECK` expressions cannot contain subqueries nor refer to variables **`DEFAULT default_expr`** -The `DEFAULT` clause assigns a default value for the column whose definition it appears within. The value is any variable-free expression; Cloudberry Database does not allow subqueries and cross-references to other columns in the current table. The data type of the default expression must match the data type of the column. +The `DEFAULT` clause assigns a default value for the column whose definition it appears within. The value is any variable-free expression; Apache Cloudberry does not allow subqueries and cross-references to other columns in the current table. The data type of the default expression must match the data type of the column. -Cloudberry Database uses the default expression in any insert operation that does not specify a value for the column. If there is no default for a column, then the default is null. +Apache Cloudberry uses the default expression in any insert operation that does not specify a value for the column. If there is no default for a column, then the default is null. 
**`GENERATED ALWAYS AS ( generation_expr ) STORED`** @@ -131,17 +131,17 @@ The name of an existing server to use for the foreign table. For details on defi **`OPTIONS ( option 'value' [, ... ] )`** -The options for the new foreign table or one of its columns. While option names must be unique, a table option and a column option may have the same name. The option names and values are foreign-data wrapper-specific. Cloudberry Database validates the options and values using the foreign-data wrapper's validator_function. +The options for the new foreign table or one of its columns. While option names must be unique, a table option and a column option may have the same name. The option names and values are foreign-data wrapper-specific. Apache Cloudberry validates the options and values using the foreign-data wrapper's validator_function. **`mpp_execute { 'coordinator' | 'any' | 'all segments' }`** -A Cloudberry Database-specific option that identifies the host from which the foreign-data wrapper reads or writes data: +An Apache Cloudberry-specific option that identifies the host from which the foreign-data wrapper reads or writes data: - `coordinator` (the default)—Read or write data from the coordinator host. - `any`—Read data from either the coordinator host or any one segment, depending on which path costs less. - `all segments`—Read or write data from all segments. To support this option value, the foreign-data wrapper must have a policy that matches the segments to data. -> **Note** Cloudberry Database supports parallel writes to foreign tables only when you set `mpp_execute 'all segments'`. +> **Note** Apache Cloudberry supports parallel writes to foreign tables only when you set `mpp_execute 'all segments'`. Support for the foreign table `mpp_execute` option, and the specific modes, is foreign-data wrapper-specific. 
@@ -149,17 +149,17 @@ The `mpp_execute` option can be specified in multiple commands: `CREATE FOREIGN ## Notes -Constraints on foreign tables (such as `CHECK` or `NOT NULL` clauses) are not enforced by Cloudberry Database, and most foreign-data wrappers do not attempt to enforce them either; that is, the constraint is simply assumed to hold true. There would be little point in such enforcement since it would only apply to rows inserted or updated via the foreign table, and not to rows modified by other means, such as directly on the remote server. Instead, a constraint attached to a foreign table should represent a constraint that is being enforced by the remote server. +Constraints on foreign tables (such as `CHECK` or `NOT NULL` clauses) are not enforced by Apache Cloudberry, and most foreign-data wrappers do not attempt to enforce them either; that is, the constraint is simply assumed to hold true. There would be little point in such enforcement since it would only apply to rows inserted or updated via the foreign table, and not to rows modified by other means, such as directly on the remote server. Instead, a constraint attached to a foreign table should represent a constraint that is being enforced by the remote server. Some special-purpose foreign-data wrappers might be the only access mechanism for the data they access, and in that case it might be appropriate for the foreign-data wrapper itself to perform constraint enforcement. But you should not assume that a wrapper does that unless its documentation says so. -Although Cloudberry Database does not attempt to enforce constraints on foreign tables, it does assume that they are correct for purposes of query optimization. If there are rows visible in the foreign table that do not satisfy a declared constraint, queries on the table might produce errors or incorrect answers. It is the user's responsibility to ensure that the constraint definition matches reality. 
+Although Apache Cloudberry does not attempt to enforce constraints on foreign tables, it does assume that they are correct for purposes of query optimization. If there are rows visible in the foreign table that do not satisfy a declared constraint, queries on the table might produce errors or incorrect answers. It is the user's responsibility to ensure that the constraint definition matches reality.
When a foreign table is used as a partition of a partitioned table, there is an implicit constraint that its contents must satisfy the partitioning rule. Again, it is the user's responsibility to ensure that that is true, which is best done by installing a matching constraint on the remote server.
Within a partitioned table containing foreign-table partitions, an `UPDATE` that changes the partition key value can cause a row to be moved from a local partition to a foreign-table partition, provided the foreign-data wrapper supports tuple routing. However it is not currently possible to move a row from a foreign-table partition to another partition. An `UPDATE` that would require doing that will fail due to the partitioning constraint, assuming that that is properly enforced by the remote server. -Similar considerations apply to generated columns. Stored generated columns are computed on insert or update on the local Cloudberry Database server and handed to the foreign-data wrapper for writing out to the foreign data store, but it is not enforced that a query of the foreign table returns values for stored generated columns that are consistent with the generation expression. Again, this might result in incorrect query results. +Similar considerations apply to generated columns. Stored generated columns are computed on insert or update on the local Apache Cloudberry server and handed to the foreign-data wrapper for writing out to the foreign data store, but it is not enforced that a query of the foreign table returns values for stored generated columns that are consistent with the generation expression. Again, this might result in incorrect query results. ## Examples @@ -187,7 +187,7 @@ CREATE FOREIGN TABLE measurement_y2016m07 ## Compatibility -`CREATE FOREIGN TABLE` largely conforms to the SQL standard; however, much as with [CREATE TABLE](/docs/sql-stmts/create-table.md), Cloudberry Database permits `NULL` constraints and zero-column foreign tables. The ability to specify column default values is a Cloudberry Database extension, as is the `mpp_execute` option. Table inheritance, in the form defined by Cloudberry Database, is nonstandard. 
+`CREATE FOREIGN TABLE` largely conforms to the SQL standard; however, much as with [CREATE TABLE](/docs/sql-stmts/create-table.md), Apache Cloudberry permits `NULL` constraints and zero-column foreign tables. The ability to specify column default values is an Apache Cloudberry extension, as is the `mpp_execute` option. Table inheritance, in the form defined by Apache Cloudberry, is nonstandard. ## See also diff --git a/docs/sql-stmts/create-function.md b/docs/sql-stmts/create-function.md index 63fd94d244..64e82a612b 100644 --- a/docs/sql-stmts/create-function.md +++ b/docs/sql-stmts/create-function.md @@ -52,7 +52,7 @@ For more information about creating functions, refer to the [User Defined Functi **Limited Use of VOLATILE and STABLE Functions** -To prevent data from becoming out-of-sync across the segments in Cloudberry Database, any function classified as `STABLE` or `VOLATILE` cannot be run at the segment level if it contains SQL or modifies the database in any way. For example, functions such as `random()` or `timeofday()` are not allowed to run on distributed data in Cloudberry Database because they could potentially cause inconsistent data between the segment instances. +To prevent data from becoming out-of-sync across the segments in Apache Cloudberry, any function classified as `STABLE` or `VOLATILE` cannot be run at the segment level if it contains SQL or modifies the database in any way. For example, functions such as `random()` or `timeofday()` are not allowed to run on distributed data in Apache Cloudberry because they could potentially cause inconsistent data between the segment instances. To ensure data consistency, `VOLATILE` and `STABLE` functions can safely be used in statements that are evaluated on and run from the coordinator. 
For example, the following statements are always run on the coordinator (statements without a `FROM` clause): @@ -67,7 +67,7 @@ In cases where a statement has a `FROM` clause containing a distributed table an SELECT * FROM foo(); ``` -One exception to this rule are functions that return a table reference (`rangeFuncs`) or functions that use the `refCursor` data type. Note that you cannot return a `refcursor` from any kind of function in Cloudberry Database. +One exception to this rule are functions that return a table reference (`rangeFuncs`) or functions that use the `refCursor` data type. Note that you cannot return a `refcursor` from any kind of function in Apache Cloudberry. **Function Volatility and EXECUTE ON Attributes** @@ -139,7 +139,7 @@ Lists which transforms a call to the function should apply. Transforms convert b **`STABLE`**
**`VOLATILE`** -These attributes inform the query optimizer about the behavior of the function. At most one choice may be specified. If none of these appear, `VOLATILE` is the default assumption. Since Cloudberry Database currently has limited use of `VOLATILE` functions, if a function is truly `IMMUTABLE`, you must declare it as so to be able to use it without restrictions. +These attributes inform the query optimizer about the behavior of the function. At most one choice may be specified. If none of these appear, `VOLATILE` is the default assumption. Since Apache Cloudberry currently has limited use of `VOLATILE` functions, if a function is truly `IMMUTABLE`, you must declare it as so to be able to use it without restrictions. `IMMUTABLE` indicates that the function cannot modify the database and always returns the same result when given the same argument values. It does not do database lookups or otherwise use information not directly present in its argument list. If this option is given, any call of the function with all-constant arguments can be immediately replaced with the function value. @@ -175,13 +175,13 @@ The key word `EXTERNAL` is allowed for SQL conformance, but it is optional since The `EXECUTE ON` attributes specify where (coordinator or segment instance) a function runs when it is invoked during the query execution process. -`EXECUTE ON ANY` (the default) indicates that the function can be run on the coordinator, or any segment instance, and it returns the same result regardless of where it is run. Cloudberry Database determines where the function runs. +`EXECUTE ON ANY` (the default) indicates that the function can be run on the coordinator, or any segment instance, and it returns the same result regardless of where it is run. Apache Cloudberry determines where the function runs. `EXECUTE ON COORDINATOR` indicates that the function must run only on the coordinator instance. 
`EXECUTE ON ALL SEGMENTS` indicates that the function must run on all primary segment instances, but not the coordinator, for each invocation. The overall result of the function is the `UNION ALL` of the results from all segment instances. -`EXECUTE ON INITPLAN` indicates that the function contains an SQL command that dispatches queries to the segment instances and requires special processing on the coordinator instance by Cloudberry Database when possible. +`EXECUTE ON INITPLAN` indicates that the function contains an SQL command that dispatches queries to the segment instances and requires special processing on the coordinator instance by Apache Cloudberry when possible. > **Note** `EXECUTE ON INITPLAN` is only supported in functions that are used in the `FROM` clause of a `CREATE TABLE AS` or `INSERT` command such as the `get_data()` function in these commands. @@ -191,7 +191,7 @@ CREATE TABLE t AS SELECT * FROM get_data(); INSERT INTO t1 SELECT * FROM get_data(); ``` -Cloudberry Database does not support the `EXECUTE ON INITPLAN` attribute in a function that is used in the `WITH` clause of a query, a CTE (common table expression). For example, specifying `EXECUTE ON INITPLAN` in function `get_data()` in this CTE is not supported. +Apache Cloudberry does not support the `EXECUTE ON INITPLAN` attribute in a function that is used in the `WITH` clause of a query, a CTE (common table expression). For example, specifying `EXECUTE ON INITPLAN` in function `get_data()` in this CTE is not supported. ```sql WITH tbl_a AS (SELECT * FROM get_data() ) @@ -250,7 +250,7 @@ The name of a callback function to run when a query that calls this function is ## Overloading -Cloudberry Database allows function overloading; that is, the same name can be used for several different functions so long as they have distinct input argument types. 
Whether or not you use it, this capability entails security precautions when calling functions in databases where some users mistrust other users; refer to [Functions](https://www.postgresql.org/docs/12/typeconv-func.html) in the PostgreSQL documentation for more information. +Apache Cloudberry allows function overloading; that is, the same name can be used for several different functions so long as they have distinct input argument types. Whether or not you use it, this capability entails security precautions when calling functions in databases where some users mistrust other users; refer to [Functions](https://www.postgresql.org/docs/12/typeconv-func.html) in the PostgreSQL documentation for more information. Two functions are considered the same if they have the same names and input argument types, ignoring any `OUT` parameters. Thus for example these declarations conflict: @@ -270,7 +270,7 @@ A call `foo(10)` will fail due to the ambiguity about which function should be c ## Notes -Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Cloudberry Database cluster (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. It is recommended that you locate shared libraries either relative to `$libdir` (which is located at `$GPHOME/lib`) or through the dynamic library path (set by the `dynamic_library_path` server configuration parameter) on all coordinator segment instances in the Cloudberry Database cluster. +Any compiled code (shared library files) for custom functions must be placed in the same location on every host in your Apache Cloudberry cluster (coordinator and all segments). This location must also be in the `LD_LIBRARY_PATH` so that the server can locate the files. 
It is recommended that you locate shared libraries either relative to `$libdir` (which is located at `$GPHOME/lib`) or through the dynamic library path (set by the `dynamic_library_path` server configuration parameter) on all coordinator segment instances in the Apache Cloudberry cluster. The full SQL type syntax is allowed for input arguments and return value. However, parenthesized type modifiers (e.g., the precision field for type `numeric`) are discarded by `CREATE FUNCTION`. Thus for example `CREATE FUNCTION foo (varchar(10)) ...` is exactly the same as `CREATE FUNCTION foo (varchar) ...`. @@ -280,7 +280,7 @@ If a function is declared `STRICT` with a `VARIADIC` argument, the strictness ch **Using Functions with Queries on Distributed Data** -In some cases, Cloudberry Database does not support using functions in a query where the data in a table specified in the `FROM` clause is distributed over Cloudberry Database segments. As an example, this SQL query contains the function `func()`: +In some cases, Apache Cloudberry does not support using functions in a query where the data in a table specified in the `FROM` clause is distributed over Apache Cloudberry segments. As an example, this SQL query contains the function `func()`: ```sql SELECT func(a) FROM table1; @@ -288,13 +288,13 @@ SELECT func(a) FROM table1; The function is not supported for use in the query if all of the following conditions are met: -- The data of table `table1` is distributed over Cloudberry Database segments. +- The data of table `table1` is distributed over Apache Cloudberry segments. - The function `func()` reads or modifies data from distributed tables. - The function `func()` returns more than one row or takes an argument (`a`) that comes from `table1`. If any of the conditions are not met, the function is supported. 
Specifically, the function is supported if any of the following conditions apply: -- The function `func()` does not access data from distributed tables, or accesses data that is only on the Cloudberry Database coordinator. +- The function `func()` does not access data from distributed tables, or accesses data that is only on the Apache Cloudberry coordinator. - The table `table1` is a coordinator only table. - The function `func()` returns only one row and only takes input arguments that are constant values. The function is supported if it can be changed to require no input arguments. @@ -309,10 +309,10 @@ These are limitations for functions defined with the `EXECUTE ON COORDINATOR` or - The function cannot be in the `SELECT` list of a query with a `FROM` clause. - A query that includes the function falls back from GPORCA to the Postgres Planner. -The attribute `EXECUTE ON INITPLAN` indicates that the function contains an SQL command that dispatches queries to the segment instances and requires special processing on the coordinator instance by Cloudberry Database. When possible, Cloudberry Database handles the function on the coordinator instance in the following manner. +The attribute `EXECUTE ON INITPLAN` indicates that the function contains an SQL command that dispatches queries to the segment instances and requires special processing on the coordinator instance by Apache Cloudberry. When possible, Apache Cloudberry handles the function on the coordinator instance in the following manner. -1. First, Cloudberry Database runs the function as part of an InitPlan node on the coordinator instance and holds the function output temporarily. -2. Then, in the MainPlan of the query plan, the function is called in an EntryDB (a special query executor (QE) that runs on the coordinator instance) and Cloudberry Database returns the data that was captured when the function was run as part of the InitPlan node. The function is not run in the MainPlan. +1. 
First, Apache Cloudberry runs the function as part of an InitPlan node on the coordinator instance and holds the function output temporarily. +2. Then, in the MainPlan of the query plan, the function is called in an EntryDB (a special query executor (QE) that runs on the coordinator instance) and Apache Cloudberry returns the data that was captured when the function was run as part of the InitPlan node. The function is not run in the MainPlan. This simple example uses the function `get_data()` in a CTAS command to create a table using data from the table `country`. The function contains a `SELECT` command that retrieves data from the table `country` and uses the `EXECUTE ON INITPLAN` attribute. @@ -491,7 +491,7 @@ $$ LANGUAGE plpgsql SET search_path = admin, pg_temp; ``` -The `SET` option was not available in earlier versions of Cloudberry Database, and so older functions may contain rather complicated logic to save, set, and restore `search_path`. The `SET` option is far easier to use for this purpose. +The `SET` option was not available in earlier versions of Apache Cloudberry, and so older functions may contain rather complicated logic to save, set, and restore `search_path`. The `SET` option is far easier to use for this purpose. Another point to keep in mind is that by default, execute privilege is granted to `PUBLIC` for newly created functions (see [GRANT](/docs/sql-stmts/grant.md) for more information). Frequently you will wish to restrict use of a security definer function to only some users. To do that, you must revoke the default `PUBLIC` privileges and then grant `EXECUTE` privilege selectively. To avoid having a window where the new function is accessible to all, create it and set the privileges within a single transaction. For example: @@ -505,7 +505,7 @@ COMMIT; ## Compatibility -`CREATE FUNCTION` is defined in SQL:1999 and later. The Cloudberry Database version is similar but not fully compatible. 
The attributes are not portable, neither are the different available languages. +`CREATE FUNCTION` is defined in SQL:1999 and later. The Apache Cloudberry version is similar but not fully compatible. The attributes are not portable, neither are the different available languages. For compatibility with some other database systems, argmode can be written either before or after argname. But only the first way is standard-compliant. diff --git a/docs/sql-stmts/create-index.md b/docs/sql-stmts/create-index.md index 974006fa63..bc0042f96d 100644 --- a/docs/sql-stmts/create-index.md +++ b/docs/sql-stmts/create-index.md @@ -25,7 +25,7 @@ The key field(s) for the index are specified as column names, or alternatively a An index field can be an expression computed from the values of one or more columns of the table row. This feature can be used to obtain fast access to data based on some transformation of the basic data. For example, an index computed on `upper(col)` would allow the clause `WHERE upper(col) = 'JIM'` to use an index. -Cloudberry Database provides the index methods B-tree, hash, bitmap, GiST, SP-GiST, GIN, and BRIN. Users can also define their own index methods, but that is fairly complicated. +Apache Cloudberry provides the index methods B-tree, hash, bitmap, GiST, SP-GiST, GIN, and BRIN. Users can also define their own index methods, but that is fairly complicated. When the `WHERE` clause is present, a partial index is created. A partial index is an index that contains entries for only a portion of a table, usually a portion that is more useful for indexing than the rest of the table. For example, if you have a table that contains both billed and unbilled orders where the unbilled orders take up a small fraction of the total table and yet is most often selected, you can improve performance by creating an index on just that portion. Another possible application is to use `WHERE` with `UNIQUE` to enforce uniqueness over a subset of a table. 
See [Partial Indexes](https://www.postgresql.org/docs/14/indexes-partial.html) in the PostgreSQL documentation for more information. @@ -59,7 +59,7 @@ Currently, the B-tree and the GiST index access methods support `INCLUDE`. In B- **`name`** -The name of the index to be created. The index is always created in the same schema as its parent table. If the name is omitted, Cloudberry Database chooses a suitable name based on the parent table's name and the indexed column name(s). +The name of the index to be created. The index is always created in the same schema as its parent table. If the name is omitted, Apache Cloudberry chooses a suitable name based on the parent table's name and the indexed column name(s). **`ONLY`** @@ -137,7 +137,7 @@ GiST indexes additionally accept this parameter: **`buffering`** -Determines whether Cloudberry Database builds the index using the buffering build technique described in [GiST buffering build](https://www.postgresql.org/docs/14/gist-implementation.html) in the PostgreSQL documentation. With `OFF` it is deactivated, with `ON` it is activated, and with `AUTO` it is initially deactivated, but turned on on-the-fly once the index size reaches effective_cache_size. The default is `AUTO`. +Determines whether Apache Cloudberry builds the index using the buffering build technique described in [GiST buffering build](https://www.postgresql.org/docs/14/gist-implementation.html) in the PostgreSQL documentation. With `OFF` it is deactivated, with `ON` it is activated, and with `AUTO` it is initially deactivated, but turned on on-the-fly once the index size reaches effective_cache_size. The default is `AUTO`. GIN indexes accept different parameters: @@ -247,7 +247,7 @@ SELECT * FROM points WHERE box(location,location) && '(0,0),(1,1)'::box; ## Compatibility -`CREATE INDEX` is a Cloudberry Database extension to the SQL standard. There are no provisions for indexes in the SQL standard. 
+`CREATE INDEX` is an Apache Cloudberry extension to the SQL standard.  ## See also  diff --git a/docs/sql-stmts/create-language.md b/docs/sql-stmts/create-language.md index 41d03b11d4..cff6977029 100644 --- a/docs/sql-stmts/create-language.md +++ b/docs/sql-stmts/create-language.md @@ -20,17 +20,17 @@ CREATE [ OR REPLACE ] [ TRUSTED ] [ PROCEDURAL ] LANGUAGE  ## Description  -`CREATE LANGUAGE` registers a new procedural language with a Cloudberry Database. Subsequently, functions and procedures can be defined in this new language. +`CREATE LANGUAGE` registers a new procedural language with an Apache Cloudberry database. Subsequently, functions and procedures can be defined in this new language.  -> **Note** Procedural languages for Cloudberry Database have been made into "extensions," and should therefore be installed with [CREATE EXTENSION](/docs/sql-stmts/create-extension.md), not `CREATE LANGUAGE`. Using `CREATE LANGUAGE` directly should be restricted to extension installation scripts. If you have a "bare" language in your database, perhaps as a result of an upgrade, you can convert it to an extension using `CREATE EXTENSION FROM unpackaged`. +> **Note** Procedural languages for Apache Cloudberry have been made into "extensions," and should therefore be installed with [CREATE EXTENSION](/docs/sql-stmts/create-extension.md), not `CREATE LANGUAGE`. Using `CREATE LANGUAGE` directly should be restricted to extension installation scripts. If you have a "bare" language in your database, perhaps as a result of an upgrade, you can convert it to an extension using `CREATE EXTENSION FROM unpackaged`.  `CREATE LANGUAGE` effectively associates the language name with handler function(s) that are responsible for executing functions written in the language.  There are two forms of the `CREATE LANGUAGE` command. 
In the first form, the user supplies just the name of the desired language, and the Cloudberry Database server consults the `pg_pltemplate` system catalog to determine the correct parameters. In the second form, the user supplies the language parameters along with the language name. The second form can be used to create a language that is not defined in `pg_pltemplate`, but this approach is considered obsolete. +There are two forms of the `CREATE LANGUAGE` command. In the first form, the user supplies just the name of the desired language, and the Apache Cloudberry server consults the `pg_pltemplate` system catalog to determine the correct parameters. In the second form, the user supplies the language parameters along with the language name. The second form can be used to create a language that is not defined in `pg_pltemplate`, but this approach is considered obsolete. When the server finds an entry in the `pg_pltemplate` catalog for the given language name, it will use the catalog data even if the command includes language parameters. This behavior simplifies loading of old dump files, which are likely to contain out-of-date information about language support functions. -Ordinarily, the user must have the Cloudberry Database superuser privilege to register a new language. However, the owner of a database can register a new language within that database if the language is listed in the `pg_pltemplate` catalog and is marked as allowed to be created by database owners (`tmpldbacreate` is `true`). The default is that trusted languages can be created by database owners, but this can be adjusted by superusers by modifying the contents of `pg_pltemplate`. The creator of a language becomes its owner and can later drop it, rename it, or assign it to a new owner. +Ordinarily, the user must have the Apache Cloudberry superuser privilege to register a new language. 
However, the owner of a database can register a new language within that database if the language is listed in the `pg_pltemplate` catalog and is marked as allowed to be created by database owners (`tmpldbacreate` is `true`). The default is that trusted languages can be created by database owners, but this can be adjusted by superusers by modifying the contents of `pg_pltemplate`. The creator of a language becomes its owner and can later drop it, rename it, or assign it to a new owner. `CREATE OR REPLACE LANGUAGE` will either create a new language, or replace an existing definition. If the language already exists, its parameters are updated according to the values specified or taken from `pg_pltemplate`, but the language's ownership and permissions settings do not change, and any existing functions written in the language are assumed to still be valid. In addition to the normal privilege requirements for creating a language, the user must be superuser or owner of the existing language. The `REPLACE` case is mainly meant to be used to ensure that the language exists. If the language has a `pg_pltemplate` entry then `REPLACE` will not actually change anything about an existing definition, except in the unusual case where the `pg_pltemplate` entry has been modified since the language was created. @@ -39,7 +39,7 @@ Ordinarily, the user must have the Cloudberry Database superuser privilege to re **`TRUSTED`** -`TRUSTED` specifies that the language does not grant access to data that the user would not otherwise have. If this key word is omitted when registering the language, only users with the Cloudberry Database superuser privilege can use this language to create new functions. +`TRUSTED` specifies that the language does not grant access to data that the user would not otherwise have. If this key word is omitted when registering the language, only users with the Apache Cloudberry superuser privilege can use this language to create new functions. 
**`PROCEDURAL`** @@ -52,7 +52,7 @@ For backward compatibility, the name can be enclosed by single quotes. **`HANDLER call_handler`** -The name of a previously registered function that will be called to run the procedural language's functions. The call handler for a procedural language must be written in a compiled language such as C with version 1 call convention and registered with Cloudberry Database as a function taking no arguments and returning the `language_handler` type, a placeholder type that is simply used to identify the function as a call handler. +The name of a previously registered function that will be called to run the procedural language's functions. The call handler for a procedural language must be written in a compiled language such as C with version 1 call convention and registered with Apache Cloudberry as a function taking no arguments and returning the `language_handler` type, a placeholder type that is simply used to identify the function as a call handler. **`INLINE inline_handler`** @@ -60,7 +60,7 @@ The name of a previously registered function that is called to run an anonymous **`VALIDATOR valfunction`** -The name of a previously registered function that will be called when a new function in the language is created, to validate the new function. If no validator function is specified, then Cloudberry Database will not check a new function when it is created. The validator function must take one argument of type `oid`, which will be the OID of the to-be-created function, and will typically return `void`. +The name of a previously registered function that will be called when a new function in the language is created, to validate the new function. If no validator function is specified, then Apache Cloudberry will not check a new function when it is created. The validator function must take one argument of type `oid`, which will be the OID of the to-be-created function, and will typically return `void`. 
A validator function would typically inspect the function body for syntactical correctness, but it can also look at other properties of the function, for example if the language cannot handle certain argument types. To signal an error, the validator function should use the `ereport()` function. The return value of the function is ignored.  @@ -99,7 +99,7 @@ CREATE LANGUAGE plsample  ## Compatibility  -`CREATE LANGUAGE` is a Cloudberry Database extension. +`CREATE LANGUAGE` is an Apache Cloudberry extension.  ## See also  diff --git a/docs/sql-stmts/create-materialized-view.md b/docs/sql-stmts/create-materialized-view.md index 8d717e779b..6c664cbc2c 100644 --- a/docs/sql-stmts/create-materialized-view.md +++ b/docs/sql-stmts/create-materialized-view.md @@ -63,7 +63,7 @@ This clause specifies whether or not the materialized view should be populated w  **`DISTRIBUTED RANDOMLY`**
**`DISTRIBUTED REPLICATED`** -Used to declare the Cloudberry Database distribution policy for the materialized view data. For information about a table distribution policy, see [CREATE TABLE](/docs/sql-stmts/create-table.md). +Used to declare the Apache Cloudberry distribution policy for the materialized view data. For information about a table distribution policy, see [CREATE TABLE](/docs/sql-stmts/create-table.md).  ## Notes  @@ -91,7 +91,7 @@ names, rank WHERE rank < '11' AND names.id=rank.id; ## Compatibility  -`CREATE MATERIALIZED VIEW` is a Cloudberry Database extension of the SQL standard. +`CREATE MATERIALIZED VIEW` is an Apache Cloudberry extension of the SQL standard.  ## See also  diff --git a/docs/sql-stmts/create-operator-class.md b/docs/sql-stmts/create-operator-class.md index 27d40c4ab8..32c7183d7f 100644 --- a/docs/sql-stmts/create-operator-class.md +++ b/docs/sql-stmts/create-operator-class.md @@ -27,7 +27,7 @@ The user who defines an operator class becomes its owner. Presently, the creatin  `CREATE OPERATOR CLASS` does not presently check whether the operator class definition includes all the operators and functions required by the index method, nor whether the operators and functions form a self-consistent set. It is the user's responsibility to define a valid operator class.  -Related operator classes can be grouped into operator families. To add a new operator class to an existing family, specify the `FAMILY` option in `CREATE OPERATOR CLASS`. Without this option, the new class is placed into a family named the same as the new class (Cloudberry Database creates that family if it doesn't already exist). +Related operator classes can be grouped into operator families. To add a new operator class to an existing family, specify the `FAMILY` option in `CREATE OPERATOR CLASS`. Without this option, the new class is placed into a family named the same as the new class (Apache Cloudberry creates that family if it doesn't already exist). 
Refer to [Interfacing Extensions to Indexes](https://www.postgresql.org/docs/12/xindex.html) in the PostgreSQL documentation for more information. @@ -99,7 +99,7 @@ The operators should not be defined by SQL functions. A SQL function is likely t Any functions used to implement the operator class must be defined as `IMMUTABLE`. -Before Cloudberry Database 6.0, the `OPERATOR` clause could include a `RECHECK` option. This option is no longer supported. Cloudberry Database now determines whether an index operator is "lossy" on-the-fly at run time. This allows more efficient handling of cases where an operator might or might not be lossy. +Before Apache Cloudberry 6.0, the `OPERATOR` clause could include a `RECHECK` option. This option is no longer supported. Apache Cloudberry now determines whether an index operator is "lossy" on-the-fly at run time. This allows more efficient handling of cases where an operator might or might not be lossy. ## Examples @@ -124,7 +124,7 @@ CREATE OPERATOR CLASS gist__int_ops ## Compatibility -`CREATE OPERATOR CLASS` is a Cloudberry Database extension. There is no `CREATE OPERATOR CLASS` statement in the SQL standard. +`CREATE OPERATOR CLASS` is an Apache Cloudberry extension. There is no `CREATE OPERATOR CLASS` statement in the SQL standard. ## See also diff --git a/docs/sql-stmts/create-operator-family.md b/docs/sql-stmts/create-operator-family.md index 3aba27c17c..ce16f19a03 100644 --- a/docs/sql-stmts/create-operator-family.md +++ b/docs/sql-stmts/create-operator-family.md @@ -36,7 +36,7 @@ The name of the index method this operator family is for. ## Compatibility -`CREATE OPERATOR FAMILY` is a Cloudberry Database extension. There is no `CREATE OPERATOR FAMILY` statement in the SQL standard. +`CREATE OPERATOR FAMILY` is an Apache Cloudberry extension. There is no `CREATE OPERATOR FAMILY` statement in the SQL standard. 
## See also diff --git a/docs/sql-stmts/create-operator.md b/docs/sql-stmts/create-operator.md index 215287f4ad..195dc2492d 100644 --- a/docs/sql-stmts/create-operator.md +++ b/docs/sql-stmts/create-operator.md @@ -28,7 +28,7 @@ There are a few restrictions on your choice of name: - `--` and `/*` cannot appear anywhere in an operator name, since they will be taken as the start of a comment. - A multicharacter operator name cannot end in `+` or `-`, unless the name also contains at least one of these characters: ~ ! @ # % ^ & | ` ? - For example, `@-` is an allowed operator name, but `*-` is not. This restriction allows Cloudberry Database to parse SQL-compliant commands without requiring spaces between tokens. + For example, `@-` is an allowed operator name, but `*-` is not. This restriction allows Apache Cloudberry to parse SQL-compliant commands without requiring spaces between tokens. - The use of `=>` as an operator name is deprecated. It may be disallowed altogether in a future release. @@ -36,7 +36,7 @@ The operator `!=` is mapped to `<>` on input, so these two names are always equi At least one of `LEFTARG` and `RIGHTARG` must be defined. For binary operators, both must be defined. For right unary operators, only `LEFTARG` should be defined, while for left unary operators only `RIGHTARG` should be defined. -**Note:** Right unary, also called postfix, operators are deprecated and may be removed in a future Cloudberry Database release. +**Note:** Right unary, also called postfix, operators are deprecated and may be removed in a future Apache Cloudberry release. The function_name function must have been previously defined using `CREATE FUNCTION`, must be `IMMUTABLE`, and must be defined to accept the correct number of arguments (either one or two) of the indicated types. @@ -144,7 +144,7 @@ SELECT (a + b) AS c FROM test_complex; ## Compatibility -`CREATE OPERATOR` is a Cloudberry Database extension to the SQL standard. 
The SQL standard does not provide for user-defined operators. +`CREATE OPERATOR` is an Apache Cloudberry extension to the SQL standard. The SQL standard does not provide for user-defined operators. ## See also diff --git a/docs/sql-stmts/create-policy.md b/docs/sql-stmts/create-policy.md index 3eaeddfe35..5ef55e9faf 100644 --- a/docs/sql-stmts/create-policy.md +++ b/docs/sql-stmts/create-policy.md @@ -19,7 +19,7 @@ CREATE POLICY ON ## Description -The `CREATE POLICY` command defines a new row-level security policy for a table. Note that row-level security must be enabled on the table (using `ALTER TABLE ... ENABLE ROW LEVEL SECURITY`) in order for Cloudberry Database to apply created policies. +The `CREATE POLICY` command defines a new row-level security policy for a table. Note that row-level security must be enabled on the table (using `ALTER TABLE ... ENABLE ROW LEVEL SECURITY`) in order for Apache Cloudberry to apply created policies. A policy grants the permission to select, insert, update, or delete rows that match the relevant policy expression. Existing table rows are checked against the expression specified in `USING`, while new rows that would be created via `INSERT` or `UPDATE` are checked against the expression specified in `WITH CHECK`. When a `USING` expression returns true for a given row, then that row is visible to the user, while if false or null is returned then the row is not visible. When a `WITH CHECK` expression returns true for a row, then that row is inserted or updated, while if false or null is returned then an error occurs. @@ -173,7 +173,7 @@ Refer to About Configuring Row-Level Security Policies for more information and ## Compatibility -`CREATE POLICY` is a Cloudberry Database extension to the SQL standard. +`CREATE POLICY` is an Apache Cloudberry extension to the SQL standard. 
## See also  diff --git a/docs/sql-stmts/create-procedure.md b/docs/sql-stmts/create-procedure.md index 660ac92533..2f432a914a 100644 --- a/docs/sql-stmts/create-procedure.md +++ b/docs/sql-stmts/create-procedure.md @@ -127,7 +127,7 @@ CALL insert_data(1, 2); ## Compatibility -A `CREATE PROCEDURE` command is defined in the SQL standard. The Cloudberry Database version is similar but not fully compatible. For details see also [CREATE FUNCTION](/docs/sql-stmts/create-function.md). +A `CREATE PROCEDURE` command is defined in the SQL standard. The Apache Cloudberry version is similar but not fully compatible. For details see also [CREATE FUNCTION](/docs/sql-stmts/create-function.md). ## See also diff --git a/docs/sql-stmts/create-protocol.md b/docs/sql-stmts/create-protocol.md index 14a2fa9419..fba25083f6 100644 --- a/docs/sql-stmts/create-protocol.md +++ b/docs/sql-stmts/create-protocol.md @@ -4,7 +4,7 @@ title: CREATE PROTOCOL # CREATE PROTOCOL -Registers a custom data access protocol that can be specified when defining a Cloudberry Database external table. +Registers a custom data access protocol that can be specified when defining an Apache Cloudberry external table. ## Synopsis @@ -36,11 +36,11 @@ The name of the data access protocol. The protocol name is case sensitive. The n **`readfunc= 'read_call_handler'`** -The name of a previously registered function that Cloudberry Database calls to read data from an external data source. The command must specify either a read call handler or a write call handler. +The name of a previously registered function that Apache Cloudberry calls to read data from an external data source. The command must specify either a read call handler or a write call handler. **`writefunc= 'write_call_handler'`** -The name of a previously registered function that Cloudberry Database calls to write data to an external data source. The command must specify either a read call handler or a write call handler. 
+The name of a previously registered function that Apache Cloudberry calls to write data to an external data source. The command must specify either a read call handler or a write call handler. **`validatorfunc='validate_handler'`** @@ -48,13 +48,13 @@ An optional validator function that validates the URL specified in the `CREATE E ## Notes -Cloudberry Database handles external tables of type `file`, `gpfdist`, and `gpfdists` internally. +Apache Cloudberry handles external tables of type `file`, `gpfdist`, and `gpfdists` internally. -Any shared library that implements a data access protocol must be located in the same location on all Cloudberry Database segment hosts. For example, the shared library can be in a location specified by the operating system environment variable `LD_LIBRARY_PATH` on all hosts. You can also specify the location when you define the handler function. For example, when you define the `s3` protocol in the `CREATE PROTOCOL` command, you specify `$libdir/gps3ext.so` as the location of the shared object, where `$libdir` is located at `$GPHOME/lib`. +Any shared library that implements a data access protocol must be located in the same location on all Apache Cloudberry segment hosts. For example, the shared library can be in a location specified by the operating system environment variable `LD_LIBRARY_PATH` on all hosts. You can also specify the location when you define the handler function. For example, when you define the `s3` protocol in the `CREATE PROTOCOL` command, you specify `$libdir/gps3ext.so` as the location of the shared object, where `$libdir` is located at `$GPHOME/lib`. ## Compatibility -`CREATE PROTOCOL` is a Cloudberry Database extension. +`CREATE PROTOCOL` is an Apache Cloudberry extension. 
## See also diff --git a/docs/sql-stmts/create-resource-group.md b/docs/sql-stmts/create-resource-group.md index a64afaddf0..a6dce58790 100644 --- a/docs/sql-stmts/create-resource-group.md +++ b/docs/sql-stmts/create-resource-group.md @@ -24,17 +24,17 @@ CPU_MAX_PERCENT= | CPUSET=; ## Description -Creates a new resource group for Cloudberry Database resource management. You can create resource groups to manage resources for roles or to manage the resources of a Cloudberry Database external component such as PL/Container. +Creates a new resource group for Apache Cloudberry resource management. You can create resource groups to manage resources for roles or to manage the resources of an Apache Cloudberry external component such as PL/Container. A resource group that you create to manage a user role identifies concurrent transaction, memory, and CPU limits for the role when resource groups are enabled. You may assign such resource groups to one or more roles. -A resource group that you create to manage the resources of a Cloudberry Database external component such as PL/Container identifies the memory and CPU limits for the component when resource groups are enabled. These resource groups use cgroups for both CPU and memory management. Assignment of resource groups to external components is component-specific. For example, you assign a PL/Container resource group when you configure a PL/Container runtime. You cannot assign a resource group that you create for external components to a role, nor can you assign a resource group that you create for roles to an external component. +A resource group that you create to manage the resources of an Apache Cloudberry external component such as PL/Container identifies the memory and CPU limits for the component when resource groups are enabled. These resource groups use cgroups for both CPU and memory management. Assignment of resource groups to external components is component-specific. 
For example, you assign a PL/Container resource group when you configure a PL/Container runtime. You cannot assign a resource group that you create for external components to a role, nor can you assign a resource group that you create for roles to an external component. -You must have `SUPERUSER` privileges to create a resource group. The maximum number of resource groups allowed in your Cloudberry Database cluster is 100. +You must have `SUPERUSER` privileges to create a resource group. The maximum number of resource groups allowed in your Apache Cloudberry cluster is 100. -Cloudberry Database pre-defines two default resource groups: `admin_group` and `default_group`. These group names, as well as the group name `none`, are reserved. +Apache Cloudberry pre-defines two default resource groups: `admin_group` and `default_group`. These group names, as well as the group name `none`, are reserved. -To set appropriate limits for resource groups, the Cloudberry Database administrator must be familiar with the queries typically run on the system, as well as the users/roles running those queries and the external components they may be using, such as PL/Containers. +To set appropriate limits for resource groups, the Apache Cloudberry administrator must be familiar with the queries typically run on the system, as well as the users/roles running those queries and the external components they may be using, such as PL/Containers. After creating a resource group for a role, assign the group to one or more roles using the [ALTER ROLE](/docs/sql-stmts/alter-role.md) or [CREATE ROLE](/docs/sql-stmts/create-role.md) commands. @@ -70,7 +70,7 @@ Optional. The scheduling priority of the current group. The value range is `1-50 Specify cores as a comma-separated list of single core numbers or core number intervals. Define the coordinator host cores first, followed by segment host cores, and separate the two with a semicolon. You must enclose the full core configuration in single quotes. 
For example, '1;1,3-4' configures core 1 for the coordinator host, and cores 1, 3, and 4 for the segment hosts. -> **Note** You can configure `CPUSET` for a resource group only after you have enabled resource group-based resource management for your Cloudberry Database cluster. +> **Note** You can configure `CPUSET` for a resource group only after you have enabled resource group-based resource management for your Apache Cloudberry cluster. **`MEMORY_LIMIT integer`** @@ -130,7 +130,7 @@ CREATE RESOURCE GROUP rgroup3 WITH (CPUSET='1;1-3', MEMORY_LIMIT=11); ## Compatibility -`CREATE RESOURCE GROUP` is a Cloudberry Database extension. There is no provision for resource groups or resource management in the SQL standard. +`CREATE RESOURCE GROUP` is an Apache Cloudberry extension. There is no provision for resource groups or resource management in the SQL standard. ## See also diff --git a/docs/sql-stmts/create-resource-queue.md b/docs/sql-stmts/create-resource-queue.md index a328541eba..23e70e84dc 100644 --- a/docs/sql-stmts/create-resource-queue.md +++ b/docs/sql-stmts/create-resource-queue.md @@ -30,21 +30,21 @@ where queue_attribute is: ## Description -Creates a new resource queue for Cloudberry Database resource management. A resource queue must have either an `ACTIVE_STATEMENTS` or a `MAX_COST` value (or it can have both). Only a superuser can create a resource queue. +Creates a new resource queue for Apache Cloudberry resource management. A resource queue must have either an `ACTIVE_STATEMENTS` or a `MAX_COST` value (or it can have both). Only a superuser can create a resource queue. Resource queues with an `ACTIVE_STATEMENTS` threshold set a maximum limit on the number of queries that can be run by roles assigned to that queue. It controls the number of active queries that are allowed to run at the same time. The value for `ACTIVE_STATEMENTS` should be an integer greater than 0. 
Resource queues with a `MAX_COST` threshold set a maximum limit on the total cost of queries that can be run by roles assigned to that queue. Cost is measured in the *estimated total cost* for the query as determined by the query planner (as shown in the `EXPLAIN` output for a query). Therefore, an administrator must be familiar with the queries typically run on the system in order to set an appropriate cost threshold for a queue. Cost is measured in units of disk page fetches; 1.0 equals one sequential disk page read. The value for `MAX_COST` is specified as a floating point number (for example 100.0) or can also be specified as an exponent (for example 1e+2). If a resource queue is limited based on a cost threshold, then the administrator can allow `COST_OVERCOMMIT=TRUE` (the default). This means that a query that exceeds the allowed cost threshold will be allowed to run but only when the system is idle. If `COST_OVERCOMMIT=FALSE` is specified, queries that exceed the cost limit will always be rejected and never allowed to run. Specifying a value for `MIN_COST` allows the administrator to define a cost for small queries that will be exempt from resource queueing. -> **Note** GPORCA and the Postgres Planner utilize different query costing models and may compute different costs for the same query. The Cloudberry Database resource queue resource management scheme neither differentiates nor aligns costs between GPORCA and the Postgres Planner; it uses the literal cost value returned from the optimizer to throttle queries. +> **Note** GPORCA and the Postgres Planner utilize different query costing models and may compute different costs for the same query. The Apache Cloudberry resource queue resource management scheme neither differentiates nor aligns costs between GPORCA and the Postgres Planner; it uses the literal cost value returned from the optimizer to throttle queries. 
-When resource queue-based resource management is active, use the `MEMORY_LIMIT` and `ACTIVE_STATEMENTS` limits for resource queues rather than configuring cost-based limits. Even when using GPORCA, Cloudberry Database may fall back to using the Postgres Planner for certain queries, so using cost-based limits can lead to unexpected results. +When resource queue-based resource management is active, use the `MEMORY_LIMIT` and `ACTIVE_STATEMENTS` limits for resource queues rather than configuring cost-based limits. Even when using GPORCA, Apache Cloudberry may fall back to using the Postgres Planner for certain queries, so using cost-based limits can lead to unexpected results. If a value is not defined for `ACTIVE_STATEMENTS` or `MAX_COST`, it is set to `-1` by default (meaning no limit). After defining a resource queue, you must assign roles to the queue using the [ALTER ROLE](/docs/sql-stmts/alter-role.md) or [CREATE ROLE](/docs/sql-stmts/create-role.md) command. You can optionally assign a `PRIORITY` to a resource queue to control the relative share of available CPU resources used by queries associated with the queue in relation to other resource queues. If a value is not defined for `PRIORITY`, queries associated with the queue have a default priority of `MEDIUM`. -Resource queues with an optional `MEMORY_LIMIT` threshold set a maximum limit on the amount of memory that all queries submitted through a resource queue can consume on a segment host. This determines the total amount of memory that all worker processes of a query can consume on a segment host during query execution. Cloudberry Database recommends that `MEMORY_LIMIT` be used in conjunction with `ACTIVE_STATEMENTS` rather than with `MAX_COST`. The default amount of memory allotted per query on statement-based queues is: `MEMORY_LIMIT / ACTIVE_STATEMENTS`. The default amount of memory allotted per query on cost-based queues is: `MEMORY_LIMIT * (query_cost / MAX_COST)`. 
+Resource queues with an optional `MEMORY_LIMIT` threshold set a maximum limit on the amount of memory that all queries submitted through a resource queue can consume on a segment host. This determines the total amount of memory that all worker processes of a query can consume on a segment host during query execution. Apache Cloudberry recommends that `MEMORY_LIMIT` be used in conjunction with `ACTIVE_STATEMENTS` rather than with `MAX_COST`. The default amount of memory allotted per query on statement-based queues is: `MEMORY_LIMIT / ACTIVE_STATEMENTS`. The default amount of memory allotted per query on cost-based queues is: `MEMORY_LIMIT * (query_cost / MAX_COST)`. The default memory allotment can be overridden on a per-query basis using the `statement_mem` server configuration parameter, provided that `MEMORY_LIMIT` or `max_statement_mem` is not exceeded. For example, to allocate more memory to a particular query: @@ -74,7 +74,7 @@ Sets the total memory quota for all statements submitted from users in this reso **`MAX_COST float`** -Resource queues with a `MAX_COST` threshold set a maximum limit on the total cost of queries that can be run by roles assigned to that queue. Cost is measured in the *estimated total cost* for the query as determined by the Cloudberry Database query optimizer (as shown in the `EXPLAIN` output for a query). Therefore, an administrator must be familiar with the queries typically run on the system in order to set an appropriate cost threshold for a queue. Cost is measured in units of disk page fetches; 1.0 equals one sequential disk page read. The value for `MAX_COST` is specified as a floating point number (for example 100.0) or can also be specified as an exponent (for example 1e+2). +Resource queues with a `MAX_COST` threshold set a maximum limit on the total cost of queries that can be run by roles assigned to that queue. 
Cost is measured in the *estimated total cost* for the query as determined by the Apache Cloudberry query optimizer (as shown in the `EXPLAIN` output for a query). Therefore, an administrator must be familiar with the queries typically run on the system in order to set an appropriate cost threshold for a queue. Cost is measured in units of disk page fetches; 1.0 equals one sequential disk page read. The value for `MAX_COST` is specified as a floating point number (for example 100.0) or can also be specified as an exponent (for example 1e+2). **`COST_OVERCOMMIT boolean`** @@ -97,7 +97,7 @@ SELECT * from gp_toolkit.gp_resqueue_status WHERE rsqname='queue_name'; ``` -There is also another system view named `pg_stat_resqueue` which shows statistical metrics for a resource queue over time. To use this view, however, you must enable the `stats_queue_level` server configuration parameter. See "Managing Workload and Resources" in the *Cloudberry Database Administrator Guide* for more information about using resource queues. +There is also another system view named `pg_stat_resqueue` which shows statistical metrics for a resource queue over time. To use this view, however, you must enable the `stats_queue_level` server configuration parameter. See "Managing Workload and Resources" in the *Apache Cloudberry Administrator Guide* for more information about using resource queues. `CREATE RESOURCE QUEUE` cannot be run within a transaction. @@ -147,7 +147,7 @@ CREATE RESOURCE QUEUE myqueue WITH (ACTIVE_STATEMENTS=5, ## Compatibility -`CREATE RESOURCE QUEUE` is a Cloudberry Database extension. There is no provision for resource queues or resource management in the SQL standard. +`CREATE RESOURCE QUEUE` is an Apache Cloudberry extension. There is no provision for resource queues or resource management in the SQL standard. 
## See also diff --git a/docs/sql-stmts/create-role.md b/docs/sql-stmts/create-role.md index 6327ad2dd6..d38ee8243f 100644 --- a/docs/sql-stmts/create-role.md +++ b/docs/sql-stmts/create-role.md @@ -44,9 +44,9 @@ where option can be: ## Description -`CREATE ROLE` adds a new role to a Cloudberry Database system. A role is an entity that can own database objects and have database privileges. A role can be considered a user, a group, or both depending on how it is used. You must have `CREATEROLE` privilege or be a database superuser to use this command. +`CREATE ROLE` adds a new role to an Apache Cloudberry system. A role is an entity that can own database objects and have database privileges. A role can be considered a user, a group, or both depending on how it is used. You must have `CREATEROLE` privilege or be a database superuser to use this command. -Note that roles are defined at the system-level and are valid for all databases in your Cloudberry Database system. +Note that roles are defined at the system-level and are valid for all databases in your Apache Cloudberry system. ## Parameters @@ -74,7 +74,7 @@ These clauses determine whether a role will be permitted to create new roles (th If `CREATEEXTTABLE` is specified, the role being defined is allowed to create external tables. The default `type` is `readable` and the default `protocol` is `gpfdist`, if not specified. Valid types are `gpfdist`, `gpfdists`, `http`, and `https`. `NOCREATEEXTTABLE` (the default type) denies the role the ability to create external tables. Note that external tables that use the `file` or `execute` protocols can only be created by superusers. -Use the `GRANT...ON PROTOCOL` command to allow users to create and use external tables with a custom protocol type, including the `s3` and `pxf` protocols included with Cloudberry Database. 
+Use the `GRANT...ON PROTOCOL` command to allow users to create and use external tables with a custom protocol type, including the `s3` and `pxf` protocols included with Apache Cloudberry. **`INHERIT`**
**`NOINHERIT`** @@ -107,7 +107,7 @@ If role can log in, this specifies how many concurrent connections the role can Sets the role's password. (A password is only of use for roles having the `LOGIN` attribute, but you can nonetheless define one for roles without it.) If you do not plan to use password authentication you can omit this option. If no password is specified, the password will be set to null and password authentication will always fail for that user. A null password can optionally be written explicitly as `PASSWORD NULL`. -> **Note:** Specifying an empty string will also set the password to null, but that was not the case before Cloudberry Database version 7. In earlier versions, an empty string could be used, or not, depending on the authentication method and the exact version, and `libpq` would refuse to use it in any case. To avoid the ambiguity, specifying an empty string should be avoided. +> **Note:** Specifying an empty string will also set the password to null, but that was not the case before Apache Cloudberry version 7. In earlier versions, an empty string could be used, or not, depending on the authentication method and the exact version, and `libpq` would refuse to use it in any case. To avoid the ambiguity, specifying an empty string should be avoided. The password is always stored encrypted in the system catalogs. The `ENCRYPTED` keyword has no effect, but is accepted for backwards compatibility. The method of encryption is determined by the configuration parameter `password_encryption`. If the presented password string is already in MD5-encrypted or SCRAM-encrypted format, then it is stored as-is regardless of `password_encryption` (since the system cannot decrypt the specified encrypted password string, to encrypt it in a different format). This allows reloading of encrypted passwords during dump/restore. 
@@ -197,11 +197,11 @@ The `VALID UNTIL` clause defines an expiration time for a password only, not for The `INHERIT` attribute governs inheritance of grantable privileges (access privileges for database objects and role memberships). It does not apply to the special role attributes set by `CREATE ROLE` and `ALTER ROLE`. For example, being a member of a role with `CREATEDB` privilege does not immediately grant the ability to create databases, even if `INHERIT` is set; it would be necessary to become that role via [SET ROLE](/docs/sql-stmts/set-role.md) before creating a database. -The `INHERIT` attribute is the default for reasons of backwards compatibility. In prior releases of Cloudberry Database, users always had access to all privileges of groups they were members of. However, `NOINHERIT` provides a closer match to the semantics specified in the SQL standard. +The `INHERIT` attribute is the default for reasons of backwards compatibility. In prior releases of Apache Cloudberry, users always had access to all privileges of groups they were members of. However, `NOINHERIT` provides a closer match to the semantics specified in the SQL standard. Be careful with the `CREATEROLE` privilege. There is no concept of inheritance for the privileges of a `CREATEROLE`-role. That means that even if a role does not have a certain privilege but is allowed to create other roles, it can easily create another role with different privileges than its own (except for creating roles with superuser privileges). For example, if a role has the `CREATEROLE` privilege but not the `CREATEDB` privilege, it can create a new role with the `CREATEDB` privilege. Therefore, regard roles that have the `CREATEROLE` privilege as almost-superuser-roles. -Cloudberry Database includes a program createuser that has the same functionality as `CREATE ROLE` (in fact, it calls this command) but can be run from the command shell. 
+Apache Cloudberry includes a program createuser that has the same functionality as `CREATE ROLE` (in fact, it calls this command) but can be run from the command shell. The `CONNECTION LIMIT` option is only enforced approximately; if two new sessions start at about the same time when just one connection "slot" remains for the role, it is possible that both will fail. Also, the limit is never enforced for superusers. @@ -268,9 +268,9 @@ CREATE ROLE jonathan LOGIN RESOURCE QUEUE poweruser; CREATE ROLE [WITH ADMIN ] ``` -Allowing multiple initial administrators, and all the other options of `CREATE ROLE`, are Cloudberry Database extensions. +Allowing multiple initial administrators, and all the other options of `CREATE ROLE`, are Apache Cloudberry extensions. -The SQL standard defines the concepts of users and roles, but it regards them as distinct concepts and leaves all commands defining users to be specified by the database implementation. In Cloudberry Database users and roles are unified into a single type of object. Roles therefore have many more optional attributes than they do in the standard. +The SQL standard defines the concepts of users and roles, but it regards them as distinct concepts and leaves all commands defining users to be specified by the database implementation. In Apache Cloudberry users and roles are unified into a single type of object. Roles therefore have many more optional attributes than they do in the standard. The behavior specified by the SQL standard is most closely approximated by giving users the `NOINHERIT` attribute, while roles are given the `INHERIT` attribute. diff --git a/docs/sql-stmts/create-rule.md b/docs/sql-stmts/create-rule.md index 879a496b5f..6dd4485c9d 100644 --- a/docs/sql-stmts/create-rule.md +++ b/docs/sql-stmts/create-rule.md @@ -23,7 +23,7 @@ CREATE [OR REPLACE] RULE AS ON `CREATE RULE` defines a new rule applying to a specified table or view. 
`CREATE OR REPLACE RULE` will either create a new rule, or replace an existing rule of the same name for the same table. -The Cloudberry Database rule system allows one to define an alternate action to be performed on insertions, updates, or deletions in database tables. A rule causes additional or alternate commands to be run when a given command on a given table is run. An `INSTEAD` rule can replace a given command by another, or cause a command to not be run at all. Rules can be used to implement SQL views as well. It is important to realize that a rule is really a command transformation mechanism, or command macro. The transformation happens before the execution of the command starts. It does not operate independently for each physical row as does a trigger. +The Apache Cloudberry rule system allows one to define an alternate action to be performed on insertions, updates, or deletions in database tables. A rule causes additional or alternate commands to be run when a given command on a given table is run. An `INSTEAD` rule can replace a given command by another, or cause a command to not be run at all. Rules can be used to implement SQL views as well. It is important to realize that a rule is really a command transformation mechanism, or command macro. The transformation happens before the execution of the command starts. It does not operate independently for each physical row as does a trigger. `ON SELECT` rules must be unconditional `INSTEAD` rules and must have actions that consist of a single `SELECT` command. Thus, an `ON SELECT` rule effectively turns the table into a view, whose visible contents are the rows returned by the rule's `SELECT` command rather than whatever had been stored in the table (if anything). It is considered better style to write a `CREATE VIEW` command than to create a real table and define an `ON SELECT` rule for it. @@ -71,7 +71,7 @@ You must be the owner of a table to create or change rules for it. 
In a rule for `INSERT`, `UPDATE`, or `DELETE` on a view, you can add a `RETURNING` clause that emits the view's columns. This clause will be used to compute the outputs if the rule is triggered by an `INSERT RETURNING`, `UPDATE RETURNING`, or `DELETE RETURNING` command respectively. When the rule is triggered by a command without `RETURNING`, the rule's `RETURNING` clause will be ignored. The current implementation allows only unconditional `INSTEAD` rules to contain `RETURNING`; furthermore there can be at most one `RETURNING` clause among all the rules for the same event. (This ensures that there is only one candidate `RETURNING` clause to be used to compute the results.) `RETURNING` queries on the view will be rejected if there is no `RETURNING` clause in any available rule. -It is very important to take care to avoid circular rules. For example, though each of the following two rule definitions are accepted by Cloudberry Database, the `SELECT` command would cause Cloudberry to report an error because of recursive expansion of a rule: +It is very important to take care to avoid circular rules. For example, though each of the following two rule definitions are accepted by Apache Cloudberry, the `SELECT` command would cause Apache Cloudberry to report an error because of recursive expansion of a rule: ```sql CREATE RULE "_RETURN" AS @@ -100,7 +100,7 @@ one `NOTIFY` event will be sent during the `UPDATE`, whether or not there are an ## Compatibility -`CREATE RULE` is a Cloudberry Database extension, as is the entire query rewrite system. +`CREATE RULE` is an Apache Cloudberry extension, as is the entire query rewrite system. ## See also diff --git a/docs/sql-stmts/create-schema.md b/docs/sql-stmts/create-schema.md index 2723091d82..8fe1358692 100644 --- a/docs/sql-stmts/create-schema.md +++ b/docs/sql-stmts/create-schema.md @@ -47,7 +47,7 @@ The role name of the user who will own the new schema. 
If omitted, defaults to t An SQL statement defining an object to be created within the schema. Currently, only `CREATE TABLE`, `CREATE VIEW`, `CREATE INDEX`, `CREATE SEQUENCE`, `CREATE TRIGGER`, and `GRANT` are accepted as clauses within `CREATE SCHEMA`. Other kinds of objects may be created in separate commands after the schema is created. - > **Note** Cloudberry Database does not support triggers. + > **Note** Apache Cloudberry does not support triggers. **`IF NOT EXISTS`** @@ -99,13 +99,13 @@ CREATE VIEW hollywood.winners AS ## Compatibility -The SQL standard allows a `DEFAULT CHARACTER SET` clause in `CREATE SCHEMA`, as well as more subcommand types than are presently accepted by Cloudberry Database. +The SQL standard allows a `DEFAULT CHARACTER SET` clause in `CREATE SCHEMA`, as well as more subcommand types than are presently accepted by Apache Cloudberry. -The SQL standard specifies that the subcommands in `CREATE SCHEMA` may appear in any order. The present Cloudberry Database implementation does not handle all cases of forward references in subcommands; it may sometimes be necessary to reorder the subcommands in order to avoid forward references. +The SQL standard specifies that the subcommands in `CREATE SCHEMA` may appear in any order. The present Apache Cloudberry implementation does not handle all cases of forward references in subcommands; it may sometimes be necessary to reorder the subcommands in order to avoid forward references. -According to the SQL standard, the owner of a schema always owns all objects within it. Cloudberry Database allows schemas to contain objects owned by users other than the schema owner. This can happen only if the schema owner grants the `CREATE` privilege on the schema to someone else, or a superuser chooses to create objects in it. +According to the SQL standard, the owner of a schema always owns all objects within it. Apache Cloudberry allows schemas to contain objects owned by users other than the schema owner. 
This can happen only if the schema owner grants the `CREATE` privilege on the schema to someone else, or a superuser chooses to create objects in it. -The `IF NOT EXISTS` option is a Cloudberry Database extension. +The `IF NOT EXISTS` option is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/create-sequence.md b/docs/sql-stmts/create-sequence.md index 3ffdf66eae..f31ec1c273 100644 --- a/docs/sql-stmts/create-sequence.md +++ b/docs/sql-stmts/create-sequence.md @@ -38,22 +38,22 @@ You can also use the function `setval()` to operate on a sequence, but only for SELECT setval('myserial', 201); ``` -But the following query will be rejected in Cloudberry Database because it operates on distributed data: +But the following query will be rejected in Apache Cloudberry because it operates on distributed data: ```sql INSERT INTO product VALUES (setval('myserial', 201), 'gizmo'); ``` -In a regular (non-distributed) database, functions that operate on the sequence go to the local sequence table to get values as they are needed. In Cloudberry Database, however, keep in mind that each segment is its own distinct database process. Therefore the segments need a single point of truth to go for sequence values so that all segments get incremented correctly and the sequence moves forward in the right order. A sequence server process runs on the coordinator and is the point-of-truth for a sequence in a Cloudberry Database distributed database. Segments get sequence values at runtime from the coordinator. +In a regular (non-distributed) database, functions that operate on the sequence go to the local sequence table to get values as they are needed. In Apache Cloudberry, however, keep in mind that each segment is its own distinct database process. Therefore the segments need a single point of truth to go for sequence values so that all segments get incremented correctly and the sequence moves forward in the right order. 
A sequence server process runs on the coordinator and is the point-of-truth for a sequence in an Apache Cloudberry distributed database. Segments get sequence values at runtime from the coordinator. -Because of this distributed sequence design, there are some limitations on the functions that operate on a sequence in Cloudberry Database: +Because of this distributed sequence design, there are some limitations on the functions that operate on a sequence in Apache Cloudberry: - `lastval()` and `currval()` functions are not supported. - `setval()` can only be used to set the value of the sequence generator on the coordinator, it cannot be used in subqueries to update records on distributed table data. -- `nextval()` sometimes grabs a block of values from the coordinator for a segment to use, depending on the query. So values may sometimes be skipped in the sequence if all of the block turns out not to be needed at the segment level. Note that a regular PostgreSQL database does this too, so this is not something unique to Cloudberry Database. +- `nextval()` sometimes grabs a block of values from the coordinator for a segment to use, depending on the query. So values may sometimes be skipped in the sequence if all of the block turns out not to be needed at the segment level. Note that a regular PostgreSQL database does this too, so this is not something unique to Apache Cloudberry. > **Note** -> The default sequence cache size in Cloudberry Database is `20`. +> The default sequence cache size in Apache Cloudberry is `20`. Although you cannot update a sequence directly, you can use a query like: @@ -71,7 +71,7 @@ If specified, the sequence object is created only for this session, and is autom **`IF NOT EXISTS`** -Do not throw an error if a relation with the same name already exists. Cloudberry Database issues a notice in this case. 
Note that there is no guarantee that the existing relation is anything like the sequence that would have been created - it might not even be a sequence. +Do not throw an error if a relation with the same name already exists. Apache Cloudberry issues a notice in this case. Note that there is no guarantee that the existing relation is anything like the sequence that would have been created - it might not even be a sequence. **`name`** @@ -103,7 +103,7 @@ Allows the sequence to begin anywhere. The default starting value is `minvalue` Specifies how many sequence numbers are to be preallocated and stored in memory for faster access. The default value is 20. The minimum value is 1 (no cache). -> **Note** When operating with a cache of sequence numbers (`cache > 1`), Cloudberry Database may discard some cached sequence values. If you require consecutive values, you must explicitly set `CACHE 1` when you create or alter the sequence. +> **Note** When operating with a cache of sequence numbers (`cache > 1`), Apache Cloudberry may discard some cached sequence values. If you require consecutive values, you must explicitly set `CACHE 1` when you create or alter the sequence. **`CYCLE`**
**`NO CYCLE`** @@ -141,13 +141,13 @@ Insert a row into a table that gets the next value of the sequence named `myseq`: INSERT INTO distributors VALUES (nextval('myseq'), 'acme'); ``` -Reset the sequence counter value on the Cloudberry Database coordinator: +Reset the sequence counter value on the Apache Cloudberry coordinator: ```sql SELECT setval('myseq', 201); ``` -Illegal use of `setval()` in Cloudberry Database (setting sequence values on distributed data): +Illegal use of `setval()` in Apache Cloudberry (setting sequence values on distributed data): ```sql INSERT INTO product VALUES (setval('myseq', 201), 'gizmo'); @@ -158,7 +158,7 @@ INSERT INTO product VALUES (setval('myseq', 201), 'gizmo'); `CREATE SEQUENCE` conforms to the SQL standard, with the following exceptions: - You obtain the next value using the `nextval()` function instead of the `NEXT VALUE FOR` expression specified in the SQL standard. -- The `OWNED BY` clause is a Cloudberry Database extension. +- The `OWNED BY` clause is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/create-server.md b/docs/sql-stmts/create-server.md index 84e907a6b6..23d8d06f97 100644 --- a/docs/sql-stmts/create-server.md +++ b/docs/sql-stmts/create-server.md @@ -28,7 +28,7 @@ Creating a server requires the `USAGE` privilege on the foreign-data wrapper spe **`IF NOT EXISTS`** -Do not throw an error if a server with the same name already exists. Cloudberry Database issues a notice in this case. Note that there is no guarantee that the existing server is anything like the one that would have been created. +Do not throw an error if a server with the same name already exists. Apache Cloudberry issues a notice in this case. Note that there is no guarantee that the existing server is anything like the one that would have been created. **`server_name`** @@ -52,13 +52,13 @@ The options for the new foreign server.
The options typically define the connect **`mpp_execute { 'coordinator' | 'any' | 'all segments' }`** -A Cloudberry Database-specific option that identifies the host from which the foreign-data wrapper reads or writes data: +An Apache Cloudberry-specific option that identifies the host from which the foreign-data wrapper reads or writes data: - `coordinator` (the default)—Read or write data from the coordinator host. - `any`—Read data from either the coordinator host or any one segment, depending on which path costs less. - `all segments`—Read or write data from all segments. To support this option value, the foreign-data wrapper should have a policy that matches the segments to data. -> **Note** Cloudberry Database supports parallel writes to foreign tables only when you set `mpp_execute 'all segments'`. +> **Note** Apache Cloudberry supports parallel writes to foreign tables only when you set `mpp_execute 'all segments'`. Support for the foreign server `mpp_execute` option, and the specific modes, is foreign-data wrapper-specific. @@ -66,7 +66,7 @@ The `mpp_execute` option can be specified in multiple commands: `CREATE FOREIGN **`num_segments 'num'`** -When `mpp_execute` is set to `'all segments'`, the Cloudberry Database-specific `num_segments` option identifies the number of query executors that Cloudberry Database spawns on the source Cloudberry Database cluster. If you do not provide a value, num defaults to the number of segments in the source cluster. +When `mpp_execute` is set to `'all segments'`, the Apache Cloudberry-specific `num_segments` option identifies the number of query executors that Apache Cloudberry spawns on the source Apache Cloudberry cluster. If you do not provide a value, num defaults to the number of segments in the source cluster. Support for the foreign server `num_segments` option is foreign-data wrapper-specific.
diff --git a/docs/sql-stmts/create-statistics.md b/docs/sql-stmts/create-statistics.md index 89f1f0f9a0..9255f2b484 100644 --- a/docs/sql-stmts/create-statistics.md +++ b/docs/sql-stmts/create-statistics.md @@ -25,7 +25,7 @@ If a schema name is given (for example, `CREATE STATISTICS myschema.mystat ...`) **`IF NOT EXISTS`** -Do not throw an error if a statistics object with the same name already exists. Cloudberry Database issues a notice in this case. Note that only the name of the statistics object is considered here, not the details of its definition. +Do not throw an error if a statistics object with the same name already exists. Apache Cloudberry issues a notice in this case. Note that only the name of the statistics object is considered here, not the details of its definition. **`statistics_name`** diff --git a/docs/sql-stmts/create-table-as.md b/docs/sql-stmts/create-table-as.md index 14d1bb765d..206537c332 100644 --- a/docs/sql-stmts/create-table-as.md +++ b/docs/sql-stmts/create-table-as.md @@ -70,11 +70,11 @@ The `WITH` clause specifies optional storage parameters for the new table. Refer The behavior of temporary tables at the end of a transaction block can be controlled using `ON COMMIT`. The three options are: -PRESERVE ROWS — Cloudberry Database takes no special action at the ends of transactions for temporary tables. This is the default behavior. +PRESERVE ROWS — Apache Cloudberry takes no special action at the ends of transactions for temporary tables. This is the default behavior. -DELETE ROWS — Cloudberry Database deletes all rows in the temporary table at the end of each transaction block. Essentially, an automatic [TRUNCATE](/docs/sql-stmts/truncate.md) is done at each commit. +DELETE ROWS — Apache Cloudberry deletes all rows in the temporary table at the end of each transaction block. Essentially, an automatic [TRUNCATE](/docs/sql-stmts/truncate.md) is done at each commit. 
-DROP — Cloudberry Database drops the temporary table at the end of the current transaction block. +DROP — Apache Cloudberry drops the temporary table at the end of the current transaction block. **`TABLESPACE tablespace_name`** @@ -88,7 +88,7 @@ A [SELECT](/docs/sql-stmts/select.md), [TABLE](/docs/sql-stmts/select.md#the-tab **`DISTRIBUTED RANDOMLY`**
**`DISTRIBUTED REPLICATED`** -Used to declare the Cloudberry Database distribution policy for the table. Refer to [CREATE TABLE](/docs/sql-stmts/create-table.md) for details. +Used to declare the Apache Cloudberry distribution policy for the table. Refer to [CREATE TABLE](/docs/sql-stmts/create-table.md) for details. ## Notes @@ -125,11 +125,11 @@ CREATE TEMP TABLE films_recent ON COMMIT DROP AS `CREATE TABLE AS` conforms to the SQL standard, with the following exceptions: -- The standard requires parentheses around the subquery clause; in Cloudberry Database, these parentheses are optional. -- In the standard, the `WITH [NO] DATA` clause is required, in Cloudberry Database it is optional. -- Cloudberry Database handles temporary tables differently from the standard; see [CREATE TABLE](/docs/sql-stmts/create-table.md) for details. -- The `WITH` clause is a Cloudberry Database extension; storage parameters are not part of the standard. -- The Cloudberry Database concept of tablespaces is not part of the standard. The `TABLESPACE` clause is an extension. +- The standard requires parentheses around the subquery clause; in Apache Cloudberry, these parentheses are optional. +- In the standard, the `WITH [NO] DATA` clause is required, in Apache Cloudberry it is optional. +- Apache Cloudberry handles temporary tables differently from the standard; see [CREATE TABLE](/docs/sql-stmts/create-table.md) for details. +- The `WITH` clause is an Apache Cloudberry extension; storage parameters are not part of the standard. +- The Apache Cloudberry concept of tablespaces is not part of the standard. The `TABLESPACE` clause is an extension. ## See also diff --git a/docs/sql-stmts/create-table.md b/docs/sql-stmts/create-table.md index 3a194239db..8dc7ea47df 100644 --- a/docs/sql-stmts/create-table.md +++ b/docs/sql-stmts/create-table.md @@ -6,7 +6,7 @@ title: CREATE TABLE Defines a new table.
-> **Note** Cloudberry Database accepts, but does not enforce, referential integrity syntax (foreign key constraints). +> **Note** Apache Cloudberry accepts, but does not enforce, referential integrity syntax (foreign key constraints). ## Synopsis @@ -199,19 +199,19 @@ and is: To create a table, you must have `USAGE` privilege on all column types or the type in the `OF` clause, respectively. -If you specify a schema name, Cloudberry Database creates the table in the specified schema. Otherwise Cloudberry Database creates the table in the current schema. Temporary tables exist in a special schema, so you cannot specify a schema name when creating a temporary table. The name of the table must be distinct from the name of any other table, external table, sequence, index, view, or foreign table in the same schema. +If you specify a schema name, Apache Cloudberry creates the table in the specified schema. Otherwise Apache Cloudberry creates the table in the current schema. Temporary tables exist in a special schema, so you cannot specify a schema name when creating a temporary table. The name of the table must be distinct from the name of any other table, external table, sequence, index, view, or foreign table in the same schema. `CREATE TABLE` also automatically creates a data type that represents the composite type corresponding to one row of the table. Therefore, tables cannot have the same name as any existing data type in the same schema. The optional constraint clauses specify conditions that new or updated rows must satisfy for an insert or update operation to succeed. A constraint is an SQL object that helps define the set of valid values in the table in various ways. -Cloudberry Database accepts, but does not enforce, referential integrity (foreign key) constraints. The information is retained in the system catalogs but is otherwise ignored. +Apache Cloudberry accepts, but does not enforce, referential integrity (foreign key) constraints. 
The information is retained in the system catalogs but is otherwise ignored. You can define two types of constraints: table constraints and column constraints. A column constraint is defined as part of a column definition. A table constraint definition is not tied to a particular column, and it can encompass more than one column. Every column constraint can also be written as a table constraint; a column constraint is only a notational convenience for use when the constraint only affects one column. -When creating a table, you specify an additional clause to declare the Cloudberry Database distribution policy. If a `DISTRIBUTED BY`, `DISTRIBUTED RANDOMLY`, or `DISTRIBUTED REPLICATED` clause is not supplied, then Cloudberry Database assigns a hash distribution policy to the table using either the `PRIMARY KEY` (if the table has one) or the first column of the table as the distribution key. Columns of geometric or user-defined data types are not eligible to be a Cloudberry Database distribution key column. If a table does not have a column of an eligible data type, the rows are distributed based on a random distribution. To ensure an even distribution of data in your Cloudberry Database system, you want to choose a distribution key that is unique for each record, or if that is not possible, then choose `DISTRIBUTED RANDOMLY`. +When creating a table, you specify an additional clause to declare the Apache Cloudberry distribution policy. If a `DISTRIBUTED BY`, `DISTRIBUTED RANDOMLY`, or `DISTRIBUTED REPLICATED` clause is not supplied, then Apache Cloudberry assigns a hash distribution policy to the table using either the `PRIMARY KEY` (if the table has one) or the first column of the table as the distribution key. Columns of geometric or user-defined data types are not eligible to be an Apache Cloudberry distribution key column. If a table does not have a column of an eligible data type, the rows are distributed based on a random distribution.
To ensure an even distribution of data in your Apache Cloudberry system, you want to choose a distribution key that is unique for each record, or if that is not possible, then choose `DISTRIBUTED RANDOMLY`. -If you supply the `DISTRIBUTED REPLICATED` clause, Cloudberry Database distributes all rows of the table to all segments in the Cloudberry Database system. You can use this option in cases where user-defined functions must run on the segments, and the functions require access to all rows of the table. Replicated functions can also be used to improve query performance by preventing broadcast motions for the table. The `DISTRIBUTED REPLICATED` clause cannot be used with the `PARTITION` clauses or the `INHERITS` clause. A replicated table also cannot be inherited by another table. The hidden system columns (`ctid`, `cmin`, `cmax`, `xmin`, `xmax`, and `gp_segment_id`) cannot be referenced in user queries on replicated tables because they have no single, unambiguous value. Cloudberry Database returns a `column does not exist` error for the query. +If you supply the `DISTRIBUTED REPLICATED` clause, Apache Cloudberry distributes all rows of the table to all segments in the Apache Cloudberry system. You can use this option in cases where user-defined functions must run on the segments, and the functions require access to all rows of the table. Replicated functions can also be used to improve query performance by preventing broadcast motions for the table. The `DISTRIBUTED REPLICATED` clause cannot be used with the `PARTITION` clauses or the `INHERITS` clause. A replicated table also cannot be inherited by another table. The hidden system columns (`ctid`, `cmin`, `cmax`, `xmin`, `xmax`, and `gp_segment_id`) cannot be referenced in user queries on replicated tables because they have no single, unambiguous value. Apache Cloudberry returns a `column does not exist` error for the query. 
The `PARTITION BY` and `PARTITION OF` clauses allow you to divide the table into multiple sub-tables (or parts) that, taken together, make up the parent table and share its schema. @@ -219,7 +219,7 @@ The `PARTITION BY` and `PARTITION OF` clauses allow you to divide the table into **`GLOBAL | LOCAL`** -These keywords are present for SQL standard compatibility, but have no effect in Cloudberry Database and are deprecated. +These keywords are present for SQL standard compatibility, but have no effect in Apache Cloudberry and are deprecated. **`TEMPORARY | TEMP`** @@ -229,11 +229,11 @@ Be sure to perform appropriate vacuum and analyze operations on temporary tables **`UNLOGGED`** -If specified, the table is created as an unlogged table. Data written to unlogged tables is not written to the write-ahead (WAL) log, which makes them considerably faster than ordinary tables. However, the contents of an unlogged table are not replicated to mirror segment instances. Also an unlogged table is not crash-safe: Cloudberry Database automatically truncates an unlogged table after a crash or unclean shutdown. Any indexes created on an unlogged table are automatically unlogged as well. +If specified, the table is created as an unlogged table. Data written to unlogged tables is not written to the write-ahead (WAL) log, which makes them considerably faster than ordinary tables. However, the contents of an unlogged table are not replicated to mirror segment instances. Also an unlogged table is not crash-safe: Apache Cloudberry automatically truncates an unlogged table after a crash or unclean shutdown. Any indexes created on an unlogged table are automatically unlogged as well. **`IF NOT EXISTS`** -Do not throw an error if a relation with the same name already exists. Cloudberry Database issues a notice in this case. Note that there is no guarantee that the existing relation is anything like the one that would have been created. 
+Do not throw an error if a relation with the same name already exists. Apache Cloudberry issues a notice in this case. Note that there is no guarantee that the existing relation is anything like the one that would have been created. **`table_name`** @@ -251,15 +251,15 @@ The name of a column to be created in the new table. **`data_type`** -The data type of the column. This may include array specifiers. For more information on the data types supported by Cloudberry Database, refer to the Data Types documentation. +The data type of the column. This may include array specifiers. For more information on the data types supported by Apache Cloudberry, refer to the Data Types documentation. -For table columns that contain textual data, Specify the data type `VARCHAR` or `TEXT`. Specifying the data type `CHAR` is not recommended. In Cloudberry Database, the data types `VARCHAR` or `TEXT` handle padding added to the data (space characters added after the last non-space character) as significant characters, the data type `CHAR` does not. See [Notes](#notes). +For table columns that contain textual data, Specify the data type `VARCHAR` or `TEXT`. Specifying the data type `CHAR` is not recommended. In Apache Cloudberry, the data types `VARCHAR` or `TEXT` handle padding added to the data (space characters added after the last non-space character) as significant characters, the data type `CHAR` does not. See [Notes](#notes). **`COLLATE` collation** The `COLLATE` clause assigns a collation to the column (which must be of a collatable data type). If not specified, the column data type's default collation is used. -> **Note**: The Cloudberry Query Optimizer (GPORCA) supports collation only when all columns in the query use the same collation. If columns in the query use different collations, then Cloudberry Database uses the Postgres Planner. +> **Note**: The Cloudberry Query Optimizer (GPORCA) supports collation only when all columns in the query use the same collation. 
If columns in the query use different collations, then Apache Cloudberry uses the Postgres Planner. **`ENCODING ( storage_directive [, ...] )`** @@ -291,7 +291,7 @@ The optional `INHERITS` clause specifies a list of tables from which the new tab Use of `INHERITS` creates a persistent relationship between the new child table and its parent table(s). Schema modifications to the parent(s) normally propagate to children as well, and by default the data of the child table is included in scans of the parent(s). -If the same column name exists in more than one parent table, an error is reported unless the data types of the columns match in each of the parent tables. If there is no conflict, then the duplicate columns are merged to form a single column in the new table. If the column name list of the new table contains a column name that is also inherited, the data type must likewise match the inherited column(s), and the column definitions are merged into one. If the new table explicitly specifies a default value for the column, this default overrides any defaults from inherited declarations of the column. Otherwise, any parents that specify default values for the column must all specify the same default, or Cloudberry Database reports an error. +If the same column name exists in more than one parent table, an error is reported unless the data types of the columns match in each of the parent tables. If there is no conflict, then the duplicate columns are merged to form a single column in the new table. If the column name list of the new table contains a column name that is also inherited, the data type must likewise match the inherited column(s), and the column definitions are merged into one. If the new table explicitly specifies a default value for the column, this default overrides any defaults from inherited declarations of the column. 
Otherwise, any parents that specify default values for the column must all specify the same default, or Apache Cloudberry reports an error. `CHECK` constraints are merged in essentially the same way as columns: if multiple parent tables or the new table definition contain identically-named `CHECK` constraints, these constraints must all have the same check expression, or an error will be reported. Constraints having the same name and expression will be merged into one copy. A constraint marked `NO INHERIT` in a parent will not be considered. Notice that an unnamed `CHECK` constraint in the new table will never be merged, since a unique name will always be chosen for it. @@ -303,11 +303,11 @@ If a column in the parent table is an identity column, that property is not inhe The optional `PARTITION BY` clause of the *modern partitioning syntax* specifies a strategy of partitioning the table. The table thus created is referred to as a partitioned table. The parenthesized list of columns or expressions forms the partition key for the table. When using range or hash partitioning, the partition key can include multiple columns or expressions (up to 32), but for list partitioning, the partition key must consist of a single column or expression. -Range and list partitioning require a btree operator class, while hash partitioning requires a hash operator class. If no operator class is specified explicitly, the default operator class of the appropriate type will be used; if no default operator class exists, Cloudberry Database raises an error. When hash partitioning is used, the operator class used must implement support function 2 (see [Index Method Support Routines](https://www.postgresql.org/docs/12/xindex.html#XINDEX-SUPPORT) in the PostgreSQL documentation for details). +Range and list partitioning require a btree operator class, while hash partitioning requires a hash operator class. 
If no operator class is specified explicitly, the default operator class of the appropriate type will be used; if no default operator class exists, Apache Cloudberry raises an error. When hash partitioning is used, the operator class used must implement support function 2 (see [Index Method Support Routines](https://www.postgresql.org/docs/12/xindex.html#XINDEX-SUPPORT) in the PostgreSQL documentation for details). > **Note** Only the modern partitioning syntax supports hash partitions. -A partitioned table is divided into sub-tables (called partitions), which are typically created using separate `CREATE TABLE` commands. The partitioned table is itself empty. A data row inserted into the table is routed to a partition based on the value of columns or expressions in the partition key. If no existing partition matches the values in the new row, Cloudberry Database reports an error. +A partitioned table is divided into sub-tables (called partitions), which are typically created using separate `CREATE TABLE` commands. The partitioned table is itself empty. A data row inserted into the table is routed to a partition based on the value of columns or expressions in the partition key. If no existing partition matches the values in the new row, Apache Cloudberry reports an error. Partitioned tables do not support `EXCLUDE` constraints; however, you can define these constraints on individual partitions. @@ -333,15 +333,15 @@ Note that if `MINVALUE` or `MAXVALUE` is used for one column of a partitioning b Also note that some element types, such as timestamp, have a notion of "infinity", which is just another value that can be stored. This is different from `MINVALUE` and `MAXVALUE`, which are not real values that can be stored, but rather they are ways of saying that the value is unbounded. `MAXVALUE` can be thought of as being greater than any other value, including "infinity" and `MINVALUE` as being less than any other value, including "minus infinity". 
Thus the range `FROM ('infinity') TO (MAXVALUE)` is not an empty range; it allows precisely one value to be stored — "infinity". -If `DEFAULT` is specified, the table will be created as the default partition of the parent table. This option is not available for hash-partitioned tables. Cloudberry Database routes a partition key value not fitting into any other partition of the given parent to the default partition. +If `DEFAULT` is specified, the table will be created as the default partition of the parent table. This option is not available for hash-partitioned tables. Apache Cloudberry routes a partition key value not fitting into any other partition of the given parent to the default partition. -When a table has an existing `DEFAULT` partition and a new partition is added to it, the default partition must be scanned to verify that it does not contain any rows which properly belong in the new partition. If the default partition contains a large number of rows, this may be a slow operation. Cloudberry Database skips the scan if the default partition is a foreign table or if it has a constraint which proves that it cannot contain rows which should be placed in the new partition. +When a table has an existing `DEFAULT` partition and a new partition is added to it, the default partition must be scanned to verify that it does not contain any rows which properly belong in the new partition. If the default partition contains a large number of rows, this may be a slow operation. Apache Cloudberry skips the scan if the default partition is a foreign table or if it has a constraint which proves that it cannot contain rows which should be placed in the new partition. When creating a hash partition, you must specify a modulus and a remainder. The modulus must be a positive integer, and the remainder must be a non-negative integer less than the modulus. 
Typically, when initially setting up a hash-partitioned table, you should choose a modulus equal to the number of partitions and assign every table the same modulus and a different remainder (see examples below). However, it is not required that every partition have the same modulus, only that every modulus which occurs among the partitions of a hash-partitioned table is a factor of the next larger modulus. This allows the number of partitions to be increased incrementally without needing to move all the data at once. For example, suppose you have a hash-partitioned table with 8 partitions, each of which has modulus 8, but find it necessary to increase the number of partitions to 16. You can detach one of the modulus-8 partitions, create two new modulus-16 partitions covering the same portion of the key space (one with a remainder equal to the remainder of the detached partition, and the other with a remainder equal to that value plus 8), and repopulate them with data. You can then repeat this -- perhaps at a later time -- for each modulus-8 partition until none remain. While this may still involve a large amount of data movement at each step, it is still preferable to having to create a whole new table and move all the data at once. A partition must have the same column names and types as the partitioned table to which it belongs. Modifications to the column names or types of a partitioned table automatically propagate to all partitions. `CHECK` constraints are inherited automatically by every partition, but an individual partition may specify additional `CHECK` constraints; additional constraints with the same name and condition as in the parent will be merged with the parent constraint. Defaults may be specified separately for each partition. But note that a partition's default value is not applied when inserting a tuple through a partitioned table. -Cloudberry Database automatically routes rows inserted into a partitioned table to the correct partition. 
If no suitable partition exists, Cloudberry Database returns an error. +Apache Cloudberry automatically routes rows inserted into a partitioned table to the correct partition. If no suitable partition exists, Apache Cloudberry returns an error. Operations such as `TRUNCATE` which normally affect a table and all of its inheritance children will cascade to all partitions, but may also be performed on an individual partition. Note that dropping a partition with `DROP TABLE` requires taking an `ACCESS EXCLUSIVE` lock on the parent table. @@ -353,7 +353,7 @@ The `LIKE` clause specifies a table from which the new table automatically copie Unlike `INHERITS`, the new table and original table are completely decoupled after creation is complete. Changes to the original table will not be applied to the new table, and it is not possible to include data of the new table in scans of the original table. -Also unlike `INHERITS`, columns and constraints copied by `LIKE` are not merged with similarly named columns and constraints. If the same name is specified explicitly or in another `LIKE` clause, Cloudberry Database signals an error. +Also unlike `INHERITS`, columns and constraints copied by `LIKE` are not merged with similarly named columns and constraints. If the same name is specified explicitly or in another `LIKE` clause, Apache Cloudberry signals an error. The optional like_option clauses specify which additional properties of the original table to copy. Specifying `INCLUDING` copies the property, specifying `EXCLUDING` omits the property. `EXCLUDING` is the default. If multiple specifications are made for the same kind of object, the last one is used. The available options are: @@ -409,7 +409,7 @@ Extended statistics are copied to the new table. `INCLUDING ALL` is an abbreviated form of all available options (It may be useful to specify individual `EXCLUDING` clauses after `INCLUDING ALL` to select all but some specific options.) 
-You can also use the `LIKE` clause to copy column definitions from views, foreign tables, or composite types. Cloudberry Database ignores inapplicable options (for example, `INCLUDING INDEXES` from a view). +You can also use the `LIKE` clause to copy column definitions from views, foreign tables, or composite types. Apache Cloudberry ignores inapplicable options (for example, `INCLUDING INDEXES` from a view). **`CONSTRAINT constraint_name`** @@ -429,13 +429,13 @@ This clause is only provided for compatibility with non-standard SQL databases. **`CHECK (expression) [ NO INHERIT ]`** -The `CHECK` clause specifies an expression producing a Boolean result which new or updated rows must satisfy for an insert or update operation to succeed. Expressions evaluating to `TRUE` or `UNKNOWN` succeed. Should any row of an insert or update operation produce a `FALSE` result, Cloudberry Database raises an error exception, and the insert or update does not alter the database. A check constraint specified as a column constraint should reference that column's value only, while an expression appearing in a table constraint can reference multiple columns. +The `CHECK` clause specifies an expression producing a Boolean result which new or updated rows must satisfy for an insert or update operation to succeed. Expressions evaluating to `TRUE` or `UNKNOWN` succeed. Should any row of an insert or update operation produce a `FALSE` result, Apache Cloudberry raises an error exception, and the insert or update does not alter the database. A check constraint specified as a column constraint should reference that column's value only, while an expression appearing in a table constraint can reference multiple columns. Currently, `CHECK` expressions cannot contain subqueries nor refer to variables other than columns of the current row. You can reference the system column `tableoid`, but not any other system column. A constraint marked with `NO INHERIT` will not propagate to child tables. 
-When a table has multiple `CHECK` constraints, they will be tested for each row in alphabetical order by name, after checking `NOT NULL` constraints. (Previous Cloudberry Database versions did not honor any particular firing order for `CHECK` constraints.) +When a table has multiple `CHECK` constraints, they will be tested for each row in alphabetical order by name, after checking `NOT NULL` constraints. (Previous Apache Cloudberry versions did not honor any particular firing order for `CHECK` constraints.) **`DEFAULT default_expr`** @@ -451,7 +451,7 @@ The generation expression can refer to other columns in the table, but not other **`GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ]`** -This clause creates the column as an identity column. Cloudberry Database attaches an implicit sequence to it, and automatically assigns a value from the sequence to the column in new rows. Such a column is implicitly `NOT NULL`. +This clause creates the column as an identity column. Apache Cloudberry attaches an implicit sequence to it, and automatically assigns a value from the sequence to the column in new rows. Such a column is implicitly `NOT NULL`. The clauses `ALWAYS` and `BY DEFAULT` determine how the sequence value is given precedence over a user-specified value in an `INSERT` statement. If `ALWAYS` is specified, a user-specified value is only accepted if the `INSERT` statement specifies `OVERRIDING SYSTEM VALUE`. If `BY DEFAULT` is specified, then the user-specified value takes precedence. See [INSERT](/docs/sql-stmts/insert.md) for details. (In the `COPY` command, user-specified values are always used regardless of this setting.) @@ -463,9 +463,9 @@ You can use the optional sequence_options clause to override the options of the The `UNIQUE` constraint specifies that a group of one or more columns of a table may contain only unique values.
The behavior of a unique table constraint is the same as that of a unique column constraint, with the additional capability to span multiple columns. The constraint therefore enforces that any two rows must differ in at least one of these columns. -For the purpose of a unique constraint, null values are not considered equal. The column(s) that are unique must contain all the columns of the Cloudberry Database distribution key. In addition, the `` must contain all the columns in the partition key if the table is partitioned. Note that a `` constraint in a partitioned table is not the same as a simple `UNIQUE INDEX`. +For the purpose of a unique constraint, null values are not considered equal. The column(s) that are unique must contain all the columns of the Apache Cloudberry distribution key. In addition, the `` must contain all the columns in the partition key if the table is partitioned. Note that a `` constraint in a partitioned table is not the same as a simple `UNIQUE INDEX`. -Each unique constraint should name a set of columns that is different from the set of columns named by any other unique or primary key constraint defined for the table. (Otherwise, Cloudberry Database discards redundant unique constraints.) +Each unique constraint should name a set of columns that is different from the set of columns named by any other unique or primary key constraint defined for the table. (Otherwise, Apache Cloudberry discards redundant unique constraints.) When establishing a unique constraint for a multi-level partition hierarchy, all of the columns in the partition key of the target partitioned table, as well as those of all its descendant partitioned tables, must be included in the constraint definition. 
@@ -492,7 +492,7 @@ The optional `INCLUDE` clause adds to that index one or more columns that are si The `EXCLUDE` clause defines an exclusion constraint, which guarantees that if any two rows are compared on the specified column(s) or expression(s) using the specified operator(s), not all of these comparisons will return `TRUE`. If all of the specified operators test for equality, this is equivalent to a `UNIQUE` constraint, although an ordinary unique constraint will be faster. However, exclusion constraints can specify constraints that are more general than simple equality. For example, you can specify a constraint that no two rows in the table contain overlapping circles by using the `&&` operator. -Cloudberry Database does not support specifying an exclusion constraint on a randomly-distributed table. +Apache Cloudberry does not support specifying an exclusion constraint on a randomly-distributed table. Exclusion constraints are implemented using an index, so each specified operator must be associated with an appropriate operator class for the index access method index_method. The operators are required to be commutative. Each exclude_element can optionally specify an operator class and/or ordering options; these are described fully under [CREATE INDEX](/docs/sql-stmts/create-index.md). @@ -505,7 +505,7 @@ The predicate allows you to specify an exclusion constraint on a subset of the t **`FOREIGN KEY (column_name [, ...]) REFERENCES reftable [ ( refcolumn [, ... ] ) ]`**
**`[ MATCH matchtype ] [ ON DELETE referential_action ] [ ON UPDATE referential_action ] (table constraint)`** -The `REFERENCES` and `FOREIGN KEY` clauses specify referential integrity constraints (foreign key constraints). Cloudberry Database accepts referential integrity constraints but does not enforce them. +The `REFERENCES` and `FOREIGN KEY` clauses specify referential integrity constraints (foreign key constraints). Apache Cloudberry accepts referential integrity constraints but does not enforce them. **`DEFERRABLE`**
**`NOT DEFERRABLE`** @@ -537,7 +537,7 @@ You can control the behavior of temporary tables at the end of a transaction blo **PRESERVE ROWS** - No special action is taken at the ends of transactions for temporary tables. This is the default behavior. -**DELETE ROWS** - All rows in the temporary table will be deleted at the end of each transaction block. Essentially, Cloudberry Database performs an automatic [TRUNCATE](/docs/sql-stmts/truncate.md) at each commit. When used on a partitioned table, this operation is not cascaded to its partitions. +**DELETE ROWS** - All rows in the temporary table will be deleted at the end of each transaction block. Essentially, Apache Cloudberry performs an automatic [TRUNCATE](/docs/sql-stmts/truncate.md) at each commit. When used on a partitioned table, this operation is not cascaded to its partitions. **DROP** - The temporary table will be dropped at the end of the current transaction block. When used on a partitioned table, this action drops its partitions and when used on tables with inheritance children, it drops the dependent children. @@ -553,22 +553,22 @@ This clause allows selection of the tablespace in which the index associated wit **`DISTRIBUTED RANDOMLY`**
**`DISTRIBUTED REPLICATED`** -Used to declare the Cloudberry Database distribution policy for the table. `DISTRIBUTED BY` uses hash distribution with one or more columns declared as the distribution key. For the most even data distribution, the distribution key should be the primary key of the table or a unique column (or set of columns). If that is not possible, then you may choose `DISTRIBUTED RANDOMLY`, which will send the data randomly to the segment instances. Additionally, an operator class, `opclass`, can be specified, to use a non-default hash function. +Used to declare the Apache Cloudberry distribution policy for the table. `DISTRIBUTED BY` uses hash distribution with one or more columns declared as the distribution key. For the most even data distribution, the distribution key should be the primary key of the table or a unique column (or set of columns). If that is not possible, then you may choose `DISTRIBUTED RANDOMLY`, which will send the data randomly to the segment instances. Additionally, an operator class, `opclass`, can be specified, to use a non-default hash function. -The Cloudberry Database server configuration parameter `gp_create_table_random_default_distribution` controls the default table distribution policy if the DISTRIBUTED BY clause is not specified when you create a table. Cloudberry Database follows these rules to create a table if a distribution policy is not specified. +The Apache Cloudberry server configuration parameter `gp_create_table_random_default_distribution` controls the default table distribution policy if the DISTRIBUTED BY clause is not specified when you create a table. Apache Cloudberry follows these rules to create a table if a distribution policy is not specified. 
-If the value of the parameter is `off` (the default), Cloudberry Database chooses the table distribution key based on the command: +If the value of the parameter is `off` (the default), Apache Cloudberry chooses the table distribution key based on the command: -- If a `LIKE` or `INHERITS` clause is specified, then Cloudberry Database copies the distribution key from the source or parent table. -- If `PRIMARY KEY`, `UNIQUE`, or `EXCLUDE` constraints are specified, then Cloudberry Database chooses the largest subset of all the key columns as the distribution key. -- If no constraints nor a `LIKE` or `INHERITS` clause is specified, then Cloudberry Database chooses the first suitable column as the distribution key. (Columns with geometric or user-defined data types are not eligible as Cloudberry Database distribution key columns.) +- If a `LIKE` or `INHERITS` clause is specified, then Apache Cloudberry copies the distribution key from the source or parent table. +- If `PRIMARY KEY`, `UNIQUE`, or `EXCLUDE` constraints are specified, then Apache Cloudberry chooses the largest subset of all the key columns as the distribution key. +- If no constraints nor a `LIKE` or `INHERITS` clause is specified, then Apache Cloudberry chooses the first suitable column as the distribution key. (Columns with geometric or user-defined data types are not eligible as Apache Cloudberry distribution key columns.) -If the value of the parameter is set to `on`, Cloudberry Database follows these rules: +If the value of the parameter is set to `on`, Apache Cloudberry follows these rules: - If `PRIMARY KEY`, `UNIQUE`, or `EXCLUDE` columns are not specified, the distribution of the table is random (`DISTRIBUTED RANDOMLY`). Table distribution is random even if the table creation command contains the `LIKE` or `INHERITS` clause. 
- If `PRIMARY KEY`, `UNIQUE`, or `EXCLUDE` columns are specified, you must also specify a `DISTRIBUTED BY` clause If a `DISTRIBUTED BY` clause is not specified as part of the table creation command, the command fails. -The `DISTRIBUTED REPLICATED` clause replicates the entire table to all Cloudberry Database segment instances. It can be used when it is necessary to run user-defined functions on segments when the functions require access to all rows in the table, or to improve query performance by preventing broadcast motions. +The `DISTRIBUTED REPLICATED` clause replicates the entire table to all Apache Cloudberry segment instances. It can be used when it is necessary to run user-defined functions on segments when the functions require access to all rows in the table, or to improve query performance by preventing broadcast motions. ### Classic partitioning syntax parameters @@ -578,7 +578,7 @@ Descriptions of additional parameters that are specific to the *classic partitio **`CREATE TABLE table_name ... PARTITION BY`** -When creating a partitioned table using the *classic syntax*, Cloudberry Database creates the root partitioned table with the specified table name. Cloudberry Database also creates a hierarchy of tables, child tables, that are the sub-partitions based on the partitioning options that you specify. The pg_partitioned_table system catalog contains information about the sub-partition tables. +When creating a partitioned table using the *classic syntax*, Apache Cloudberry creates the root partitioned table with the specified table name. Apache Cloudberry also creates a hierarchy of tables, child tables, that are the sub-partitions based on the partitioning options that you specify. The pg_partitioned_table system catalog contains information about the sub-partition tables. **`classic_partition_spec`** @@ -586,7 +586,7 @@ Declares the individual partitions to create. 
Each partition can be defined indi

**`DEFAULT PARTITION name`**

-Declares a default partition. When data does not match the bounds of an existing partition, Apache Cloudberry inserts it into the default partition. Partition designs that do not identify a default partition will reject incoming rows that do not match an existing partition.

**`PARTITION name`**

@@ -632,7 +632,7 @@ Note that you can also set storage parameters for a particular partition or sub-

You can specify the defaults for some of the table storage options with the server configuration parameter `gp_default_storage_options`. For information about setting default storage options, see [Notes](#notes).

-> **Note** Because Cloudberry Database does not permit autovacuuming user tables, it accepts, but does not apply, certain per-table parameter settings as noted below.
+> **Note** Because Apache Cloudberry does not permit autovacuuming user tables, it accepts, but does not apply, certain per-table parameter settings as noted below.

The following table storage parameters are available:

@@ -678,15 +678,15 @@ Set to `column` for column-oriented storage, or `row` (the default) for row-orie

**`toast_tuple_target (integer)`**

-The `toast_tuple_target` specifies the minimum tuple length required before Cloudberry Database attempts to compress and/or move long column values into TOAST tables, and is also the target length Cloudberry Database tries to reduce the length below once toasting begins. This affects columns marked as External (for move), Main (for compression), or Extended (for both) and applies only to new tuples. There is no effect on existing rows.
By default this parameter is set to allow at least 4 tuples per block, which with the default blocksize will be 8184 bytes. Valid values are between 128 bytes and the (blocksize - header), by default 8160 bytes. Changing this value may not be useful for very short or very long rows. Note that the default setting is often close to optimal, and it is possible that setting this parameter could have negative effects in some cases. You can not set this parameter for TOAST tables. +The `toast_tuple_target` specifies the minimum tuple length required before Apache Cloudberry attempts to compress and/or move long column values into TOAST tables, and is also the target length Apache Cloudberry tries to reduce the length below once toasting begins. This affects columns marked as External (for move), Main (for compression), or Extended (for both) and applies only to new tuples. There is no effect on existing rows. By default this parameter is set to allow at least 4 tuples per block, which with the default blocksize will be 8184 bytes. Valid values are between 128 bytes and the (blocksize - header), by default 8160 bytes. Changing this value may not be useful for very short or very long rows. Note that the default setting is often close to optimal, and it is possible that setting this parameter could have negative effects in some cases. You can not set this parameter for TOAST tables. **`parallel_workers (integer)`** -Sets the number of workers that should be used to assist a parallel scan of this table. If not set, Cloudberry Database determines a value based on the relation size. The actual number of workers chosen by the planner or by utility statements that use parallel scans may be less, for example due to the setting of `max_worker_processes`. +Sets the number of workers that should be used to assist a parallel scan of this table. If not set, Apache Cloudberry determines a value based on the relation size. 
The actual number of workers chosen by the planner or by utility statements that use parallel scans may be less, for example due to the setting of `max_worker_processes`. **`autovacuum_enabled, toast.autovacuum_enabled (boolean)`** -Enables or disables the autovacuum daemon for a particular table. If `true`, the autovacuum daemon will perform automatic `VACUUM` and/or `ANALYZE` operations on this table following the rules discussed in [The Autovacuum Daemon](https://www.postgresql.org/docs/12/routine-vacuuming.html#AUTOVACUUM) in the PostgreSQL documentation. If `false`, Cloudberry Database does not autovacuum the table, except to prevent transaction ID wraparound. Note that the autovacuum daemon does not run at all (except to prevent transaction ID wraparound) if the `autovacuum` parameter is `false`; setting individual tables' storage parameters does not override that. So there is seldom much point in explicitly setting this storage parameter to `true`, only to `false`. +Enables or disables the autovacuum daemon for a particular table. If `true`, the autovacuum daemon will perform automatic `VACUUM` and/or `ANALYZE` operations on this table following the rules discussed in [The Autovacuum Daemon](https://www.postgresql.org/docs/12/routine-vacuuming.html#AUTOVACUUM) in the PostgreSQL documentation. If `false`, Apache Cloudberry does not autovacuum the table, except to prevent transaction ID wraparound. Note that the autovacuum daemon does not run at all (except to prevent transaction ID wraparound) if the `autovacuum` parameter is `false`; setting individual tables' storage parameters does not override that. So there is seldom much point in explicitly setting this storage parameter to `true`, only to `false`. **`vacuum_index_cleanup, toast.vacuum_index_cleanup (boolean)`** @@ -702,13 +702,13 @@ Enables or disables vacuum to attempt to truncate any empty pages at the end of Per-table value for the `autovacuum_vacuum_threshold` server configuration parameter. 
-> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`autovacuum_vacuum_scale_factor, toast.autovacuum_vacuum_scale_factor (floating point)`** Per-table value for the `autovacuum_vacuum_scale_factor` server configuration parameter. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`autovacuum_analyze_threshold (integer)`** @@ -722,75 +722,75 @@ Per-table value for the `autovacuum_analyze_scale_factor` server configuration p Per-table value for the `autovacuum_vacuum_cost_delay` server configuration parameter. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`autovacuum_vacuum_cost_limit, toast.autovacuum_vacuum_cost_limit (integer)`** Per-table value for the `autovacuum_vacuum_cost_limit` server configuration parameter. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`autovacuum_freeze_min_age, toast.autovacuum_freeze_min_age (integer)`** Per-table value for the `vacuum_freeze_min_age` parameter. Note that autovacuum will ignore per-table `autovacuum_freeze_min_age` parameters that are larger than half of the system-wide `autovacuum_freeze_max_age` setting. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. 
**`autovacuum_freeze_max_age, toast.autovacuum_freeze_max_age (integer)`** Per-table value for the `autovacuum_freeze_max_age` server configuration parameter. Note that autovacuum will ignore per-table `autovacuum_freeze_max_age` parameters that are larger than the system-wide setting (it can only be set smaller). -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`autovacuum_freeze_table_age, toast.autovacuum_freeze_table_age (integer)`** Per-table value for the `vacuum_freeze_table_age` server configuration parameter. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`autovacuum_multixact_freeze_min_age, toast.autovacuum_multixact_freeze_min_age (integer)`** Per-table value for the `vacuum_multixact_freeze_min_age` server configuration parameter. Note that autovacuum will ignore per-table `autovacuum_multixact_freeze_min_age` parameters that are larger than half of the system-wide `autovacuum_multixact_freeze_max_age` setting. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`autovacuum_multixact_freeze_max_age, toast.autovacuum_multixact_freeze_max_age (integer)`** Per-table value for the `autovacuum_multixact_freeze_max_age` server configuration parameter. Note that autovacuum will ignore per-table `autovacuum_multixact_freeze_max_age` parameters that are larger than the system-wide setting (it can only be set smaller). -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. 
**`autovacuum_multixact_freeze_table_age, toast.autovacuum_multixact_freeze_table_age (integer)`** Per-table value for the `vacuum_multixact_freeze_table_age` server configuration parameter. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. **`log_autovacuum_min_duration, toast.log_autovacuum_min_duration (integer)`** Per-table value for the `log_autovacuum_min_duration` server configuration parameter. -> **Note** Cloudberry Database accepts, but does not apply, values for these storage parameters. +> **Note** Apache Cloudberry accepts, but does not apply, values for these storage parameters. ## Notes -Cloudberry Database automatically creates an index for each unique constraint and primary key constraint to enforce uniqueness, so it is not necessary to create an index explicitly for primary key columns. (See [CREATE INDEX](/docs/sql-stmts/create-index.md) for more information.) +Apache Cloudberry automatically creates an index for each unique constraint and primary key constraint to enforce uniqueness, so it is not necessary to create an index explicitly for primary key columns. (See [CREATE INDEX](/docs/sql-stmts/create-index.md) for more information.) Unique constraints and primary keys are not inherited. You cannot define a table with more than 1600 columns. (In practice, the effective limit is usually lower because of tuple-length constraints.) -The Cloudberry Database data types `VARCHAR` or `TEXT` handle padding added to the textual data (space characters added after the last non-space character) as significant characters; the data type `CHAR` does not. +The Apache Cloudberry data types `VARCHAR` or `TEXT` handle padding added to the textual data (space characters added after the last non-space character) as significant characters; the data type `CHAR` does not. 
-In Cloudberry Database, values of type `CHAR()` are padded with trailing spaces to the specified width ``. The values are stored and displayed with the spaces. However, the padding spaces are treated as semantically insignificant. When the values are distributed, the trailing spaces are disregarded. The trailing spaces are also treated as semantically insignificant when comparing two values of data type `CHAR`, and the trailing spaces are removed when converting a character value to one of the other string types.
+In Apache Cloudberry, values of type `CHAR()` are padded with trailing spaces to the specified width ``. The values are stored and displayed with the spaces. However, the padding spaces are treated as semantically insignificant. When the values are distributed, the trailing spaces are disregarded. The trailing spaces are also treated as semantically insignificant when comparing two values of data type `CHAR`, and the trailing spaces are removed when converting a character value to one of the other string types.

-Cloudberry Database requires certain special conditions for primary key and unique constraints with regards to columns that are the *distribution key* in a Cloudberry Database table. For a unique constraint to be enforced in Cloudberry Database, the table must be hash-distributed (not `DISTRIBUTED RANDOMLY`), and the constraint columns must be the same as, or a superset of, the table's distribution key columns.
+Apache Cloudberry requires certain special conditions for primary key and unique constraints with regards to columns that are the *distribution key* in an Apache Cloudberry table. For a unique constraint to be enforced in Apache Cloudberry, the table must be hash-distributed (not `DISTRIBUTED RANDOMLY`), and the constraint columns must be the same as, or a superset of, the table's distribution key columns.

Replicated tables (`DISTRIBUTED REPLICATED`) can have both `PRIMARY KEY` and `UNIQUE` column constraints.
A primary key constraint is simply a combination of a unique constraint and a not-null constraint. -Foreign key constraints are not supported in Cloudberry Database. +Foreign key constraints are not supported in Apache Cloudberry. For inherited tables, unique constraints, primary key constraints, indexes and table privileges are *not* inherited in the current implementation. @@ -800,7 +800,7 @@ For append-optimized tables, `UPDATE` and `DELETE` are not allowed in a repeatab `CLUSTER` on append-optimized tables is only supported over B-tree indexes. -The Cloudberry Database Query Optimizer does not support list partitions with multi-column (composite) partition keys. +The Apache Cloudberry Query Optimizer does not support list partitions with multi-column (composite) partition keys. ## Examples @@ -811,7 +811,7 @@ CREATE TABLE baby.rank (id int, rank int, year smallint, count int ) DISTRIBUTED BY (rank, year); ``` -Create tables named `films` and `distributors` (the primary key will be used as the Cloudberry Database distribution key by default): +Create tables named `films` and `distributors` (the primary key will be used as the Apache Cloudberry distribution key by default): ```sql CREATE TABLE films ( @@ -1176,11 +1176,11 @@ PARTITION BY RANGE (year) ### Temporary tables -In the SQL standard, temporary tables are defined just once and automatically exist (starting with empty contents) in every session that needs them. Cloudberry Database instead requires each session to issue its own `CREATE TEMPORARY TABLE` command for each temporary table to be used. This allows different sessions to use the same temporary table name for different purposes, whereas the standard's approach constrains all instances of a given temporary table name to have the same table structure. +In the SQL standard, temporary tables are defined just once and automatically exist (starting with empty contents) in every session that needs them. 
Apache Cloudberry instead requires each session to issue its own `CREATE TEMPORARY TABLE` command for each temporary table to be used. This allows different sessions to use the same temporary table name for different purposes, whereas the standard's approach constrains all instances of a given temporary table name to have the same table structure. -The standard's distinction between global and local temporary tables is not in Cloudberry Database. Cloudberry Database will accept the `GLOBAL` and `LOCAL` keywords in a temporary table declaration, but they have no effect and are deprecated. +The standard's distinction between global and local temporary tables is not in Apache Cloudberry. Apache Cloudberry will accept the `GLOBAL` and `LOCAL` keywords in a temporary table declaration, but they have no effect and are deprecated. -If the `ON COMMIT` clause is omitted, the SQL standard specifies that the default behavior as `ON COMMIT DELETE ROWS`. However, the default behavior in Cloudberry Database is `ON COMMIT PRESERVE ROWS`. The `ON COMMIT DROP` option does not exist in the SQL standard. +If the `ON COMMIT` clause is omitted, the SQL standard specifies that the default behavior as `ON COMMIT DELETE ROWS`. However, the default behavior in Apache Cloudberry is `ON COMMIT PRESERVE ROWS`. The `ON COMMIT DROP` option does not exist in the SQL standard. ### Non-deferred uniqueness constraints @@ -1188,29 +1188,29 @@ When a `UNIQUE` or `PRIMARY KEY` constraint is not deferrable, Greeplum Database ### Column check constraints -**Column Check Constraints** — The SQL standard states that `CHECK` column constraints may only refer to the column they apply to; only `CHECK` table constraints may refer to multiple columns. Cloudberry Database does not enforce this restriction; it treats column and table check constraints alike. 
+**Column Check Constraints** — The SQL standard states that `CHECK` column constraints may only refer to the column they apply to; only `CHECK` table constraints may refer to multiple columns. Apache Cloudberry does not enforce this restriction; it treats column and table check constraints alike.

-**Exclude Constraint** — The `EXCLUDE` constraint type is a Cloudberry Database extension.
+**Exclude Constraint** — The `EXCLUDE` constraint type is an Apache Cloudberry extension.

-**NULL Constraint** — The `NULL` constraint is a Cloudberry Database extension to the SQL standard that is included for compatibility with some other database systems (and for symmetry with the `NOT NULL` constraint). Since it is the default for any column, its presence is not required.
+**NULL Constraint** — The `NULL` constraint is an Apache Cloudberry extension to the SQL standard that is included for compatibility with some other database systems (and for symmetry with the `NOT NULL` constraint). Since it is the default for any column, its presence is not required.

### Constraint naming

-The SQL standard states that table and domain constraints must have names that are unique across the schema containing the table or domain. Cloudberry Database is laxer: it only requires constraint names to be unique across the constraints attached to a particular table or domain. However, this extra freedom does not exist for index-based constraints (`UNIQUE`, `PRIMARY KEY`, and `EXCLUDE` constraints), because the associated index is named the same as the constraint, and index names must be unique across all relations within the same schema.
+The SQL standard states that table and domain constraints must have names that are unique across the schema containing the table or domain. Apache Cloudberry is laxer: it only requires constraint names to be unique across the constraints attached to a particular table or domain.
However, this extra freedom does not exist for index-based constraints (`UNIQUE`, `PRIMARY KEY`, and `EXCLUDE` constraints), because the associated index is named the same as the constraint, and index names must be unique across all relations within the same schema.

-Cloudberry Database does not currently record names for `NOT NULL` constraints at all, so they are not subject to the uniqueness restriction.
+Apache Cloudberry does not currently record names for `NOT NULL` constraints at all, so they are not subject to the uniqueness restriction.

### Inheritance

-Multiple inheritance via the `INHERITS` clause is a Cloudberry Database language extension. SQL:1999 and later define single inheritance using a different syntax and different semantics. SQL:1999-style inheritance is not yet supported by Cloudberry Database.
+Multiple inheritance via the `INHERITS` clause is an Apache Cloudberry language extension. SQL:1999 and later define single inheritance using a different syntax and different semantics. SQL:1999-style inheritance is not yet supported by Apache Cloudberry.

### Zero-column tables

-Cloudberry Database allows a table of no columns to be created (for example, `CREATE TABLE foo();`). This is an extension from the SQL standard, which does not allow zero-column tables. Because zero-column tables are not in themselves very useful, disallowing them creates odd special cases for `ALTER TABLE DROP COLUMN`, so Cloudberry Database ignores this spec restriction.
+Apache Cloudberry allows a table of no columns to be created (for example, `CREATE TABLE foo();`). This is an extension from the SQL standard, which does not allow zero-column tables. Because zero-column tables are not in themselves very useful, disallowing them creates odd special cases for `ALTER TABLE DROP COLUMN`, so Apache Cloudberry ignores this spec restriction.

### Multiple identity columns

-Cloudberry Database allows a table to have more than one identity column.
The standard specifies that a table can have at most one identity column. Cloudberry Database relaxes this restriction to provide more flexibility for schema changes or migrations. Note that the `INSERT` command supports only one override clause that applies to the entire statement, so having multiple identity columns with different behaviors is not well supported. +Apache Cloudberry allows a table to have more than one identity column. The standard specifies that a table can have at most one identity column. Apache Cloudberry relaxes this restriction to provide more flexibility for schema changes or migrations. Note that the `INSERT` command supports only one override clause that applies to the entire statement, so having multiple identity columns with different behaviors is not well supported. ### Generated columns @@ -1218,32 +1218,32 @@ The option `STORED` is not standard but is also used by other SQL implementation ### Like clause -While a `LIKE` clause exists in the SQL standard, many of the options that Cloudberry Database accepts for it are not in the standard, and some of the standard's options are not implemented by Cloudberry Database. +While a `LIKE` clause exists in the SQL standard, many of the options that Apache Cloudberry accepts for it are not in the standard, and some of the standard's options are not implemented by Apache Cloudberry. ### With clause -The `WITH` clause is a Cloudberry Database extension; storage parameters are in the standard. +The `WITH` clause is a Apache Cloudberry extension; storage parameters are in the standard. ### Tablespaces -The Cloudberry Database concept of tablespaces is not part of the SQL standard. The clauses `TABLESPACE` and `USING INDEX TABLESPACE` are extensions. +The Apache Cloudberry concept of tablespaces is not part of the SQL standard. The clauses `TABLESPACE` and `USING INDEX TABLESPACE` are extensions. ### Typed tables -Typed tables implement a subset of the SQL standard. 
According to the standard, a typed table has columns corresponding to the underlying composite type as well as one other column that is the "self-referencing column". Cloudberry Database does not support self-referencing columns explicitly.
+Typed tables implement a subset of the SQL standard. According to the standard, a typed table has columns corresponding to the underlying composite type as well as one other column that is the "self-referencing column". Apache Cloudberry does not support self-referencing columns explicitly.

### Partition by clause

-Table partitioning via the `PARTITION BY` clause is a Cloudberry Database extension.
+Table partitioning via the `PARTITION BY` clause is an Apache Cloudberry extension.

### Partition of clause

-Table partitioning via the `PARTITION OF` clause is a Cloudberry Database extension.
+Table partitioning via the `PARTITION OF` clause is an Apache Cloudberry extension.

### Data distribution

-The Cloudberry Database concept of a parallel or distributed database is not part of the SQL standard. The `DISTRIBUTED` clauses are extensions.
+The Apache Cloudberry concept of a parallel or distributed database is not part of the SQL standard. The `DISTRIBUTED` clauses are extensions.

## See also

diff --git a/docs/sql-stmts/create-tablespace.md b/docs/sql-stmts/create-tablespace.md
index c128bd548a..61eb6418a1 100644
--- a/docs/sql-stmts/create-tablespace.md
+++ b/docs/sql-stmts/create-tablespace.md
@@ -17,13 +17,13 @@ CREATE TABLESPACE

## Description

-`CREATE TABLESPACE` registers and configures a new tablespace for your Cloudberry Database system. The tablespace name must be distinct from the name of any existing tablespace in the system. A tablespace is a Cloudberry Database system object (a global object), you can use a tablespace from any database if you have appropriate privileges.
+`CREATE TABLESPACE` registers and configures a new tablespace for your Apache Cloudberry system.
The tablespace name must be distinct from the name of any existing tablespace in the system. A tablespace is an Apache Cloudberry system object (a global object), you can use a tablespace from any database if you have appropriate privileges. A tablespace allows superusers to define an alternative host file system location where the data files containing database objects (such as tables and indexes) reside. -A user with appropriate privileges can pass tablespace_name to [`CREATE DATABASE`](/docs/sql-stmts/create-database.md), [`CREATE TABLE`](/docs/sql-stmts/create-table.md), or [`CREATE INDEX`](/docs/sql-stmts/create-index.md) to direct Cloudberry Database to store the data files for these objects within the specified tablespace. +A user with appropriate privileges can pass tablespace_name to [`CREATE DATABASE`](/docs/sql-stmts/create-database.md), [`CREATE TABLE`](/docs/sql-stmts/create-table.md), or [`CREATE INDEX`](/docs/sql-stmts/create-index.md) to direct Apache Cloudberry to store the data files for these objects within the specified tablespace. -In Cloudberry Database, the file system location must exist on all hosts including the hosts running the coordinator, standby mirror, each primary segment, and each mirror segment. +In Apache Cloudberry, the file system location must exist on all hosts including the hosts running the coordinator, standby mirror, each primary segment, and each mirror segment. ## Parameters @@ -37,15 +37,15 @@ The name of the user who will own the tablespace. If omitted, defaults to the us **`LOCATION 'directory'`** -The directory that will be used for the tablespace. The directory should be empty and must be owned by the Cloudberry Database system user. You must specify the absolute path of the directory, and the path name must not be greater than 100 characters in length.
(The location is used to create a symlink target in the pg_tblspc directory, and symlink targets are truncated to 100 characters when sending to `tar` from utilities such as `pg_basebackup`.) +The directory that will be used for the tablespace. The directory should be empty and must be owned by the Apache Cloudberry system user. You must specify the absolute path of the directory, and the path name must not be greater than 100 characters in length. (The location is used to create a symlink target in the pg_tblspc directory, and symlink targets are truncated to 100 characters when sending to `tar` from utilities such as `pg_basebackup`.) -You can specify a different tablespace directory for any Cloudberry Database segment instance in the `WITH` clause. +You can specify a different tablespace directory for any Apache Cloudberry segment instance in the `WITH` clause. **`contentID_i='directory_i'`** The value ID_i is the content ID for the segment instance. directory_i is the absolute path to the host system file location that the segment instance uses as the root directory for the tablespace. You cannot specify the content ID of the coordinator instance (`-1`). You can specify the same directory for multiple segments. -If a segment instance is not listed in the `WITH` clause, Cloudberry Database uses the tablespace directory specified in the `LOCATION` clause. +If a segment instance is not listed in the `WITH` clause, Apache Cloudberry uses the tablespace directory specified in the `LOCATION` clause. The restrictions identified for the `LOCATION` directory also hold for directory_i. @@ -55,14 +55,14 @@ A tablespace parameter to set or reset. Currently, the only available parameters ## Notes -Because `CREATE TABLESPACE` creates symbolic links from the `pg_tblspc` directory in the coordinator and segment instance data directory to the directories specified in the command, Cloudberry Database supports tablespaces only on systems that support symbolic links. 
+Because `CREATE TABLESPACE` creates symbolic links from the `pg_tblspc` directory in the coordinator and segment instance data directory to the directories specified in the command, Apache Cloudberry supports tablespaces only on systems that support symbolic links. You cannot run `CREATE TABLESPACE` inside a transaction block. When creating tablespaces, ensure that file system locations have sufficient I/O speed and available disk space. :::info -Cloudberry Database does not support different tablespace locations for a primary-mirror pair with the same content ID. It is only possible to configure different locations for different content IDs. Do not modify symbolic links under the `pg_tblspc` directory so that primary-mirror pairs point to different file locations; this will lead to erroneous behavior. +Apache Cloudberry does not support different tablespace locations for a primary-mirror pair with the same content ID. It is only possible to configure different locations for different content IDs. Do not modify symbolic links under the `pg_tblspc` directory so that primary-mirror pairs point to different file locations; this will lead to erroneous behavior. ::: ## Examples @@ -89,7 +89,7 @@ The example specifies the same location for the two segment instances. You can a ## Compatibility -`CREATE TABLESPACE` is a Cloudberry Database extension. +`CREATE TABLESPACE` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/create-transform.md b/docs/sql-stmts/create-transform.md index d0031bfcde..3ba5a8b905 100644 --- a/docs/sql-stmts/create-transform.md +++ b/docs/sql-stmts/create-transform.md @@ -88,7 +88,7 @@ In practice, these commands would be wrapped up in an extension. ## Compatibility -This form of `CREATE TRANSFORM` is a Cloudberry Database extension. There is a `CREATE TRANSFORM` command in the SQL standard, but it is for adapting data types to client languages. That usage is not supported by Cloudberry Database.
+This form of `CREATE TRANSFORM` is an Apache Cloudberry extension. There is a `CREATE TRANSFORM` command in the SQL standard, but it is for adapting data types to client languages. That usage is not supported by Apache Cloudberry. ## See also diff --git a/docs/sql-stmts/create-trigger.md b/docs/sql-stmts/create-trigger.md index 75115fe4ad..cb55ae7d74 100644 --- a/docs/sql-stmts/create-trigger.md +++ b/docs/sql-stmts/create-trigger.md @@ -4,7 +4,7 @@ title: CREATE TRIGGER # CREATE TRIGGER -Defines a new trigger. User-defined triggers are not supported in Cloudberry Database. +Defines a new trigger. User-defined triggers are not supported in Apache Cloudberry. ## Synopsis @@ -18,8 +18,8 @@ CREATE TRIGGER {BEFORE | AFTER} { [OR ...]} `CREATE TRIGGER` creates a new trigger. The trigger will be associated with the specified table and will run the specified function when certain events occur. If multiple triggers of the same kind are defined for the same event, they will be fired in alphabetical order by name. ->*Important* Due to the distributed nature of a Cloudberry Database system, the use of triggers on data is very limited in Cloudberry Database. The function used in the trigger must be `IMMUTABLE`, meaning it cannot use information not directly present in its argument list. The function specified in the trigger also cannot run any SQL or modify distributed database objects in any way. Given that triggers are most often used to alter tables (for example, update these other rows when this row is updated), these limitations offer very little practical use of triggers in Cloudberry Database. For that reason, Cloudberry Database does not support the use of user-defined triggers in Cloudberry Database. Triggers cannot be used on append-optimized tables. -> Event Triggers, which capture only DDL events, _are_ supported in Cloudberry Database.
See the PostgreSQL documentation for [Event Triggers](https://www.postgresql.org/docs/12/event-triggers.html) for additional information. +>*Important* Due to the distributed nature of an Apache Cloudberry system, the use of triggers on data is very limited in Apache Cloudberry. The function used in the trigger must be `IMMUTABLE`, meaning it cannot use information not directly present in its argument list. The function specified in the trigger also cannot run any SQL or modify distributed database objects in any way. Given that triggers are most often used to alter tables (for example, update these other rows when this row is updated), these limitations offer very little practical use of triggers in Apache Cloudberry. For that reason, Apache Cloudberry does not support the use of user-defined triggers in Apache Cloudberry. Triggers cannot be used on append-optimized tables. +> Event Triggers, which capture only DDL events, _are_ supported in Apache Cloudberry. See the PostgreSQL documentation for [Event Triggers](https://www.postgresql.org/docs/12/event-triggers.html) for additional information. [SELECT](/docs/sql-stmts/select.md) does not modify any rows so you can not create `SELECT` triggers. Rules and views are more appropriate in such cases. @@ -72,15 +72,15 @@ ON mytable FOR EACH STATEMENT EXECUTE PROCEDURE sendmail(); ## Compatibility -The `CREATE TRIGGER` statement in Cloudberry Database implements a subset of the SQL standard. The following functionality is currently missing: +The `CREATE TRIGGER` statement in Apache Cloudberry implements a subset of the SQL standard. The following functionality is currently missing: -- Cloudberry Database has strict limitations on the function that is called by a trigger, which makes the use of triggers very limited in Cloudberry Database. For this reason, triggers are not officially supported in Cloudberry Database.
+- Apache Cloudberry has strict limitations on the function that is called by a trigger, which makes the use of triggers very limited in Apache Cloudberry. For this reason, triggers are not officially supported in Apache Cloudberry. - SQL allows triggers to fire on updates to specific columns (e.g., `AFTER UPDATE OF col1, col2`). -- SQL allows you to define aliases for the 'old' and 'new' rows or tables for use in the definition of the triggered action (e.g., `CREATE TRIGGER ... ON tablename REFERENCING OLD ROW AS somename NEW ROW AS othername ...`). Since Cloudberry Database allows trigger procedures to be written in any number of user-defined languages, access to the data is handled in a language-specific way. -- Cloudberry Database only allows the execution of a user-defined function for the triggered action. The standard allows the execution of a number of other SQL commands, such as `CREATE TABLE` as the triggered action. This limitation is not hard to work around by creating a user-defined function that runs the desired commands. -- SQL specifies that multiple triggers should be fired in time-of-creation order. Cloudberry Database uses name order, which was judged to be more convenient. -- SQL specifies that `BEFORE DELETE` triggers on cascaded deletes fire after the cascaded `DELETE` completes. The Cloudberry Database behavior is for `BEFORE DELETE` to always fire before the delete action, even a cascading one. This is considered more consistent. -- The ability to specify multiple actions for a single trigger using `OR` is a Cloudberry Database extension of the SQL standard. +- SQL allows you to define aliases for the 'old' and 'new' rows or tables for use in the definition of the triggered action (e.g., `CREATE TRIGGER ... ON tablename REFERENCING OLD ROW AS somename NEW ROW AS othername ...`). Since Apache Cloudberry allows trigger procedures to be written in any number of user-defined languages, access to the data is handled in a language-specific way. 
+- Apache Cloudberry only allows the execution of a user-defined function for the triggered action. The standard allows the execution of a number of other SQL commands, such as `CREATE TABLE` as the triggered action. This limitation is not hard to work around by creating a user-defined function that runs the desired commands. +- SQL specifies that multiple triggers should be fired in time-of-creation order. Apache Cloudberry uses name order, which was judged to be more convenient. +- SQL specifies that `BEFORE DELETE` triggers on cascaded deletes fire after the cascaded `DELETE` completes. The Apache Cloudberry behavior is for `BEFORE DELETE` to always fire before the delete action, even a cascading one. This is considered more consistent. +- The ability to specify multiple actions for a single trigger using `OR` is an Apache Cloudberry extension of the SQL standard. ## See also diff --git a/docs/sql-stmts/create-type.md b/docs/sql-stmts/create-type.md index 81f4e39f5e..972fe3419a 100644 --- a/docs/sql-stmts/create-type.md +++ b/docs/sql-stmts/create-type.md @@ -83,7 +83,7 @@ The optional subtype_diff_function must take two values of the subtype type as a The fourth form of `CREATE TYPE` creates a new base type (scalar type). You must be a superuser to create a new base type. (This restriction is in place because an erroneous type definition could confuse or even crash the server.) -The parameters may appear in any order, not only that shown in the syntax, and most are optional. You must register two or more functions (using `CREATE FUNCTION`) before defining the type. The support functions input_function and output_function are required, while the functions receive_function, send_function, type_modifier_input_function, type_modifier_output_function, and analyze_function are optional. Generally these functions have to be coded in C or another low-level language. In Cloudberry Database, any function used to implement a data type must be defined as `IMMUTABLE`.
+The parameters may appear in any order, not only that shown in the syntax, and most are optional. You must register two or more functions (using `CREATE FUNCTION`) before defining the type. The support functions input_function and output_function are required, while the functions receive_function, send_function, type_modifier_input_function, type_modifier_output_function, and analyze_function are optional. Generally these functions have to be coded in C or another low-level language. In Apache Cloudberry, any function used to implement a data type must be defined as `IMMUTABLE`. The input_function converts the type's external textual representation to the internal representation used by the operators and functions defined for the type. output_function performs the reverse transformation. The input function may be declared as taking one argument of type `cstring`, or as taking three arguments of types `cstring`, `oid`, `integer`. The first argument is the input text as a C string, the second argument is the type's own OID (except for array types, which instead receive their element type's OID), and the third is the `typmod` of the destination column, if known (`-1` will be passed if not). The input function must return a value of the data type itself. Usually, an input function should be declared `STRICT`; if it is not, it will be called with a `NULL` first parameter when reading a `NULL` input value. The function must still return `NULL` in this case, unless it raises an error. (This case is mainly meant to support domain input functions, which may need to reject `NULL` inputs.) The output function must be declared as taking one argument of the new data type. The output function must return type `cstring`. Output functions are not invoked for `NULL` values. 
@@ -91,11 +91,11 @@ The optional receive_function converts the type's external binary representation How can the input and output functions be declared to have results or arguments of the new type, when they have to be created before the new type can be created? The type should first be defined as a shell type, which is a placeholder type that has no properties except a name and an owner. This is done by issuing the command `CREATE TYPE name`, with no additional parameters. Then the I/O functions can be defined referencing the shell type. Finally, `CREATE TYPE` with a full definition replaces the shell entry with a complete, valid type definition, after which the new type can be used normally. -The optional type_modifier_input_function and type_modifier_output_function are required if the type supports modifiers. Modifiers are optional constraints attached to a type declaration, such as `char(5)` or `numeric(30,2)`. While Cloudberry Database allows user-defined types to take one or more simple constants or identifiers as modifiers, this information must fit into a single non-negative integer value for storage in the system catalogs. Cloudberry Database passes the declared modifier(s) to the type_modifier_input_function in the form of a `cstring` array. The modifier input function must check the values for validity, throwing an error if they are incorrect. If the values are correct, the modifier input function returns a single non-negative integer value that Cloudberry Database stores as the column `typmod`. Type modifiers are rejected if the type was not defined with a type_modifier_input_function. The type_modifier_output_function converts the internal integer `typmod` value back to the correct form for user display. The modifier output function must return a `cstring` value that is the exact string to append to the type name. For example, `numeric`'s function might return `(30,2)`. The type_modifier_output_function is optional. 
When not specified, the default display format is the stored `typmod` integer value enclosed in parentheses. +The optional type_modifier_input_function and type_modifier_output_function are required if the type supports modifiers. Modifiers are optional constraints attached to a type declaration, such as `char(5)` or `numeric(30,2)`. While Apache Cloudberry allows user-defined types to take one or more simple constants or identifiers as modifiers, this information must fit into a single non-negative integer value for storage in the system catalogs. Apache Cloudberry passes the declared modifier(s) to the type_modifier_input_function in the form of a `cstring` array. The modifier input function must check the values for validity, throwing an error if they are incorrect. If the values are correct, the modifier input function returns a single non-negative integer value that Apache Cloudberry stores as the column `typmod`. Type modifiers are rejected if the type was not defined with a type_modifier_input_function. The type_modifier_output_function converts the internal integer `typmod` value back to the correct form for user display. The modifier output function must return a `cstring` value that is the exact string to append to the type name. For example, `numeric`'s function might return `(30,2)`. The type_modifier_output_function is optional. When not specified, the default display format is the stored `typmod` integer value enclosed in parentheses. The optional analyze_function performs type-specific statistics collection for columns of the data type. By default, `ANALYZE` attempts to gather statistics using the type's "equals" and "less-than" operators, if there is a default b-tree operator class for the type. For non-scalar types this behavior is likely to be unsuitable, so it can be overridden by specifying a custom analysis function. The analysis function must be declared to take a single argument of type `internal`, and return a `boolean` result. 
-While the details of the new type's internal representation are only known to the I/O functions and other functions you create to work with the type, there are several properties of the internal representation that must be declared to Cloudberry Database. Foremost of these is internallength. Base data types can be fixed-length, in which case internallength is a positive integer, or variable length, indicated by setting internallength to `VARIABLE`. (Internally, this is represented by setting `typlen` to `-1`.) The internal representation of all variable-length types must start with a 4-byte integer giving the total length of this value of the type. +While the details of the new type's internal representation are only known to the I/O functions and other functions you create to work with the type, there are several properties of the internal representation that must be declared to Apache Cloudberry. Foremost of these is internallength. Base data types can be fixed-length, in which case internallength is a positive integer, or variable length, indicated by setting internallength to `VARIABLE`. (Internally, this is represented by setting `typlen` to `-1`.) The internal representation of all variable-length types must start with a 4-byte integer giving the total length of this value of the type. The optional flag `PASSEDBYVALUE` indicates that values of this data type are passed by value, rather than by reference. You may not pass by value types whose internal representation is larger than the size of the `Datum` type (4 bytes on most machines, 8 bytes on a few). @@ -107,7 +107,7 @@ All storage values other than `plain` imply that the functions of the data type The like_type parameter provides an alternative method for specifying the basic representation properties of a data type: copy them from some existing type. The values `internallength`, `passedbyvalue`, `alignment`, and `storage` are copied from the named type. 
(It is possible, though usually undesirable, to override some of these values by specifying them along with the `LIKE` clause.) Specifying representation this way is especially useful when the low-level implementation of the new type "piggybacks" on an existing type in some fashion. -The *category* and *preferred* parameters can be used to help control which implicit cast Cloudberry Database applies in ambiguous situations. Each data type belongs to a category named by a single ASCII character, and each type is either "preferred" or not within its category. The parser will prefer casting to preferred types (but only from other types within the same category) when this rule helps resolve overloaded functions or operators. For types that have no implicit casts to or from any other types, it is sufficient to retain the default settings. However, for a group of related types that have implicit casts, it is often helpful to mark them all as belonging to a category and select one or two of the "most general" types as being preferred within the category. The *category* parameter is especially useful when you add a user-defined type to an existing built-in category, such as the numeric or string types. It is also possible to create new entirely-user-defined type categories. Select any ASCII character other than an upper-case letter to name such a category. +The *category* and *preferred* parameters can be used to help control which implicit cast Apache Cloudberry applies in ambiguous situations. Each data type belongs to a category named by a single ASCII character, and each type is either "preferred" or not within its category. The parser will prefer casting to preferred types (but only from other types within the same category) when this rule helps resolve overloaded functions or operators. For types that have no implicit casts to or from any other types, it is sufficient to retain the default settings. 
However, for a group of related types that have implicit casts, it is often helpful to mark them all as belonging to a category and select one or two of the "most general" types as being preferred within the category. The *category* parameter is especially useful when you add a user-defined type to an existing built-in category, such as the numeric or string types. It is also possible to create new entirely-user-defined type categories. Select any ASCII character other than an upper-case letter to name such a category. A default value may be specified, in case a user wants columns of the data type to default to something other than the null value. Specify the default with the `DEFAULT` key word. (Such a default may be overridden by an explicit `DEFAULT` clause attached to a particular column.) @@ -119,7 +119,7 @@ If the optional Boolean parameter collatable is true, column definitions and exp **Array Types** -Whenever a user-defined type is created, Cloudberry Database automatically creates an associated array type, whose name consists of the element type's name prepended with an underscore, and truncated if necessary to keep it less than `NAMEDATALEN` bytes long. (If the name so generated collides with an existing type name, the process is repeated until a non-colliding name is found.) This implicitly-created array type is variable length and uses the built-in input and output functions `array_in` and `array_out`. The array type tracks any changes in its element type's owner or schema, and is dropped if the element type is. +Whenever a user-defined type is created, Apache Cloudberry automatically creates an associated array type, whose name consists of the element type's name prepended with an underscore, and truncated if necessary to keep it less than `NAMEDATALEN` bytes long. (If the name so generated collides with an existing type name, the process is repeated until a non-colliding name is found.) 
This implicitly-created array type is variable length and uses the built-in input and output functions `array_in` and `array_out`. The array type tracks any changes in its element type's owner or schema, and is dropped if the element type is. Why is there is an `ELEMENT` option, when the system makes the correct array type automatically? The only case where it's useful to use `ELEMENT` is when you are making a fixed-length type that happens to be internally an array of a number of identical things, and you want to allow these things to be accessed directly by subscripting, in addition to whatever operations you plan to provide for the type as a whole. For example, type `point` is represented as just two floating-point numbers, each can be accessed using `point[0]` and `point[1]`. Note that this facility only works for fixed-length types whose internal form is exactly a sequence of identical fixed-length fields. A subscriptable variable-length type must have the generalized internal representation used by `array_in` and `array_out`. For historical reasons, subscripting of fixed-length array types starts from zero, rather than from one as for variable-length arrays. @@ -245,7 +245,7 @@ Set to the size, in bytes, for each block in the column. The `BLOCKSIZE` must be User-defined type names cannot begin with the underscore character (_) and can only be 62 characters long (or in general `NAMEDATALEN - 2`, rather than the `NAMEDATALEN - 1` characters allowed for other names). Type names beginning with underscore are reserved for internally-created array type names. -Cloudberry Database does not support adding storage options for row or composite types. +Apache Cloudberry does not support adding storage options for row or composite types. Storage options defined at the table- and column- level override the default storage options defined for a scalar type. 
@@ -339,9 +339,9 @@ CREATE TABLE big_objs ( ## Compatibility -The first form of the `CREATE TYPE` command, which creates a composite type, conforms to the SQL standard. The other forms are Cloudberry Database extensions. The `CREATE TYPE` statement in the SQL standard also defines other forms that are not implemented in Cloudberry Database. +The first form of the `CREATE TYPE` command, which creates a composite type, conforms to the SQL standard. The other forms are Apache Cloudberry extensions. The `CREATE TYPE` statement in the SQL standard also defines other forms that are not implemented in Apache Cloudberry. -The ability to create a composite type with zero attributes is a Cloudberry Database-specific deviation from the standard (analogous to the same case in `CREATE TABLE`). +The ability to create a composite type with zero attributes is an Apache Cloudberry-specific deviation from the standard (analogous to the same case in `CREATE TABLE`). ## See also diff --git a/docs/sql-stmts/create-user-mapping.md b/docs/sql-stmts/create-user-mapping.md index e66ff9cea1..d75303712a 100644 --- a/docs/sql-stmts/create-user-mapping.md +++ b/docs/sql-stmts/create-user-mapping.md @@ -24,15 +24,15 @@ The owner of a foreign server can create user mappings for that server for any u **`IF NOT EXISTS`** -Do not throw an error if a mapping of the given user to the given foreign server already exists. Cloudberry Database issues a notice in this case. Note that there is no guarantee that the existing user mapping is anything like the one that would have been created. +Do not throw an error if a mapping of the given user to the given foreign server already exists. Apache Cloudberry issues a notice in this case. Note that there is no guarantee that the existing user mapping is anything like the one that would have been created. **`user_name`** -The name of an existing user that is mapped to the foreign server. `CURRENT_USER` and `USER` match the name of the current user.
When `PUBLIC` is specified, Cloudberry Database creates a so-called public mapping that is used when no user-specific mapping is applicable. +The name of an existing user that is mapped to the foreign server. `CURRENT_USER` and `USER` match the name of the current user. When `PUBLIC` is specified, Apache Cloudberry creates a so-called public mapping that is used when no user-specific mapping is applicable. **`server_name`** -The name of an existing server for which Cloudberry Database is to create the user mapping. +The name of an existing server for which Apache Cloudberry is to create the user mapping. **`OPTIONS ( option 'value' [, ... ] )`** diff --git a/docs/sql-stmts/create-user.md b/docs/sql-stmts/create-user.md index 7d6cc3667e..e7faa83b85 100644 --- a/docs/sql-stmts/create-user.md +++ b/docs/sql-stmts/create-user.md @@ -45,7 +45,7 @@ where option can be: ## Compatibility -The `CREATE USER` statement is a Cloudberry Database extension. The SQL standard leaves the definition of users to the implementation. +The `CREATE USER` statement is an Apache Cloudberry extension. The SQL standard leaves the definition of users to the implementation. ## See also diff --git a/docs/sql-stmts/create-view.md b/docs/sql-stmts/create-view.md index 959565ef9f..1b86119589 100644 --- a/docs/sql-stmts/create-view.md +++ b/docs/sql-stmts/create-view.md @@ -17,7 +17,7 @@ CREATE [OR REPLACE] [TEMP | TEMPORARY] [RECURSIVE] VIEW [ ( ## Description -`CREATE VIEW` defines a view of a query. The view is not physically materialized. Instead, Cloudberry Database runs the query every time the view is referenced in a query. +`CREATE VIEW` defines a view of a query. The view is not physically materialized. Instead, Apache Cloudberry runs the query every time the view is referenced in a query. `CREATE OR REPLACE VIEW` is similar, but if a view of the same name already exists, it is replaced.
The new query must generate the same columns that were generated by the existing view query (that is, the same column names in the same order, and with the same data types), but it may add additional columns to the end of the list. The calculations giving rise to the output columns may be completely different. @@ -73,7 +73,7 @@ A [SELECT](/docs/sql-stmts/select.md) or [VALUES](/docs/sql-stmts/values.md) com **`WITH [ CASCADED | LOCAL ] CHECK OPTION`** -This option controls the behavior of automatically updatable views. When this option is specified, `INSERT` and `UPDATE` commands on the view will be checked to ensure that new rows satisfy the view-defining condition (that is, the new rows are checked to ensure that they are visible through the view). If they are not, Cloudberry Database rejects the update. If the `CHECK OPTION` is not specified, `INSERT` and `UPDATE` commands on the view are allowed to create rows that are not visible through the view. The following check options are supported: +This option controls the behavior of automatically updatable views. When this option is specified, `INSERT` and `UPDATE` commands on the view will be checked to ensure that new rows satisfy the view-defining condition (that is, the new rows are checked to ensure that they are visible through the view). If they are not, Apache Cloudberry rejects the update. If the `CHECK OPTION` is not specified, `INSERT` and `UPDATE` commands on the view are allowed to create rows that are not visible through the view. The following check options are supported: **`LOCAL`** @@ -105,13 +105,13 @@ Access to tables referenced in the view is determined by permissions of the view Functions called in the view are treated the same as if they had been called directly from the query using the view. Therefore the user of a view must have permissions to call any functions used by the view. -When both `CREATE VIEW ...` and `SELECT ... 
FROM ` specify an `ORDER BY` clause, Cloudberry Database ignores the `ORDER BY` clause in the `CREATE VIEW` statement. +When both `CREATE VIEW ...` and `SELECT ... FROM ` specify an `ORDER BY` clause, Apache Cloudberry ignores the `ORDER BY` clause in the `CREATE VIEW` statement. When `CREATE OR REPLACE VIEW` is used on an existing view, only the view's defining `SELECT` rule is changed. Other view properties, including ownership, permissions, and non-`SELECT` rules, remain unchanged. You must own the view to replace it (this includes being a member of the owning role). ## Updatable views -Simple views are automatically updatable: Cloudberry Database allows you to invoke `INSERT`, `UPDATE`, and `DELETE` statements on the view in the same way as on a regular table. A view is automatically updatable if it satisfies all of the following conditions: +Simple views are automatically updatable: Apache Cloudberry allows you to invoke `INSERT`, `UPDATE`, and `DELETE` statements on the view in the same way as on a regular table. A view is automatically updatable if it satisfies all of the following conditions: - The view must have exactly one entry in its `FROM` list, which must be a table or another updatable view. @@ -121,7 +121,7 @@ Simple views are automatically updatable: Cloudberry Database allows you to invo - The view's select list must not contain any aggregates, window functions, or set-returning functions. -An automatically updatable view may contain a mix of updatable and non-updatable columns. A column is updatable if it is a simple reference to an updatable column of the underlying base relation; otherwise the column is read-only, and Cloudberry Database raises an error if an `INSERT` or `UPDATE` statement attempts to assign a value to it. +An automatically updatable view may contain a mix of updatable and non-updatable columns. 
A column is updatable if it is a simple reference to an updatable column of the underlying base relation; otherwise the column is read-only, and Apache Cloudberry raises an error if an `INSERT` or `UPDATE` statement attempts to assign a value to it. If the view is automatically updatable the system will convert any `INSERT`, `UPDATE`, or `DELETE` statement on the view into the corresponding statement on the underlying base relation. `INSERT` statements that have an `ON CONFLICT UPDATE` clause are fully supported. @@ -206,7 +206,7 @@ Notice that although the recursive view's name is schema-qualified in this `CREA ## Compatibility -`CREATE OR REPLACE VIEW` is a Cloudberry Database extension. So is the concept of a temporary view. The `WITH ( ... )` clause is an extension as well. +`CREATE OR REPLACE VIEW` is an Apache Cloudberry extension. So is the concept of a temporary view. The `WITH ( ... )` clause is an extension as well. ## See also diff --git a/docs/sql-stmts/declare.md b/docs/sql-stmts/declare.md index f90652613e..03e8b329fb 100644 --- a/docs/sql-stmts/declare.md +++ b/docs/sql-stmts/declare.md @@ -32,9 +32,9 @@ A cursor can be specified in the `WHERE CURRENT OF` clause of the [UPDATE](/docs **Parallel Retrieve Cursors** -Cloudberry Database supports a special type of cursor, a *parallel retrieve cursor*. You can use a parallel retrieve cursor to retrieve query results, in parallel, directly from the Cloudberry Database segments, bypassing the Cloudberry Database coordinator. +Apache Cloudberry supports a special type of cursor, a *parallel retrieve cursor*. You can use a parallel retrieve cursor to retrieve query results, in parallel, directly from the Apache Cloudberry segments, bypassing the Apache Cloudberry coordinator. -Parallel retrieve cursors do not support the `WITH HOLD` clause.
Apache Cloudberry ignores the `BINARY` clause when you declare a parallel retrieve cursor. You open a special retrieve session to each parallel retrieve cursor endpoint, and use the [RETRIEVE](/docs/sql-stmts/retrieve.md) command to retrieve the query results from a parallel retrieve cursor. @@ -48,26 +48,26 @@ The name of the cursor to be created. Causes the cursor to return data in binary rather than in text format. -> **Note** Cloudberry Database ignores the `BINARY` clause when you declare a `PARALLEL RETRIEVE` cursor. +> **Note** Apache Cloudberry ignores the `BINARY` clause when you declare a `PARALLEL RETRIEVE` cursor. **`INSENSITIVE`** -Indicates that data retrieved from the cursor should be unaffected by updates to the table(s) underlying the cursor that occur after the cursor is created. In Cloudberry Database, all cursors are insensitive. This key word currently has no effect and is present only for compatibility with the SQL standard. +Indicates that data retrieved from the cursor should be unaffected by updates to the table(s) underlying the cursor that occur after the cursor is created. In Apache Cloudberry, all cursors are insensitive. This key word currently has no effect and is present only for compatibility with the SQL standard. **`NO SCROLL`** -The cursor cannot be used to retrieve rows in a nonsequential fashion. This is the default behavior in Cloudberry Database; scrollable cursors (`SCROLL`) are not supported. +The cursor cannot be used to retrieve rows in a nonsequential fashion. This is the default behavior in Apache Cloudberry; scrollable cursors (`SCROLL`) are not supported. **`PARALLEL RETRIEVE`** -Declare a parallel retrieve cursor. A parallel retrieve cursor is a special type of cursor that you can use to retrieve results directly from Cloudberry Database segments, in parallel. +Declare a parallel retrieve cursor. 
A parallel retrieve cursor is a special type of cursor that you can use to retrieve results directly from Apache Cloudberry segments, in parallel. **`WITH HOLD`**
**`WITHOUT HOLD`** `WITH HOLD` specifies that the cursor may continue to be used after the transaction that created it successfully commits. `WITHOUT HOLD` specifies that the cursor cannot be used outside of the transaction that created it. `WITHOUT HOLD` is the default. -> **Note** Cloudberry Database does not support declaring a `PARALLEL RETRIEVE` cursor with the `WITH HOLD` clause. `WITH HOLD` also cannot not be specified when the `query` includes a `FOR UPDATE` or `FOR SHARE` clause. +> **Note** Apache Cloudberry does not support declaring a `PARALLEL RETRIEVE` cursor with the `WITH HOLD` clause. `WITH HOLD` also cannot be specified when the `query` includes a `FOR UPDATE` or `FOR SHARE` clause. **`query`** @@ -95,13 +95,13 @@ The key words `BINARY`, `INSENSITIVE`, and `NO SCROLL` can appear in any order. ## Notes -Unless `WITH HOLD` is specified, the cursor created by this command can only be used within the current transaction. Thus, `DECLARE` without `WITH HOLD` is useless outside a transaction block: the cursor would survive only to the completion of the statement. Therefore Cloudberry Database reports an error if this command is used outside a transaction block. Use `BEGIN` and `COMMIT` (or `ROLLBACK`) to define a transaction block. +Unless `WITH HOLD` is specified, the cursor created by this command can only be used within the current transaction. Thus, `DECLARE` without `WITH HOLD` is useless outside a transaction block: the cursor would survive only to the completion of the statement. Therefore Apache Cloudberry reports an error if this command is used outside a transaction block. Use `BEGIN` and `COMMIT` (or `ROLLBACK`) to define a transaction block. If `WITH HOLD` is specified and the transaction that created the cursor successfully commits, the cursor can continue to be accessed by subsequent transactions in the same session. (But if the creating transaction ends prematurely, the cursor is removed.)
A cursor created with `WITH HOLD` is closed when an explicit `CLOSE` command is issued on it, or the session ends. In the current implementation, the rows represented by a held cursor are copied into a temporary file or memory area so that they remain available for subsequent transactions. If you create a cursor with the `DECLARE` command in a transaction, you cannot use the `SET` command in the transaction until you close the cursor with the `CLOSE` command. -Scrollable cursors are not currently supported in Cloudberry Database. You can only use `FETCH` or `RETRIEVE` to move the cursor position forward, not backwards. +Scrollable cursors are not currently supported in Apache Cloudberry. You can only use `FETCH` or `RETRIEVE` to move the cursor position forward, not backwards. `DECLARE...FOR UPDATE` is not supported with append-optimized tables. @@ -123,13 +123,13 @@ DECLARE myprcursor PARALLEL RETRIEVE CURSOR FOR SELECT * FROM mytable; ## Compatibility -SQL standard allows cursors only in embedded SQL and in modules. Cloudberry Database permits cursors to be used interactively. +SQL standard allows cursors only in embedded SQL and in modules. Apache Cloudberry permits cursors to be used interactively. -Cloudberry Database does not implement an `OPEN` statement for cursors. A cursor is considered to be open when it is declared. +Apache Cloudberry does not implement an `OPEN` statement for cursors. A cursor is considered to be open when it is declared. -The SQL standard allows cursors to move both forward and backward. All Cloudberry Database cursors are forward moving only (not scrollable). +The SQL standard allows cursors to move both forward and backward. All Apache Cloudberry cursors are forward moving only (not scrollable). -Binary cursors are a Cloudberry Database extension. +Binary cursors are an Apache Cloudberry extension. The SQL standard makes no provisions for parallel retrieve cursors.
diff --git a/docs/sql-stmts/delete.md b/docs/sql-stmts/delete.md index e301bc8aab..799114bd93 100644 --- a/docs/sql-stmts/delete.md +++ b/docs/sql-stmts/delete.md @@ -30,7 +30,7 @@ You must have the `DELETE` privilege on the table to delete from it, as well as > **Note** The `RETURNING` clause is not supported when deleting from append-optimized tables. -> **Note** As the default, Cloudberry Database acquires an `EXCLUSIVE` lock on tables for `DELETE` operations on heap tables. When the Global Deadlock Detector is enabled, the lock mode for `DELETE` operations on heap tables is `ROW EXCLUSIVE`. +> **Note** As the default, Apache Cloudberry acquires an `EXCLUSIVE` lock on tables for `DELETE` operations on heap tables. When the Global Deadlock Detector is enabled, the lock mode for `DELETE` operations on heap tables is `ROW EXCLUSIVE`. ## Parameters @@ -44,7 +44,7 @@ See [SELECT](/docs/sql-stmts/select.md) for details. **`table_name`** -The name (optionally schema-qualified) of the table to delete rows from. If you specify `ONLY` before the table name, Cloudberry Database deletes matching rows from the named table only. If `ONLY` is not specified, matching rows are also deleted from any tables inheriting from the named table. Optionally, you can specify `*` after the table name to explicitly indicate that descendant tables are included. +The name (optionally schema-qualified) of the table to delete rows from. If you specify `ONLY` before the table name, Apache Cloudberry deletes matching rows from the named table only. If `ONLY` is not specified, matching rows are also deleted from any tables inheriting from the named table. Optionally, you can specify `*` after the table name to explicitly indicate that descendant tables are included. **`alias`** @@ -56,7 +56,7 @@ A table expression allowing columns from other tables to appear in the `WHERE` c **`condition`** -An expression that returns a value of type `boolean`. 
Cloudberry Database deletes only those rows for which this expression returns `true`. +An expression that returns a value of type `boolean`. Apache Cloudberry deletes only those rows for which this expression returns `true`. **`cursor_name`** @@ -90,7 +90,7 @@ The `RETURNING` clause is not supported when deleting from append-optimized tabl The `WHERE CURRENT OF` clause is not supported with replicated tables. -Cloudberry Database lets you reference columns of other tables in the `WHERE` condition by specifying the other tables in the `USING` clause. For example, to delete all films produced by a given producer, one can run: +Apache Cloudberry lets you reference columns of other tables in the `WHERE` condition by specifying the other tables in the `USING` clause. For example, to delete all films produced by a given producer, one can run: ```sql DELETE FROM films USING producers @@ -140,7 +140,7 @@ name = 'Hannah'; ## Compatibility -This command conforms to the SQL standard, except that the `USING` and `RETURNING` clauses are Cloudberry Database extensions, as is the ability to use `WITH` with `DELETE`. +This command conforms to the SQL standard, except that the `USING` and `RETURNING` clauses are Apache Cloudberry extensions, as is the ability to use `WITH` with `DELETE`. ## See also diff --git a/docs/sql-stmts/discard.md b/docs/sql-stmts/discard.md index 5d21a3859b..714d0bcb94 100644 --- a/docs/sql-stmts/discard.md +++ b/docs/sql-stmts/discard.md @@ -16,7 +16,7 @@ DISCARD { ALL | PLANS | SEQUENCES | TEMPORARY | TEMP } `DISCARD` releases internal resources associated with a database session. This command is useful for partially or fully resetting the session's state. There are several subcommands to release different types of resources; the `DISCARD ALL` variant subsumes all the others, and also resets additional state. -Cloudberry Database does not support invoking `DISCARD ALL` in a transaction. 
+Apache Cloudberry does not support invoking `DISCARD ALL` in a transaction. ## Parameters @@ -54,4 +54,4 @@ Releases all temporary resources associated with the current session and resets ## Compatibility -`DISCARD` is a Cloudberry Database extension. +`DISCARD` is an Apache Cloudberry extension. diff --git a/docs/sql-stmts/do.md b/docs/sql-stmts/do.md index c15c9c5ec4..5b46978f2c 100644 --- a/docs/sql-stmts/do.md +++ b/docs/sql-stmts/do.md @@ -38,7 +38,7 @@ The name of the procedural language in which the code is written. The default is ## Notes -The procedural language to be used must already have been installed into the current database by means of `CREATE EXTENSION`. The PL/pgSQL language is installed wih Cloudberry Database and is registered by default every user-created database. The PL/Python and PL/Perl languages are installed by default, but not registered. Other languages are neither installed nor registered. The pg_language system catalog contains information about the registered languages in a database. +The procedural language to be used must already have been installed into the current database by means of `CREATE EXTENSION`. The PL/pgSQL language is installed with Apache Cloudberry and is registered by default in every user-created database. The PL/Python and PL/Perl languages are installed by default, but not registered. Other languages are neither installed nor registered. The pg_language system catalog contains information about the registered languages in a database. The user must have `USAGE` privilege for the procedural language, or must be a superuser if the language is untrusted. This is the same privilege requirement as for creating a function in the language.
diff --git a/docs/sql-stmts/drop-access-method.md b/docs/sql-stmts/drop-access-method.md index 4f57a3b394..6a63f27d7d 100644 --- a/docs/sql-stmts/drop-access-method.md +++ b/docs/sql-stmts/drop-access-method.md @@ -20,7 +20,7 @@ DROP ACCESS METHOD [IF EXISTS] [CASCADE | RESTRICT] **`IF EXISTS`** -Do not throw an error if the access method does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the access method does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -44,7 +44,7 @@ DROP ACCESS METHOD heptree; ## Compatibility -`DROP ACCESS METHOD` is a Cloudberry Database extension. +`DROP ACCESS METHOD` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-collation.md b/docs/sql-stmts/drop-collation.md index eee86e6339..6b94deadd4 100644 --- a/docs/sql-stmts/drop-collation.md +++ b/docs/sql-stmts/drop-collation.md @@ -44,7 +44,7 @@ DROP COLLATION german; ## Compatibility -The `DROP COLLATION` command conforms to the SQL standard, apart from the `IF EXISTS` option, which is a Cloudberry Database extension. +The `DROP COLLATION` command conforms to the SQL standard, apart from the `IF EXISTS` option, which is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-conversion.md b/docs/sql-stmts/drop-conversion.md index d5935ebcaf..8ac1d7dd79 100644 --- a/docs/sql-stmts/drop-conversion.md +++ b/docs/sql-stmts/drop-conversion.md @@ -41,7 +41,7 @@ DROP CONVERSION myname; ## Compatibility -There is no `DROP CONVERSION` statement in the SQL standard. The standard has `CREATE TRANSLATION` and `DROP TRANSLATION` statements that are similar to the Cloudberry Database `CREATE CONVERSION` and `DROP CONVERSION` statements. +There is no `DROP CONVERSION` statement in the SQL standard. The standard has `CREATE TRANSLATION` and `DROP TRANSLATION` statements that are similar to the Apache Cloudberry `CREATE CONVERSION` and `DROP CONVERSION` statements.
## See also diff --git a/docs/sql-stmts/drop-domain.md b/docs/sql-stmts/drop-domain.md index 7455e87037..48a45007f6 100644 --- a/docs/sql-stmts/drop-domain.md +++ b/docs/sql-stmts/drop-domain.md @@ -44,7 +44,7 @@ DROP DOMAIN us_postal_code; ## Compatibility -This command conforms to the SQL standard, except for the `IF EXISTS` option, which is a Cloudberry Database extension. +This command conforms to the SQL standard, except for the `IF EXISTS` option, which is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-extension.md b/docs/sql-stmts/drop-extension.md index 4ae9a18b76..f25351981f 100644 --- a/docs/sql-stmts/drop-extension.md +++ b/docs/sql-stmts/drop-extension.md @@ -4,7 +4,7 @@ title: DROP EXTENSION # DROP EXTENSION -Removes an extension from a Cloudberry Database. +Removes an extension from an Apache Cloudberry database. ## Synopsis @@ -17,7 +17,7 @@ DROP EXTENSION [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] `DROP EXTENSION` removes extensions from the database. Dropping an extension causes its component objects to be dropped as well. :::info -The supporting extension files that were installed to create the extension (for example, the library and `.control` files) are not deleted. The files must be manually removed from the Cloudberry Database hosts. +The supporting extension files that were installed to create the extension (for example, the library and `.control` files) are not deleted. The files must be manually removed from the Apache Cloudberry hosts. ::: You must own the extension to use `DROP EXTENSION`. @@ -26,7 +26,7 @@ You must own the extension to use `DROP EXTENSION`. **`IF EXISTS`** -Do not throw an error if the extension does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the extension does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -56,7 +56,7 @@ This command fails if any of the extension objects are in use in the database.
F ## Compatibility -`DROP EXTENSION` is a Cloudberry Database extension. +`DROP EXTENSION` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-external-table.md b/docs/sql-stmts/drop-external-table.md index bb29c3f4e8..ad523a421d 100644 --- a/docs/sql-stmts/drop-external-table.md +++ b/docs/sql-stmts/drop-external-table.md @@ -24,7 +24,7 @@ Optional keyword for dropping external web tables. **`IF EXISTS`** -Do not throw an error if the external table does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the external table does not exist. Apache Cloudberry issues a notice in this case. **`name`** diff --git a/docs/sql-stmts/drop-foreign-data-wrapper.md b/docs/sql-stmts/drop-foreign-data-wrapper.md index 2d8a1c763e..3446cf3d6d 100644 --- a/docs/sql-stmts/drop-foreign-data-wrapper.md +++ b/docs/sql-stmts/drop-foreign-data-wrapper.md @@ -20,7 +20,7 @@ DROP FOREIGN DATA WRAPPER [ IF EXISTS ] [ CASCADE | RESTRICT ] **`IF EXISTS`** -Do not throw an error if the foreign-data wrapper does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the foreign-data wrapper does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -44,7 +44,7 @@ DROP FOREIGN DATA WRAPPER dbi; ## Compatibility -`DROP FOREIGN DATA WRAPPER` conforms to ISO/IEC 9075-9 (SQL/MED). The `IF EXISTS` clause is a Cloudberry Database extension. +`DROP FOREIGN DATA WRAPPER` conforms to ISO/IEC 9075-9 (SQL/MED). The `IF EXISTS` clause is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-foreign-table.md b/docs/sql-stmts/drop-foreign-table.md index a879c0bcda..8708d7edf8 100644 --- a/docs/sql-stmts/drop-foreign-table.md +++ b/docs/sql-stmts/drop-foreign-table.md @@ -20,7 +20,7 @@ DROP FOREIGN TABLE [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] **`IF EXISTS`** -Do not throw an error if the foreign table does not exist. Cloudberry Database issues a notice in this case.
+Do not throw an error if the foreign table does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -44,7 +44,7 @@ DROP FOREIGN TABLE films, distributors; ## Compatibility -`DROP FOREIGN TABLE` conforms to ISO/IEC 9075-9 (SQL/MED), except that the standard only allows one foreign table to be dropped per command. The `IF EXISTS` clause is a Cloudberry Database extension. +`DROP FOREIGN TABLE` conforms to ISO/IEC 9075-9 (SQL/MED), except that the standard only allows one foreign table to be dropped per command. The `IF EXISTS` clause is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-index.md b/docs/sql-stmts/drop-index.md index 2cc6c9ae84..29ed499354 100644 --- a/docs/sql-stmts/drop-index.md +++ b/docs/sql-stmts/drop-index.md @@ -50,7 +50,7 @@ DROP INDEX title_idx; ## Compatibility -`DROP INDEX` is a Cloudberry Database language extension. There are no provisions for indexes in the SQL standard. +`DROP INDEX` is an Apache Cloudberry language extension. There are no provisions for indexes in the SQL standard. ## See also diff --git a/docs/sql-stmts/drop-materialized-view.md b/docs/sql-stmts/drop-materialized-view.md index 0aaedd6f31..8a93c3fd5b 100644 --- a/docs/sql-stmts/drop-materialized-view.md +++ b/docs/sql-stmts/drop-materialized-view.md @@ -44,7 +44,7 @@ DROP MATERIALIZED VIEW order_summary; ## Compatibility -`DROP MATERIALIZED VIEW` is a Cloudberry Database extension of the SQL standard. +`DROP MATERIALIZED VIEW` is an Apache Cloudberry extension of the SQL standard. ## See also diff --git a/docs/sql-stmts/drop-owned.md b/docs/sql-stmts/drop-owned.md index e0edc06a6b..2d0f757af0 100644 --- a/docs/sql-stmts/drop-owned.md +++ b/docs/sql-stmts/drop-owned.md @@ -50,7 +50,7 @@ DROP OWNED BY sally; ## Compatibility -The `DROP OWNED` command is a Cloudberry Database extension. +The `DROP OWNED` command is an Apache Cloudberry extension.
## See also diff --git a/docs/sql-stmts/drop-policy.md b/docs/sql-stmts/drop-policy.md index b4c981a2e6..6575d5324a 100644 --- a/docs/sql-stmts/drop-policy.md +++ b/docs/sql-stmts/drop-policy.md @@ -45,7 +45,7 @@ DROP POLICY p1 ON my_table; ## Compatibility -`DROP POLICY` is a Cloudberry Database extension to the SQL standard. +`DROP POLICY` is an Apache Cloudberry extension to the SQL standard. ## See also diff --git a/docs/sql-stmts/drop-procedure.md b/docs/sql-stmts/drop-procedure.md index ab8d4dfe1b..591cbcae77 100644 --- a/docs/sql-stmts/drop-procedure.md +++ b/docs/sql-stmts/drop-procedure.md @@ -57,7 +57,7 @@ DROP PROCEDURE do_db_maintenance(); ## Compatibility -This command conforms to the SQL standard, with these Cloudberry Database extensions: +This command conforms to the SQL standard, with these Apache Cloudberry extensions: - The standard only allows one procedure to be dropped per command. diff --git a/docs/sql-stmts/drop-protocol.md b/docs/sql-stmts/drop-protocol.md index 8e6e4c55f0..65cd0c2778 100644 --- a/docs/sql-stmts/drop-protocol.md +++ b/docs/sql-stmts/drop-protocol.md @@ -24,7 +24,7 @@ You must be a superuser or the protocol owner to drop a protocol. **`IF EXISTS`** -Do not throw an error if the protocol does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the protocol does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -34,11 +34,11 @@ The name of an existing data access protocol. Dropping a data access protocol, does not drop the protocol's call handlers. You must drop these functions manually. -Be sure to remove any shared libraries that were used by the protocol from the Cloudberry Database hosts. +Be sure to remove any shared libraries that were used by the protocol from the Apache Cloudberry hosts. ## Compatibility -`DROP PROTOCOL` is a Cloudberry Database extension. +`DROP PROTOCOL` is an Apache Cloudberry extension.
## See also diff --git a/docs/sql-stmts/drop-resource-group.md b/docs/sql-stmts/drop-resource-group.md index 6e9c8df41c..49f92bca42 100644 --- a/docs/sql-stmts/drop-resource-group.md +++ b/docs/sql-stmts/drop-resource-group.md @@ -14,7 +14,7 @@ DROP RESOURCE GROUP ## Description -This command removes a resource group from Cloudberry Database. Only a superuser can drop a resource group. When you drop a resource group, the memory and CPU resources reserved by the group are returned to Cloudberry Database. +This command removes a resource group from Apache Cloudberry. Only a superuser can drop a resource group. When you drop a resource group, the memory and CPU resources reserved by the group are returned to Apache Cloudberry. To drop a role resource group, the group cannot be assigned to any roles, nor can it have any statements pending or running in the group. If you drop a resource group that you created for an external component, the behavior is determined by the external component. For example, dropping a resource group that you assigned to a PL/Container runtime stops running containers in the group. @@ -64,7 +64,7 @@ DROP RESOURCE GROUP adhoc; ## Compatibility -The `DROP RESOURCE GROUP` statement is a Cloudberry Database extension. +The `DROP RESOURCE GROUP` statement is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-resource-queue.md b/docs/sql-stmts/drop-resource-queue.md index 113d17fa9f..ef60132d01 100644 --- a/docs/sql-stmts/drop-resource-queue.md +++ b/docs/sql-stmts/drop-resource-queue.md @@ -14,7 +14,7 @@ DROP RESOURCE QUEUE ## Description -This command removes a resource queue from Cloudberry Database. To drop a resource queue, the queue cannot have any roles assigned to it, nor can it have any statements waiting in the queue. Only a superuser can drop a resource queue. +This command removes a resource queue from Apache Cloudberry.
To drop a resource queue, the queue cannot have any roles assigned to it, nor can it have any statements waiting in the queue. Only a superuser can drop a resource queue. ## Parameters @@ -58,7 +58,7 @@ DROP RESOURCE QUEUE adhoc; ## Compatibility -The `DROP RESOURCE QUEUE` statement is a Cloudberry Database extension. +The `DROP RESOURCE QUEUE` statement is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-role.md b/docs/sql-stmts/drop-role.md index 1073e29b88..7723017c8e 100644 --- a/docs/sql-stmts/drop-role.md +++ b/docs/sql-stmts/drop-role.md @@ -40,7 +40,7 @@ DROP ROLE sally, bob; ## Compatibility -The SQL standard defines `DROP ROLE`, but it allows only one role to be dropped at a time, and it specifies different privilege requirements than Cloudberry Database uses. +The SQL standard defines `DROP ROLE`, but it allows only one role to be dropped at a time, and it specifies different privilege requirements than Apache Cloudberry uses. ## See also diff --git a/docs/sql-stmts/drop-routine.md b/docs/sql-stmts/drop-routine.md index 416cd78970..ac59bd334a 100644 --- a/docs/sql-stmts/drop-routine.md +++ b/docs/sql-stmts/drop-routine.md @@ -29,7 +29,7 @@ This command will work independent of whether `foo` is an aggregate, function, o ## Compatibility -This command conforms to the SQL standard, with these Cloudberry Database extensions: +This command conforms to the SQL standard, with these Apache Cloudberry extensions: - The standard only allows one routine to be dropped per command. diff --git a/docs/sql-stmts/drop-rule.md b/docs/sql-stmts/drop-rule.md index 5516f2035f..e5bd8fbdf1 100644 --- a/docs/sql-stmts/drop-rule.md +++ b/docs/sql-stmts/drop-rule.md @@ -20,7 +20,7 @@ DROP RULE [IF EXISTS] ON [CASCADE | RESTRICT] **`IF EXISTS`** -Do not throw an error if the rule does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the rule does not exist. Apache Cloudberry issues a notice in this case. 
**`name`** @@ -48,7 +48,7 @@ DROP RULE sales_2006 ON sales; ## Compatibility -`DROP RULE` is a Cloudberry Database extension, as is the entire query rewrite system. +`DROP RULE` is an Apache Cloudberry extension, as is the entire query rewrite system. ## See also diff --git a/docs/sql-stmts/drop-schema.md b/docs/sql-stmts/drop-schema.md index 3f6ea32534..6fd059a872 100644 --- a/docs/sql-stmts/drop-schema.md +++ b/docs/sql-stmts/drop-schema.md @@ -50,7 +50,7 @@ DROP SCHEMA mystuff CASCADE; ## Compatibility -`DROP SCHEMA` is fully conforming with the SQL standard, except that the standard only allows one schema to be dropped per command. Also, the `IF EXISTS` option is a Cloudberry Database extension. +`DROP SCHEMA` is fully conforming with the SQL standard, except that the standard only allows one schema to be dropped per command. Also, the `IF EXISTS` option is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-sequence.md b/docs/sql-stmts/drop-sequence.md index 62acff4a61..222a0fcc25 100644 --- a/docs/sql-stmts/drop-sequence.md +++ b/docs/sql-stmts/drop-sequence.md @@ -20,7 +20,7 @@ DROP SEQUENCE [IF EXISTS] [, ...] [CASCADE | RESTRICT] **`IF EXISTS`** -Do not throw an error if the sequence does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the sequence does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -44,7 +44,7 @@ DROP SEQUENCE myserial; ## Compatibility -`DROP SEQUENCE` conforms to the SQL standard, except that the standard allows only one sequence to be dropped per command. Also, the `IF EXISTS` option is a Cloudberry Database extension. +`DROP SEQUENCE` conforms to the SQL standard, except that the standard allows only one sequence to be dropped per command. Also, the `IF EXISTS` option is an Apache Cloudberry extension. 
## See also diff --git a/docs/sql-stmts/drop-server.md b/docs/sql-stmts/drop-server.md index b2ce27b254..6c9d1f28aa 100644 --- a/docs/sql-stmts/drop-server.md +++ b/docs/sql-stmts/drop-server.md @@ -20,7 +20,7 @@ DROP SERVER [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] **`IF EXISTS`** -Do not throw an error if the server does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the server does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -44,7 +44,7 @@ DROP SERVER IF EXISTS foo; ## Compatibility -`DROP SERVER` conforms to ISO/IEC 9075-9 (SQL/MED). The `IF EXISTS` clause is a Cloudberry Database extension. +`DROP SERVER` conforms to ISO/IEC 9075-9 (SQL/MED). The `IF EXISTS` clause is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-statistics.md b/docs/sql-stmts/drop-statistics.md index 44a1cddbd4..9d7ac3930e 100644 --- a/docs/sql-stmts/drop-statistics.md +++ b/docs/sql-stmts/drop-statistics.md @@ -20,7 +20,7 @@ DROP STATISTICS [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] **`IF EXISTS`** -Do not throw an error if the statistics object does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the statistics object does not exist. Apache Cloudberry issues a notice in this case. **`name`** diff --git a/docs/sql-stmts/drop-table.md b/docs/sql-stmts/drop-table.md index e67fc39a91..59b9ac1687 100644 --- a/docs/sql-stmts/drop-table.md +++ b/docs/sql-stmts/drop-table.md @@ -22,7 +22,7 @@ DROP TABLE [IF EXISTS] [, ...] [CASCADE | RESTRICT] **`IF EXISTS`** -Do not throw an error if the table does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the table does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -52,7 +52,7 @@ DROP TABLE films, distributors; ## Compatibility -`DROP TABLE` conforms to the SQL standard, except that the standard allows only one table to be dropped per command. 
Also, the `IF EXISTS` option is a Cloudberry Database extension. +`DROP TABLE` conforms to the SQL standard, except that the standard allows only one table to be dropped per command. Also, the `IF EXISTS` option is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-tablespace.md b/docs/sql-stmts/drop-tablespace.md index e263e204ff..c32cf1a574 100644 --- a/docs/sql-stmts/drop-tablespace.md +++ b/docs/sql-stmts/drop-tablespace.md @@ -22,7 +22,7 @@ A tablespace can only be dropped by its owner or a superuser. The tablespace mus **`IF EXISTS`** -Do not throw an error if the tablespace does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the tablespace does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -32,7 +32,7 @@ The name of the tablespace to remove. You cannot run `DROP TABLESPACE` inside a transaction block. -Run `DROP TABLESPACE` during a period of low activity to avoid issues due to concurrent creation of tables and temporary objects. When a tablespace is dropped, there is a small window in which a table could be created in the tablespace that is currently being dropped. If this occurs, Cloudberry Database returns a warning. This is an example of the `DROP TABLESPACE` warning. +Run `DROP TABLESPACE` during a period of low activity to avoid issues due to concurrent creation of tables and temporary objects. When a tablespace is dropped, there is a small window in which a table could be created in the tablespace that is currently being dropped. If this occurs, Apache Cloudberry returns a warning. This is an example of the `DROP TABLESPACE` warning. ```sql testdb=# DROP TABLESPACE mytest; @@ -54,7 +54,7 @@ DROP TABLESPACE mystuff; ## Compatibility -`DROP TABLESPACE` is a Cloudberry Database extension. +`DROP TABLESPACE` is an Apache Cloudberry extension. 
## See also diff --git a/docs/sql-stmts/drop-text-search-configuration.md b/docs/sql-stmts/drop-text-search-configuration.md index a9ce516b9e..a694e35704 100644 --- a/docs/sql-stmts/drop-text-search-configuration.md +++ b/docs/sql-stmts/drop-text-search-configuration.md @@ -20,7 +20,7 @@ DROP TEXT SEARCH CONFIGURATION [ IF EXISTS ] [ CASCADE | RESTRICT ] **`IF EXISTS`** -Do not throw an error if the text search configuration does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the text search configuration does not exist. Apache Cloudberry issues a notice in this case. **`name`** diff --git a/docs/sql-stmts/drop-text-search-dictionary.md b/docs/sql-stmts/drop-text-search-dictionary.md index 651747e1d6..66a025127f 100644 --- a/docs/sql-stmts/drop-text-search-dictionary.md +++ b/docs/sql-stmts/drop-text-search-dictionary.md @@ -20,7 +20,7 @@ DROP TEXT SEARCH DICTIONARY [ IF EXISTS ] [ CASCADE | RESTRICT ] **`IF EXISTS`** -Do not throw an error if the text search dictionary does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the text search dictionary does not exist. Apache Cloudberry issues a notice in this case. **`name`** diff --git a/docs/sql-stmts/drop-text-search-parser.md b/docs/sql-stmts/drop-text-search-parser.md index 8743965235..6b007994f8 100644 --- a/docs/sql-stmts/drop-text-search-parser.md +++ b/docs/sql-stmts/drop-text-search-parser.md @@ -22,7 +22,7 @@ DROP TEXT SEARCH PARSER [ IF EXISTS ] [ CASCADE | RESTRICT ] **`IF EXISTS`** -Do not throw an error if the text search parser does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the text search parser does not exist. Apache Cloudberry issues a notice in this case. 
**`name`** diff --git a/docs/sql-stmts/drop-text-search-template.md b/docs/sql-stmts/drop-text-search-template.md index e12b5d5718..876b48e182 100644 --- a/docs/sql-stmts/drop-text-search-template.md +++ b/docs/sql-stmts/drop-text-search-template.md @@ -24,7 +24,7 @@ You must be a superuser to use `ALTER TEXT SEARCH TEMPLATE`. **`IF EXISTS`** -Do not throw an error if the text search template does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the text search template does not exist. Apache Cloudberry issues a notice in this case. **`name`** diff --git a/docs/sql-stmts/drop-transform.md b/docs/sql-stmts/drop-transform.md index f7f5bb1a13..becae116b0 100644 --- a/docs/sql-stmts/drop-transform.md +++ b/docs/sql-stmts/drop-transform.md @@ -22,7 +22,7 @@ To drop a transform, you must own the type and the language. These are the same **`IF EXISTS`** -Do not throw an error if the transform does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the transform does not exist. Apache Cloudberry issues a notice in this case. **`type_name`** @@ -50,7 +50,7 @@ DROP TRANSFORM FOR hstore LANGUAGE plpython3u; ## Compatibility -This form of `DROP TRANSFORM` is a Cloudberry Database extension. See [CREATE TRANSFORM](/docs/sql-stmts/create-transform.md) for details. +This form of `DROP TRANSFORM` is an Apache Cloudberry extension. See [CREATE TRANSFORM](/docs/sql-stmts/create-transform.md) for details. ## See also diff --git a/docs/sql-stmts/drop-trigger.md b/docs/sql-stmts/drop-trigger.md index 8c4310e85c..cf5ca45d3a 100644 --- a/docs/sql-stmts/drop-trigger.md +++ b/docs/sql-stmts/drop-trigger.md @@ -48,7 +48,7 @@ DROP TRIGGER sendmail ON expenses; ## Compatibility -The `DROP TRIGGER` statement in Cloudberry Database is not compatible with the SQL standard. In the SQL standard, trigger names are not local to tables, so the command is simply `DROP TRIGGER name`. 
+The `DROP TRIGGER` statement in Apache Cloudberry is not compatible with the SQL standard. In the SQL standard, trigger names are not local to tables, so the command is simply `DROP TRIGGER name`. ## See also diff --git a/docs/sql-stmts/drop-type.md b/docs/sql-stmts/drop-type.md index 78d2b080a8..55cf4f0810 100644 --- a/docs/sql-stmts/drop-type.md +++ b/docs/sql-stmts/drop-type.md @@ -20,7 +20,7 @@ DROP TYPE [IF EXISTS] [, ...] [CASCADE | RESTRICT] **`IF EXISTS`** -Do not throw an error if the type does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the type does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -44,7 +44,7 @@ DROP TYPE box; ## Compatibility -This command is similar to the corresponding command in the SQL standard, apart from the `IF EXISTS` option, which is a Cloudberry Database extension. But note that much of the `CREATE TYPE` command and the data type extension mechanisms in Cloudberry Database differ from the SQL standard. +This command is similar to the corresponding command in the SQL standard, apart from the `IF EXISTS` option, which is an Apache Cloudberry extension. But note that much of the `CREATE TYPE` command and the data type extension mechanisms in Apache Cloudberry differ from the SQL standard. ## See also diff --git a/docs/sql-stmts/drop-user-mapping.md b/docs/sql-stmts/drop-user-mapping.md index 9c8c113592..e289a2f43a 100644 --- a/docs/sql-stmts/drop-user-mapping.md +++ b/docs/sql-stmts/drop-user-mapping.md @@ -23,7 +23,7 @@ The owner of a foreign server can drop user mappings for that server for any use **`IF EXISTS`** -Do not throw an error if the user mapping does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the user mapping does not exist. Apache Cloudberry issues a notice in this case. 
**`user_name`** @@ -43,7 +43,7 @@ DROP USER MAPPING IF EXISTS FOR bob SERVER foo; ## Compatibility -`DROP SERVER` conforms to ISO/IEC 9075-9 (SQL/MED). The `IF EXISTS` clause is a Cloudberry Database extension. +`DROP SERVER` conforms to ISO/IEC 9075-9 (SQL/MED). The `IF EXISTS` clause is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/drop-user.md b/docs/sql-stmts/drop-user.md index 6093f009be..47df973822 100644 --- a/docs/sql-stmts/drop-user.md +++ b/docs/sql-stmts/drop-user.md @@ -18,7 +18,7 @@ DROP USER [IF EXISTS] [, ...] ## Compatibility -The `DROP USER` statement is a Cloudberry Database extension. The SQL standard leaves the definition of users to the implementation. +The `DROP USER` statement is an Apache Cloudberry extension. The SQL standard leaves the definition of users to the implementation. ## See also diff --git a/docs/sql-stmts/drop-view.md b/docs/sql-stmts/drop-view.md index bb7e023b16..53f5b5f1e8 100644 --- a/docs/sql-stmts/drop-view.md +++ b/docs/sql-stmts/drop-view.md @@ -20,7 +20,7 @@ DROP VIEW [IF EXISTS] [, ...] [CASCADE | RESTRICT] **`IF EXISTS`** -Do not throw an error if the view does not exist. Cloudberry Database issues a notice in this case. +Do not throw an error if the view does not exist. Apache Cloudberry issues a notice in this case. **`name`** @@ -44,7 +44,7 @@ DROP VIEW topten; ## Compatibility -`DROP VIEW` fully conforms to the SQL standard, except that the standard only allows one view to be dropped per command. Also, the `IF EXISTS` option is a Cloudberry Database extension. +`DROP VIEW` fully conforms to the SQL standard, except that the standard only allows one view to be dropped per command. Also, the `IF EXISTS` option is an Apache Cloudberry extension. 
## See also diff --git a/docs/sql-stmts/end.md b/docs/sql-stmts/end.md index 03a7b6af60..962ef947f1 100644 --- a/docs/sql-stmts/end.md +++ b/docs/sql-stmts/end.md @@ -14,7 +14,7 @@ END [WORK | TRANSACTION] [AND [NO] CHAIN] ## Description -`END` commits the current transaction. All changes made by the transaction become visible to others and are guaranteed to be durable if a crash occurs. This command is a Cloudberry Database extension that is equivalent to [`COMMIT`](/docs/sql-stmts/commit.md). +`END` commits the current transaction. All changes made by the transaction become visible to others and are guaranteed to be durable if a crash occurs. This command is an Apache Cloudberry extension that is equivalent to [`COMMIT`](/docs/sql-stmts/commit.md). ## Parameters @@ -43,7 +43,7 @@ END; ## Compatibility -`END` is a Cloudberry Database extension that provides functionality equivalent to [`COMMIT`](/docs/sql-stmts/commit.md), which is specified in the SQL standard. +`END` is an Apache Cloudberry extension that provides functionality equivalent to [`COMMIT`](/docs/sql-stmts/commit.md), which is specified in the SQL standard. ## See also diff --git a/docs/sql-stmts/execute.md b/docs/sql-stmts/execute.md index 82ed746047..d81e33386b 100644 --- a/docs/sql-stmts/execute.md +++ b/docs/sql-stmts/execute.md @@ -16,7 +16,7 @@ EXECUTE [ ( [, ...] ) ] `EXECUTE` is used to run a previously prepared statement. Since prepared statements only exist for the duration of a session, the prepared statement must have been created by a `PREPARE` statement run earlier in the current session. -If the `PREPARE` statement that created the statement specified some parameters, a compatible set of parameters must be passed to the `EXECUTE` statement, or else Cloudberry Database raises an error. Because (unlike functions) prepared statements are not overloaded based on the type or number of their parameters, the name of a prepared statement must be unique within a database session. 
+If the `PREPARE` statement that created the statement specified some parameters, a compatible set of parameters must be passed to the `EXECUTE` statement, or else Apache Cloudberry raises an error. Because (unlike functions) prepared statements are not overloaded based on the type or number of their parameters, the name of a prepared statement must be unique within a database session. For more information on the creation and usage of prepared statements, see [PREPARE](/docs/sql-stmts/prepare.md). diff --git a/docs/sql-stmts/explain.md b/docs/sql-stmts/explain.md index ff1aabe92f..7942138566 100644 --- a/docs/sql-stmts/explain.md +++ b/docs/sql-stmts/explain.md @@ -26,9 +26,9 @@ where option can be one of: ## Description -`EXPLAIN` displays the query plan that the Cloudberry Database or Postgres Planner generates for the supplied statement. Query plans are a tree plan of nodes. Each node in the plan represents a single operation, such as table scan, join, aggregation or a sort. +`EXPLAIN` displays the query plan that the Apache Cloudberry or Postgres Planner generates for the supplied statement. Query plans are a tree plan of nodes. Each node in the plan represents a single operation, such as table scan, join, aggregation or a sort. -Plans should be read from the bottom up as each node feeds rows into the node directly above it. The bottom nodes of a plan are usually table scan operations (sequential, index or bitmap index scans). If the query requires joins, aggregations, or sorts (or other operations on the raw rows) then there will be additional nodes above the scan nodes to perform these operations. The topmost plan nodes are usually the Cloudberry Database motion nodes (redistribute, explicit redistribute, broadcast, or gather motions). These are the operations responsible for moving rows between the segment instances during query processing. +Plans should be read from the bottom up as each node feeds rows into the node directly above it. 
The bottom nodes of a plan are usually table scan operations (sequential, index or bitmap index scans). If the query requires joins, aggregations, or sorts (or other operations on the raw rows) then there will be additional nodes above the scan nodes to perform these operations. The topmost plan nodes are usually the Apache Cloudberry motion nodes (redistribute, explicit redistribute, broadcast, or gather motions). These are the operations responsible for moving rows between the segment instances during query processing. The output of `EXPLAIN` has one line for each node in the plan tree, showing the basic node type plus the following cost estimates that the planner made for the execution of that plan node: @@ -85,7 +85,7 @@ Include information on the estimated startup and total cost of each plan node, a Include information on buffer usage. This parameter may be specified only when `ANALYZE` is also specified. If omitted, the default value is `false`, buffer usage information is not included. > **Note** -> Cloudberry Database does not support specifying `BUFFERS [true]` for distributed queries; ignore any displayed buffer usage information. +> Apache Cloudberry does not support specifying `BUFFERS [true]` for distributed queries; ignore any displayed buffer usage information. **`TIMING`** @@ -107,9 +107,9 @@ Any `SELECT`, `INSERT`, `UPDATE`, `DELETE`, `VALUES`, `EXECUTE`, `DECLARE`, or ` In order to allow the query optimizer to make reasonably informed decisions when optimizing queries, the `ANALYZE` statement should be run to record statistics about the distribution of data within the table. If you have not done this (or if the statistical distribution of the data in the table has changed significantly since the last time `ANALYZE` was run), the estimated costs are unlikely to conform to the real properties of the query, and consequently an inferior query plan may be chosen. 
-An SQL statement that is run during the execution of an `EXPLAIN ANALYZE` command is excluded from Cloudberry Database resource queues. +An SQL statement that is run during the execution of an `EXPLAIN ANALYZE` command is excluded from Apache Cloudberry resource queues. -For more information about query profiling, see "Query Profiling" in the *Cloudberry Database Administrator Guide*. For more information about resource queues, see "Resource Management with Resource Queues" in the *Cloudberry Database Administrator Guide*. +For more information about query profiling, see "Query Profiling" in the *Apache Cloudberry Administrator Guide*. For more information about resource queues, see "Resource Management with Resource Queues" in the *Apache Cloudberry Administrator Guide*. ## Examples @@ -128,7 +128,7 @@ EXPLAIN SELECT * FROM names WHERE name = 'Joelle'; If we read the plan from the bottom up, the query optimizer starts by doing a sequential scan of the `names` table. Notice that the `WHERE` clause is being applied as a *filter* condition. This means that the scan operation checks the condition for each row it scans, and outputs only the ones that pass the condition. -The results of the scan operation are passed up to a *gather motion* operation. In Cloudberry Database, a gather motion is when segments send rows up to the coordinator. In this case we have 3 segment instances sending to 1 coordinator instance (3:1). This operation is working on `slice1` of the parallel query execution plan. In Cloudberry Database a query plan is divided into *slices* so that portions of the query plan can be worked on in parallel by the segments. +The results of the scan operation are passed up to a *gather motion* operation. In Apache Cloudberry, a gather motion is when segments send rows up to the coordinator. In this case we have 3 segment instances sending to 1 coordinator instance (3:1). This operation is working on `slice1` of the parallel query execution plan. 
In Apache Cloudberry a query plan is divided into *slices* so that portions of the query plan can be worked on in parallel by the segments. The estimated startup cost for this plan is `00.00` (no cost) and a total cost of `431.27`. The planner is estimating that this query will return one row. diff --git a/docs/sql-stmts/fetch.md b/docs/sql-stmts/fetch.md index 4c061c6e67..88ca8178f0 100644 --- a/docs/sql-stmts/fetch.md +++ b/docs/sql-stmts/fetch.md @@ -35,7 +35,7 @@ where `` can be empty or one of: A cursor has an associated position, which is used by `FETCH`. The cursor position can be before the first row of the query result, on any particular row of the result, or after the last row of the result. When created, a cursor is positioned before the first row. After fetching some rows, the cursor is positioned on the row most recently retrieved. If `FETCH` runs off the end of the available rows then the cursor is left positioned after the last row. `FETCH ALL` will always leave the cursor positioned after the last row. > **Note** -> Because Cloudberry Database does not support scrollable cursors, it is not possible to move a cursor position backwards. You can only move a cursor forward in position using `FETCH`. +> Because Apache Cloudberry does not support scrollable cursors, it is not possible to move a cursor position backwards. You can only move a cursor forward in position using `FETCH`. The forms `NEXT`, `FIRST`, `ABSOLUTE`, `RELATIVE` fetch a single row after moving the cursor appropriately. If there is no such row, an empty result is returned, and the cursor is left positioned before the first row or after the last row as appropriate. @@ -50,7 +50,7 @@ The forms using `FORWARD` retrieve the indicated number of rows moving in the fo **`forward_direction`** -Defines the fetch direction and number of rows to fetch. Only forward fetches are allowed in Cloudberry Database. It can be one of the following: +Defines the fetch direction and number of rows to fetch. 
Only forward fetches are allowed in Apache Cloudberry. It can be one of the following: **`NEXT`** @@ -104,7 +104,7 @@ The count is the number of rows fetched (possibly zero). Note that in `psql`, th ## Notes -Cloudberry Database does not support scrollable cursors, so you can only use `FETCH` to move the cursor position forward. +Apache Cloudberry does not support scrollable cursors, so you can only use `FETCH` to move the cursor position forward. `ABSOLUTE` fetches are not any faster than navigating to the desired row with a relative move: the underlying implementation must traverse all the intermediate rows anyway. @@ -152,11 +152,11 @@ UPDATE films SET kind = 'Dramatic' WHERE CURRENT OF c_films; ## Compatibility -SQL standard allows cursors only in embedded SQL and in modules. Cloudberry Database permits cursors to be used interactively. +SQL standard allows cursors only in embedded SQL and in modules. Apache Cloudberry permits cursors to be used interactively. The variant of `FETCH` described here returns the data as if it were a `SELECT` result rather than placing it in host variables. Other than this point, `FETCH` is fully upward-compatible with the SQL standard. -The `FETCH` forms involving `FORWARD`, as well as the forms `FETCH` count and `FETCH ALL`, in which `FORWARD` is implicit, are Cloudberry Database extensions. `BACKWARD` is not supported. +The `FETCH` forms involving `FORWARD`, as well as the forms `FETCH` count and `FETCH ALL`, in which `FORWARD` is implicit, are Apache Cloudberry extensions. `BACKWARD` is not supported. The SQL standard allows only `FROM` preceding the cursor name; the option to use `IN`, or to leave them out altogether, is an extension. 
diff --git a/docs/sql-stmts/grant.md b/docs/sql-stmts/grant.md index f52a02a2ad..23383a97aa 100644 --- a/docs/sql-stmts/grant.md +++ b/docs/sql-stmts/grant.md @@ -127,7 +127,7 @@ Unlike the case with privileges, membership in a role cannot be granted to `PUBL **GRANT on Partitioned Tables** -By default, when you grant privileges to a partitioned table, Cloudberry Database recurses the operation to its child tables. To direct Cloudberry Database to perform the `GRANT` on the partitioned table only, specify the `ONLY ` clause. +By default, when you grant privileges to a partitioned table, Apache Cloudberry recurses the operation to its child tables. To direct Apache Cloudberry to perform the `GRANT` on the partitioned table only, specify the `ONLY ` clause. **GRANT on Protocols** @@ -152,7 +152,7 @@ You can also use the `GRANT` command to specify which users can access a trusted ``` -You can also use this command to grant users permissions to create and use `s3` and `pxf` external tables. However, external tables of type `http`, `https`, `gpfdist`, and `gpfdists`, are implemented internally in Cloudberry Database instead of as custom protocols. For these types, use the `CREATE ROLE` or `ALTER ROLE` command to set the `CREATEEXTTABLE` or `NOCREATEEXTTABLE` attribute for each user. See [CREATE ROLE](/docs/sql-stmts/create-role.md) for syntax and examples. +You can also use this command to grant users permissions to create and use `s3` and `pxf` external tables. However, external tables of type `http`, `https`, `gpfdist`, and `gpfdists`, are implemented internally in Apache Cloudberry instead of as custom protocols. For these types, use the `CREATE ROLE` or `ALTER ROLE` command to set the `CREATEEXTTABLE` or `NOCREATEEXTTABLE` attribute for each user. See [CREATE ROLE](/docs/sql-stmts/create-role.md) for syntax and examples. ## Parameters @@ -174,13 +174,13 @@ Allows `DELETE` of a row from the specified table. 
**`REFERENCES`** -This keyword is accepted, although foreign key constraints are currently not supported in Cloudberry Database. To create a foreign key constraint, it is necessary to have this privilege on both the referencing and referenced columns. The privilege may be granted for all columns of a table, or just specific columns. +This keyword is accepted, although foreign key constraints are currently not supported in Apache Cloudberry. To create a foreign key constraint, it is necessary to have this privilege on both the referencing and referenced columns. The privilege may be granted for all columns of a table, or just specific columns. **`TRIGGER`** Allows the creation of a trigger on the specified table. -> **Note** Cloudberry Database does not support triggers. +> **Note** Apache Cloudberry does not support triggers. **`TRUNCATE`** @@ -223,7 +223,7 @@ For servers, this privilege enables the grantee to create foreign tables using t **`ALL PRIVILEGES`** -Grant all of the available privileges at once. The `PRIVILEGES` key word is optional in Cloudberry Database, though it is required by strict SQL. +Grant all of the available privileges at once. The `PRIVILEGES` key word is optional in Apache Cloudberry, though it is required by strict SQL. **`PUBLIC`** @@ -241,7 +241,7 @@ The member of a role may in turn grant membership in the role to others. The [REVOKE](/docs/sql-stmts/revoke.md) command is used to revoke access privileges. -Cloudberry Database unifies the concepts of users and groups into a single kind of entity called a role. It is therefore not necessary to use the keyword `GROUP` to identify whether a grantee is a user or a group. `GROUP` is still allowed in the command, but it is a noise word. +Apache Cloudberry unifies the concepts of users and groups into a single kind of entity called a role. It is therefore not necessary to use the keyword `GROUP` to identify whether a grantee is a user or a group. 
`GROUP` is still allowed in the command, but it is a noise word. A user may perform `SELECT`, `INSERT`, and so forth, on a column if they hold that privilege for either the specific column or the whole table. Granting the privilege at the table level and then revoking it for one column does not do what you might wish: the table-level grant is unaffected by a column-level operation. @@ -257,7 +257,7 @@ If the role executing `GRANT` holds the required privileges indirectly via more Granting permission on a table does not automatically extend permissions to any sequences used by the table, including sequences tied to `SERIAL` columns. Permissions on a sequence must be set separately. -The `GRANT` command cannot be used to set privileges for the protocols `file`, `gpfdist`, or `gpfdists`. These protocols are implemented internally in Cloudberry Database. Instead, use the [CREATE ROLE](/docs/sql-stmts/create-role.md) or [ALTER ROLE](/docs/sql-stmts/alter-role.md) command to set the `CREATEEXTTABLE` attribute for the role. +The `GRANT` command cannot be used to set privileges for the protocols `file`, `gpfdist`, or `gpfdists`. These protocols are implemented internally in Apache Cloudberry. Instead, use the [CREATE ROLE](/docs/sql-stmts/create-role.md) or [ALTER ROLE](/docs/sql-stmts/alter-role.md) command to set the `CREATEEXTTABLE` attribute for the role. Use `psql`'s `\dp` meta-command to obtain information about existing privileges for tables and columns. There are other `\d` meta-commands that you can use to display the privileges of non-table objects. @@ -285,17 +285,17 @@ GRANT admins TO joe; ## Compatibility -According to the SQL standard, the `PRIVILEGES` key word in `ALL PRIVILEGES` is required, but it is optional in Cloudberry Database. The SQL standard does not support setting the privileges on more than one object per command. +According to the SQL standard, the `PRIVILEGES` key word in `ALL PRIVILEGES` is required, but it is optional in Apache Cloudberry. 
The SQL standard does not support setting the privileges on more than one object per command.
 
-Cloudberry Database allows an object owner to revoke their own ordinary privileges: for example, a table owner can make the table read-only to theirself by revoking their own `INSERT`, `UPDATE`, `DELETE`, and `TRUNCATE` privileges. This is not possible according to the SQL standard. Cloudberry Database treats the owner's privileges as having been granted by the owner to the owner; therefore they can revoke them too. In the SQL standard, the owner's privileges are granted by an assumed *system* entity. Not being *system*, the owner cannot revoke these rights.
+Apache Cloudberry allows an object owner to revoke their own ordinary privileges: for example, a table owner can make the table read-only to themselves by revoking their own `INSERT`, `UPDATE`, `DELETE`, and `TRUNCATE` privileges. This is not possible according to the SQL standard. Apache Cloudberry treats the owner's privileges as having been granted by the owner to the owner; therefore they can revoke them too. In the SQL standard, the owner's privileges are granted by an assumed *system* entity. Not being *system*, the owner cannot revoke these rights.
 
-The SQL standard allows the `GRANTED BY` option to be used in all forms of `GRANT`. Cloudberry Database only supports it when granting role membership, and even then only superusers may use it in nontrivial ways.
+The SQL standard allows the `GRANTED BY` option to be used in all forms of `GRANT`. Apache Cloudberry only supports it when granting role membership, and even then only superusers may use it in nontrivial ways.
 
 The SQL standard provides for a `USAGE` privilege on other kinds of objects: character sets, collations, translations.
 
-In the SQL standard, sequences only have a `USAGE` privilege, which controls the use of the `NEXT VALUE FOR` expression, which is equivalent to the function `nextval()` in Cloudberry Database. 
The sequence privileges `SELECT` and `UPDATE` are Cloudberry Database extensions. The application of the sequence `USAGE` privilege to the `currval()` function is also a Cloudberry Database extension (as is the function itself).
+In the SQL standard, sequences only have a `USAGE` privilege, which controls the use of the `NEXT VALUE FOR` expression, which is equivalent to the function `nextval()` in Apache Cloudberry. The sequence privileges `SELECT` and `UPDATE` are Apache Cloudberry extensions. The application of the sequence `USAGE` privilege to the `currval()` function is also an Apache Cloudberry extension (as is the function itself).
 
-Privileges on databases, tablespaces, schemas, and languages are Cloudberry Database extensions.
+Privileges on databases, tablespaces, schemas, and languages are Apache Cloudberry extensions.
 
 ## See also
 
diff --git a/docs/sql-stmts/import-foreign-schema.md b/docs/sql-stmts/import-foreign-schema.md
index ebe327e923..8f908b3848 100644
--- a/docs/sql-stmts/import-foreign-schema.md
+++ b/docs/sql-stmts/import-foreign-schema.md
@@ -46,7 +46,7 @@ The name of the foreign server from which to import the table definitions.
 
 **`local_schema`**
 
-The schema in which Cloudberry Database will create the imported foreign tables.
+The schema in which Apache Cloudberry will create the imported foreign tables.
 
 **`OPTIONS ( option 'value' [, ... ] )`**
 
@@ -71,7 +71,7 @@ IMPORT FOREIGN SCHEMA foreign_films LIMIT TO (actors, directors)
 
 ## Compatibility
 
-The `IMPORT FOREIGN SCHEMA` command conforms to the SQL standard, except that the `OPTIONS` clause is a Cloudberry Database extension.
+The `IMPORT FOREIGN SCHEMA` command conforms to the SQL standard, except that the `OPTIONS` clause is an Apache Cloudberry extension. 
 ## See also
 
diff --git a/docs/sql-stmts/index.md b/docs/sql-stmts/index.md
index 7c93977f25..3fd708045f 100644
--- a/docs/sql-stmts/index.md
+++ b/docs/sql-stmts/index.md
@@ -50,9 +50,9 @@
 
 ## Extension
 
-- [CREATE EXTENSION](./create-extension.md) - Registers an extension in a Cloudberry Database.
+- [CREATE EXTENSION](./create-extension.md) - Registers an extension in Apache Cloudberry.
 - [ALTER EXTENSION](./alter-extension.md) - Change the definition of an extension.
-- [DROP EXTENSION](./drop-extension.md) - Removes an extension from a Cloudberry Database.
+- [DROP EXTENSION](./drop-extension.md) - Removes an extension from Apache Cloudberry.
 
 ## External Table
 
@@ -174,7 +174,7 @@
 
 ## Protocol
 
-- [CREATE PROTOCOL](./create-protocol.md) - Registers a custom data access protocol that can be specified when defining a Cloudberry Database external table.
+- [CREATE PROTOCOL](./create-protocol.md) - Registers a custom data access protocol that can be specified when defining an Apache Cloudberry external table.
 - [ALTER PROTOCOL](./alter-protocol.md) - Changes the definition of a protocol.
 - [DROP PROTOCOL](./drop-protocol.md) - Removes a data access protocol from a database.
 
@@ -320,7 +320,7 @@
 
 ## Configuration Parameter
 
-- [SET](./set.md) - Changes the value of a run-time Cloudberry Database configuration parameter.
+- [SET](./set.md) - Changes the value of a run-time Apache Cloudberry configuration parameter.
 - [RESET](./reset.md) - Restores the value of a run-time system configuration parameter to the default value.
 - [SHOW](./show.md) - Shows the value of a run-time system configuration parameter.
 
diff --git a/docs/sql-stmts/insert.md b/docs/sql-stmts/insert.md
index e681ba0ca2..ab9be7f43f 100644
--- a/docs/sql-stmts/insert.md
+++ b/docs/sql-stmts/insert.md
@@ -40,7 +40,7 @@ The target column names may be listed in any order. 
If no list of column names i Each column not present in the explicit or implicit column list will be filled with a default value, either its declared default value or null if there is no default. -If the expression for any column is not of the correct data type, Cloudberry Database attempts automatic type conversion. +If the expression for any column is not of the correct data type, Apache Cloudberry attempts automatic type conversion. `INSERT` into tables that lack unique indexes will not be blocked by concurrent activity. Tables with unique indexes might block if concurrent sessions perform actions that lock or modify rows matching the unique index values being inserted; the details are covered in [Index Uniqueness Checks](https://www.postgresql.org/docs/12/index-unique-checks.html) in the PostgreSQL documentation. `ON CONFLICT` can be used to specify an alternative action to raising a unique constraint or exclusion constraint violation error. (See [ON CONFLICT Clause](#on-conflict-clause) below.) @@ -119,7 +119,7 @@ For a partitioned table, all the child tables are locked during the `INSERT` ope The optional `ON CONFLICT` clause specifies an alternative action to raising a unique violation or exclusion constraint violation error. For each individual row proposed for insertion, either the insertion proceeds, or, if an arbiter constraint or index specified by conflict_target is violated, the alternative conflict_action is taken. `ON CONFLICT DO NOTHING` simply avoids inserting a row as its alternative action. `ON CONFLICT DO UPDATE` updates the existing row that conflicts with the row proposed for insertion as its alternative action. -conflict_target can perform unique index inference. When performing inference, it consists of one or more index_column_name columns and/or index_expression expressions, and an optional index_predicate. 
All table_name unique indexes that, without regard to order, contain exactly the conflict_target-specified columns/expressions are inferred (chosen) as arbiter indexes. If an index_predicate is specified, it must, as a further requirement for inference, satisfy arbiter indexes. Note that this means a non-partial unique index (a unique index without a predicate) will be inferred (and thus used by `ON CONFLICT`) if such an index satisfying every other criteria is available. If an attempt at inference is unsuccessful, Cloudberry Database raises an error. +conflict_target can perform unique index inference. When performing inference, it consists of one or more index_column_name columns and/or index_expression expressions, and an optional index_predicate. All table_name unique indexes that, without regard to order, contain exactly the conflict_target-specified columns/expressions are inferred (chosen) as arbiter indexes. If an index_predicate is specified, it must, as a further requirement for inference, satisfy arbiter indexes. Note that this means a non-partial unique index (a unique index without a predicate) will be inferred (and thus used by `ON CONFLICT`) if such an index satisfying every other criteria is available. If an attempt at inference is unsuccessful, Apache Cloudberry raises an error. `ON CONFLICT DO UPDATE` guarantees an atomic `INSERT` or `UPDATE` outcome; provided there is no independent error, one of those two outcomes is guaranteed, even under high concurrency. This is also known as *UPSERT* — `UPDATE` or `INSERT`. @@ -183,11 +183,11 @@ If the `INSERT` command contains a `RETURNING` clause, the result will be simila ## Notes -If the specified table is a partitioned table, Cloudberry Database routes each row to the appropriate partition and inserts into it. If the specified table is a partition, an error will occur if one of the input rows violates the partition constraint. 
+If the specified table is a partitioned table, Apache Cloudberry routes each row to the appropriate partition and inserts into it. If the specified table is a partition, an error will occur if one of the input rows violates the partition constraint.
 
 For a partitioned table, all of the child tables are locked during the `INSERT` operation when the Global Deadlock Detector is not enabled (the default). Only some of the leaf child tables are locked when the Global Deadlock Detector is enabled.
 
-Cloudberry Database supports a maximum of 127 concurrent `INSERT` transactions into a single append-optimized table.
+Apache Cloudberry supports a maximum of 127 concurrent `INSERT` transactions into a single append-optimized table.
 
 ## Examples
 
@@ -306,9 +306,9 @@ INSERT INTO distributors (did, dname) VALUES (10, 'Conrad International')
 
 ## Compatibility
 
-`INSERT` conforms to the SQL standard, except that the `RETURNING` clause is a Cloudberry Database extension, as is the ability to use `WITH` with `INSERT`, and the ability to specify an alternative action with `ON CONFLICT`. Also, the case in which a column name list is omitted, but not all of the columns are filled from the `VALUES` clause or query, is disallowed by the standard.
+`INSERT` conforms to the SQL standard, except that the `RETURNING` clause is an Apache Cloudberry extension, as is the ability to use `WITH` with `INSERT`, and the ability to specify an alternative action with `ON CONFLICT`. Also, the case in which a column name list is omitted, but not all of the columns are filled from the `VALUES` clause or query, is disallowed by the standard.
 
-The SQL standard specifies that `OVERRIDING SYSTEM VALUE` can only be specified if an identity column that is generated always exists. Cloudberry Database allows the clause in any case and ignores it if it is not applicable. +The SQL standard specifies that `OVERRIDING SYSTEM VALUE` can only be specified if an identity column that is generated always exists. 
Apache Cloudberry allows the clause in any case and ignores it if it is not applicable. Possible limitations of the query clause are documented under [SELECT](/docs/sql-stmts/select.md). diff --git a/docs/sql-stmts/load.md b/docs/sql-stmts/load.md index 79ee1f3e46..d260bd1dfd 100644 --- a/docs/sql-stmts/load.md +++ b/docs/sql-stmts/load.md @@ -14,11 +14,11 @@ LOAD '' ## Description -This command loads a shared library file into the Cloudberry Database server address space. If the file had been loaded previously, it is first unloaded. This command is primarily useful to unload and reload a shared library file that has been changed since the server first loaded it. To make use of the shared library, function(s) in it need to be declared using the `CREATE FUNCTION` command. +This command loads a shared library file into the Apache Cloudberry server address space. If the file had been loaded previously, it is first unloaded. This command is primarily useful to unload and reload a shared library file that has been changed since the server first loaded it. To make use of the shared library, function(s) in it need to be declared using the `CREATE FUNCTION` command. The library file name is typically given as just a bare file name, which is sought in the server's library search path (set by `dynamic_library_path`). Alternatively it can be given as a full path name. In either case the platform's standard shared library file name extension may be omitted. -Note that in Cloudberry Database the shared library file (`.so` file) must reside in the same path location on every host in the Cloudberry Database array (coordinators, segments, and mirrors). +Note that in Apache Cloudberry the shared library file (`.so` file) must reside in the same path location on every host in the Apache Cloudberry array (coordinators, segments, and mirrors). 

Non-superusers can only apply `LOAD` to library files located in `$libdir/plugins/` — the specified `filename` must begin with exactly that string. You must ensure that only “safe” libraries are installed there.
 
@@ -26,19 +26,19 @@ Non-superusers can only apply `LOAD` to library files located in `$libdir/plugin
 
 **`filename`**
 
-The path and file name of a shared library file. This file must exist in the same location on all hosts in your Cloudberry Database array.
+The path and file name of a shared library file. This file must exist in the same location on all hosts in your Apache Cloudberry array.
 
 ## Examples
 
 Load a shared library file:
 
 ```sql
-LOAD '/usr/local/cloudberry-db/lib/myfuncs.so';
+LOAD '/usr/local/cloudberry/lib/myfuncs.so';
 ```
 
 ## Compatibility
 
-`LOAD` is a Cloudberry Database extension.
+`LOAD` is an Apache Cloudberry extension.
 
 ## See also
 
diff --git a/docs/sql-stmts/lock.md b/docs/sql-stmts/lock.md
index e07dba9e6b..4041704d47 100644
--- a/docs/sql-stmts/lock.md
+++ b/docs/sql-stmts/lock.md
@@ -23,7 +23,7 @@ ACCESS SHARE | ROW SHARE | ROW EXCLUSIVE | SHARE UPDATE EXCLUSIVE
 
 `LOCK TABLE` obtains a table-level lock, waiting if necessary for any conflicting locks to be released. If `NOWAIT` is specified, `LOCK TABLE` does not wait to acquire the desired lock: if it cannot be acquired immediately, the command is stopped and an error is emitted. Once obtained, the lock is held for the remainder of the current transaction. There is no `UNLOCK TABLE` command; locks are always released at transaction end.
 
-When acquiring locks automatically for commands that reference tables, Cloudberry Database always uses the least restrictive lock mode possible. `LOCK TABLE` provides for cases when you might need more restrictive locking. For example, suppose an application runs a transaction at the `READ COMMITTED` isolation level and needs to ensure that data in a table remains stable for the duration of the transaction. 
To achieve this you could obtain `SHARE` lock mode over the table before querying. This will prevent concurrent data changes and ensure subsequent reads of the table see a stable view of committed data, because `SHARE` lock mode conflicts with the `ROW EXCLUSIVE` lock acquired by writers, and your `LOCK TABLE IN SHARE MODE` statement will wait until any concurrent holders of `ROW EXCLUSIVE` mode locks commit or rolls back. Thus, once you obtain the lock, there are no uncommitted writes outstanding; furthermore none can begin until you release the lock.
+When acquiring locks automatically for commands that reference tables, Apache Cloudberry always uses the least restrictive lock mode possible. `LOCK TABLE` provides for cases when you might need more restrictive locking. For example, suppose an application runs a transaction at the `READ COMMITTED` isolation level and needs to ensure that data in a table remains stable for the duration of the transaction. To achieve this you could obtain `SHARE` lock mode over the table before querying. This will prevent concurrent data changes and ensure subsequent reads of the table see a stable view of committed data, because `SHARE` lock mode conflicts with the `ROW EXCLUSIVE` lock acquired by writers, and your `LOCK TABLE IN SHARE MODE` statement will wait until any concurrent holders of `ROW EXCLUSIVE` mode locks commit or roll back. Thus, once you obtain the lock, there are no uncommitted writes outstanding; furthermore none can begin until you release the lock.
 
 To achieve a similar effect when running a transaction at the `REPEATABLE READ` or `SERIALIZABLE` isolation level, you have to run the `LOCK TABLE` statement before running any `SELECT` or data modification statement. A `REPEATABLE READ` or `SERIALIZABLE` transaction's view of data will be frozen when its first `SELECT` or data modification statement begins. 
A `LOCK TABLE` later in the transaction will still prevent concurrent writes — but it won't ensure that what the transaction reads corresponds to the latest committed values. @@ -46,11 +46,11 @@ The lock mode specifies which locks this lock conflicts with. If no lock mode is - ROW EXCLUSIVE — Conflicts with the `SHARE`, `SHARE ROW EXCLUSIVE`, `EXCLUSIVE`, and `ACCESS EXCLUSIVE` lock modes. The commands `INSERT` and `COPY` automatically acquire this lock mode on the target table (in addition to `ACCESS SHARE` locks on any other referenced tables) See [Note](#notes). - SHARE UPDATE EXCLUSIVE — Conflicts with the `SHARE UPDATE EXCLUSIVE`, `SHARE`, `SHARE ROW EXCLUSIVE`, `EXCLUSIVE`, and `ACCESS EXCLUSIVE` lock modes. This mode protects a table against concurrent schema changes and `VACUUM` runs. Acquired by `VACUUM` (without `FULL`) on heap tables and `ANALYZE`. - SHARE — Conflicts with the `ROW EXCLUSIVE`, `SHARE UPDATE EXCLUSIVE`, `SHARE ROW EXCLUSIVE, EXCLUSIVE`, and `ACCESS EXCLUSIVE` lock modes. This mode protects a table against concurrent data changes. Acquired automatically by `CREATE INDEX`. -- SHARE ROW EXCLUSIVE — Conflicts with the `ROW EXCLUSIVE`, `SHARE UPDATE EXCLUSIVE`, `SHARE`, `SHARE ROW EXCLUSIVE`, `EXCLUSIVE`, and `ACCESS EXCLUSIVE` lock modes. This mode protects a table against concurrent data changes, and is self-exclusive so that only one session can hold it at a time. This lock mode is not automatically acquired by any Cloudberry Database command. -- EXCLUSIVE — Conflicts with the `ROW SHARE`, `ROW EXCLUSIVE`, `SHARE UPDATE EXCLUSIVE`, `SHARE`, `SHARE ROW EXCLUSIVE`, `EXCLUSIVE`, and `ACCESS EXCLUSIVE` lock modes. This mode allows only concurrent `ACCESS SHARE` locks, i.e., only reads from the table can proceed in parallel with a transaction holding this lock mode. This lock mode is automatically acquired for `UPDATE`, `SELECT FOR UPDATE`, and `DELETE` in Cloudberry Database (which is more restrictive locking than in regular PostgreSQL). 
See [Note](#notes). +- SHARE ROW EXCLUSIVE — Conflicts with the `ROW EXCLUSIVE`, `SHARE UPDATE EXCLUSIVE`, `SHARE`, `SHARE ROW EXCLUSIVE`, `EXCLUSIVE`, and `ACCESS EXCLUSIVE` lock modes. This mode protects a table against concurrent data changes, and is self-exclusive so that only one session can hold it at a time. This lock mode is not automatically acquired by any Apache Cloudberry command. +- EXCLUSIVE — Conflicts with the `ROW SHARE`, `ROW EXCLUSIVE`, `SHARE UPDATE EXCLUSIVE`, `SHARE`, `SHARE ROW EXCLUSIVE`, `EXCLUSIVE`, and `ACCESS EXCLUSIVE` lock modes. This mode allows only concurrent `ACCESS SHARE` locks, i.e., only reads from the table can proceed in parallel with a transaction holding this lock mode. This lock mode is automatically acquired for `UPDATE`, `SELECT FOR UPDATE`, and `DELETE` in Apache Cloudberry (which is more restrictive locking than in regular PostgreSQL). See [Note](#notes). - ACCESS EXCLUSIVE — Conflicts with locks of all modes (`ACCESS SHARE`, `ROW SHARE`, `ROW EXCLUSIVE`, `SHARE UPDATE EXCLUSIVE`, `SHARE`, `SHARE ROW EXCLUSIVE`, `EXCLUSIVE`, and `ACCESS EXCLUSIVE`). This mode guarantees that the holder is the only transaction accessing the table in any way. Acquired automatically by the `ALTER TABLE`, `DROP TABLE`, `TRUNCATE`, `REINDEX`, `CLUSTER`, and `VACUUM FULL` commands. This is the default lock mode for `LOCK TABLE` statements that do not specify a mode explicitly. This lock is also briefly acquired by `VACUUM` (without `FULL`) on append-optimized tables during processing. -> **Note:** As the default, Cloudberry Database acquires an `EXCLUSIVE` lock on tables for `DELETE`, `UPDATE`, and `SELECT FOR UPDATE` operations on heap tables. When the Global Deadlock Detector is enabled, the lock mode for the operations on heap tables is `ROW EXCLUSIVE`. +> **Note:** As the default, Apache Cloudberry acquires an `EXCLUSIVE` lock on tables for `DELETE`, `UPDATE`, and `SELECT FOR UPDATE` operations on heap tables. 
When the Global Deadlock Detector is enabled, the lock mode for the operations on heap tables is `ROW EXCLUSIVE`. **`NOWAIT`** @@ -58,7 +58,7 @@ Specifies that `LOCK TABLE` should not wait for any conflicting locks to be rele **`COORDINATOR ONLY`** -Specifies that when a `LOCK TABLE` command is issued, Cloudberry Database will lock tables on the coordinator only, rather than on the coordinator and all of the segments. This is particularly useful for metadata-only operations. +Specifies that when a `LOCK TABLE` command is issued, Apache Cloudberry will lock tables on the coordinator only, rather than on the coordinator and all of the segments. This is particularly useful for metadata-only operations. **Note** This option is only supported in `ACCESS SHARE MODE`. @@ -66,7 +66,7 @@ Specifies that when a `LOCK TABLE` command is issued, Cloudberry Database will l `LOCK TABLE ... IN ACCESS SHARE MODE` requires `SELECT` privileges on the target table. All other forms of `LOCK` require table-level `UPDATE`, `DELETE`, or `TRUNCATE` privileges. -`LOCK TABLE` is useless outside of a transaction block: the lock would be held only to the completion of the `LOCK` statement. Therefore, Cloudberry Database reports an error if `LOCK` is used outside of a transaction block. Use `BEGIN` and `END` to define a transaction block. +`LOCK TABLE` is useless outside of a transaction block: the lock would be held only to the completion of the `LOCK` statement. Therefore, Apache Cloudberry reports an error if `LOCK` is used outside of a transaction block. Use `BEGIN` and `END` to define a transaction block. `LOCK TABLE` only deals with table-level locks, and so the mode names involving `ROW` are all misnomers. These mode names should generally be read as indicating the intention of the user to acquire row-level locks within the locked table. Also, `ROW EXCLUSIVE` mode is a shareable table lock. 
Keep in mind that all the lock modes have identical semantics so far as `LOCK TABLE` is concerned, differing only in the rules about which modes conflict with which. For information on how to acquire an actual row-level lock, see the `FOR UPDATE/FOR SHARE` clause in the [SELECT](/docs/sql-stmts/select.md) reference documentation. @@ -98,9 +98,9 @@ COMMIT WORK; ## Compatibility -There is no `LOCK TABLE` in the SQL standard, which instead uses `SET TRANSACTION` to specify concurrency levels on transactions. Cloudberry Database supports that too; see [SET TRANSACTION](/docs/sql-stmts/set-transaction.md) for details. +There is no `LOCK TABLE` in the SQL standard, which instead uses `SET TRANSACTION` to specify concurrency levels on transactions. Apache Cloudberry supports that too; see [SET TRANSACTION](/docs/sql-stmts/set-transaction.md) for details. -Except for `ACCESS SHARE`, `ACCESS EXCLUSIVE`, and `SHARE UPDATE EXCLUSIVE` lock modes, the Cloudberry Database lock modes and the `LOCK TABLE` syntax are compatible with those present in Oracle. +Except for `ACCESS SHARE`, `ACCESS EXCLUSIVE`, and `SHARE UPDATE EXCLUSIVE` lock modes, the Apache Cloudberry lock modes and the `LOCK TABLE` syntax are compatible with those present in Oracle. ## See also diff --git a/docs/sql-stmts/move.md b/docs/sql-stmts/move.md index 4ff419888f..570c960fc9 100644 --- a/docs/sql-stmts/move.md +++ b/docs/sql-stmts/move.md @@ -33,7 +33,7 @@ where `` can be empty or one of: > **Note** You cannot `MOVE` a `PARALLEL RETRIEVE CURSOR`. -> **Note** Because Cloudberry Database does not support scrollable cursors, it is not possible to move a cursor position backwards. You can only move a cursor forward in position using `MOVE`. +> **Note** Because Apache Cloudberry does not support scrollable cursors, it is not possible to move a cursor position backwards. You can only move a cursor forward in position using `MOVE`. 
The parameters for the `MOVE` command are identical to those of the `FETCH` command; refer to [`FETCH`](/docs/sql-stmts/fetch.md) for details on syntax and usage.
 
diff --git a/docs/sql-stmts/notify.md b/docs/sql-stmts/notify.md
index 927838ed70..8e41ff722e 100644
--- a/docs/sql-stmts/notify.md
+++ b/docs/sql-stmts/notify.md
@@ -16,7 +16,7 @@ NOTIFY [ , ]
 
 The `NOTIFY` command sends a notification event together with an optional "payload" string to each client application that has previously executed `LISTEN ` for the specified channel name in the current database. Notifications are visible to all users.
 
-`NOTIFY` provides a simple interprocess communication mechanism for a collection of processes accessing the same Cloudberry Database. A payload string can be sent along with the notification, and higher-level mechanisms for passing structured data can be built by using tables in the database to pass additional data from notifier to listener(s).
+`NOTIFY` provides a simple interprocess communication mechanism for a collection of processes accessing the same Apache Cloudberry database. A payload string can be sent along with the notification, and higher-level mechanisms for passing structured data can be built by using tables in the database to pass additional data from notifier to listener(s).
 
 The information passed to the client for a notification event includes the notification channel name, the notifying session's server process PID, and the payload string, which is an empty string if it has not been specified.
 
@@ -26,7 +26,7 @@ When `NOTIFY` is used to signal the occurrence of changes to a particular table,
 
 `NOTIFY` interacts with SQL transactions in some important ways. Firstly, if a `NOTIFY` is executed inside a transaction, the notify events are not delivered until and unless the transaction is committed. This is appropriate, since if the transaction is aborted, all the commands within it have had no effect, including `NOTIFY`. 
But it can be disconcerting if one is expecting the notification events to be delivered immediately. Secondly, if a listening session receives a notification signal while it is within a transaction, the notification event will not be delivered to its connected client until just after the transaction is completed (either committed or aborted). Again, the reasoning is that if a notification were delivered within a transaction that was later aborted, one would want the notification to be undone somehow — but the server cannot "take back" a notification once it has sent it to the client. So notification events are only delivered between transactions. The upshot of this is that applications using `NOTIFY` for real-time signaling should try to keep their transactions short. -If the same channel name is signaled multiple times from the same transaction with identical payload strings, the database server can decide to deliver a single notification only. On the other hand, Cloudberry Database will always deliver notifications with distinct payload strings as distinct notifications. Similarly, notifications from different transactions will never get folded into one notification. Except for dropping later instances of duplicate notifications, `NOTIFY` guarantees that notifications from the same transaction get delivered in the order they were sent. It is also guaranteed that messages from different transactions are delivered in the order in which the transactions committed. +If the same channel name is signaled multiple times from the same transaction with identical payload strings, the database server can decide to deliver a single notification only. On the other hand, Apache Cloudberry will always deliver notifications with distinct payload strings as distinct notifications. Similarly, notifications from different transactions will never get folded into one notification. 
Except for dropping later instances of duplicate notifications, `NOTIFY` guarantees that notifications from the same transaction get delivered in the order they were sent. It is also guaranteed that messages from different transactions are delivered in the order in which the transactions committed. It is common for a client that executes `NOTIFY` to be listening on the same notification channel itself. In that case it will get back a notification event, just like all the other listening sessions. Depending on the application logic, this could result in useless work, for example, reading a database table to find the same updates that that session just wrote out. It is possible to avoid such extra work by noticing whether the notifying session's server process PID (supplied in the notification event message) is the same as one's own session's PID (available from `libpq`). When they are the same, the notification event is one's own work bouncing back, and can be ignored. diff --git a/docs/sql-stmts/prepare.md b/docs/sql-stmts/prepare.md index 57b2fcc9f5..9c879d9c64 100644 --- a/docs/sql-stmts/prepare.md +++ b/docs/sql-stmts/prepare.md @@ -40,11 +40,11 @@ Any `SELECT`, `INSERT`, `UPDATE`, `DELETE`, or `VALUES` statement. A prepared statement can be run with either a *generic plan* or a *custom plan*. A generic plan is the same across all executions, while a custom plan is generated for a specific execution using the parameter values given in that call. Use of a generic plan avoids planning overhead, but in some situations a custom plan will be much more efficient to run because the planner can make use of knowledge of the parameter values. If the prepared statement has no parameters, a generic plan is always used. -By default (with the default value, `auto`, for the server configuration parameter `plan_cache_mode`), the server automatically chooses whether to use a generic or custom plan for a prepared statement that has parameters. 
The current rule for this is that the first five executions are done with custom plans and then Cloudberry Database calculates the average estimated cost of those plans. Then a generic plan is created and its estimated cost is compared to the average custom-plan cost. Subsequent executions use the generic plan if its cost is not so much higher than the average custom-plan cost as to make repeated replanning seem preferable. +By default (with the default value, `auto`, for the server configuration parameter `plan_cache_mode`), the server automatically chooses whether to use a generic or custom plan for a prepared statement that has parameters. The current rule for this is that the first five executions are done with custom plans and then Apache Cloudberry calculates the average estimated cost of those plans. Then a generic plan is created and its estimated cost is compared to the average custom-plan cost. Subsequent executions use the generic plan if its cost is not so much higher than the average custom-plan cost as to make repeated replanning seem preferable. You can override this heuristic, forcing the server to use either generic or custom plans, by setting `plan_cache_mode` to `force_generic_plan` or `force_custom_plan` respectively. This setting is primarily useful if the generic plan's cost estimate is badly off for some reason, allowing it to be chosen even though its actual cost is much more than that of a custom plan. -To examine the query plan Cloudberry Database is using for a prepared statement, use [EXPLAIN](/docs/sql-stmts/explain.md), for example: +To examine the query plan Apache Cloudberry is using for a prepared statement, use [EXPLAIN](/docs/sql-stmts/explain.md), for example: ```sql EXPLAIN EXECUTE (); @@ -52,9 +52,9 @@ EXPLAIN EXECUTE (); If a generic plan is in use, it will contain parameter symbols `$n`, while a custom plan will have the supplied parameter values substituted into it. 
-For more information on query planning and the statistics collected by Cloudberry Database for that purpose, see the [ANALYZE](/docs/sql-stmts/analyze.md) documentation. +For more information on query planning and the statistics collected by Apache Cloudberry for that purpose, see the [ANALYZE](/docs/sql-stmts/analyze.md) documentation. -Although the main point of a prepared statement is to avoid repeated parse analysis and planning of the statement, Cloudberry Database will force re-analysis and re-planning of the statement before using it whenever database objects used in the statement have undergone definitional (DDL) changes since the previous use of the prepared statement. Also, if the value of `search_path` changes from one use to the next, the statement will be re-parsed using the new `search_path`. These rules make use of a prepared statement semantically almost equivalent to re-submitting the same query text over and over, but with a performance benefit if no object definitions are changed, especially if the best plan remains the same across uses. An example of a case where the semantic equivalence is not perfect is that if the statement refers to a table by an unqualified name, and then a new table of the same name is created in a schema appearing earlier in the `search_path`, no automatic re-parse will occur since no object used in the statement changed. However, if some other change forces a re-parse, the new table will be referenced in subsequent uses. +Although the main point of a prepared statement is to avoid repeated parse analysis and planning of the statement, Apache Cloudberry will force re-analysis and re-planning of the statement before using it whenever database objects used in the statement have undergone definitional (DDL) changes since the previous use of the prepared statement. Also, if the value of `search_path` changes from one use to the next, the statement will be re-parsed using the new `search_path`. 
These rules make use of a prepared statement semantically almost equivalent to re-submitting the same query text over and over, but with a performance benefit if no object definitions are changed, especially if the best plan remains the same across uses. An example of a case where the semantic equivalence is not perfect is that if the statement refers to a table by an unqualified name, and then a new table of the same name is created in a schema appearing earlier in the `search_path`, no automatic re-parse will occur since no object used in the statement changed. However, if some other change forces a re-parse, the new table will be referenced in subsequent uses. You can see all prepared statements available in the session by querying the pg_prepared_statements system view. diff --git a/docs/sql-stmts/reassign-owned.md b/docs/sql-stmts/reassign-owned.md index 3c56d5b411..c4d7f6e9ec 100644 --- a/docs/sql-stmts/reassign-owned.md +++ b/docs/sql-stmts/reassign-owned.md @@ -47,7 +47,7 @@ REASSIGN OWNED BY sally, bob TO admin; ## Compatibility -The `REASSIGN OWNED` command is a Cloudberry Database extension. +The `REASSIGN OWNED` command is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/refresh-materialized-view.md b/docs/sql-stmts/refresh-materialized-view.md index 33a83631a3..f97f9cdd52 100644 --- a/docs/sql-stmts/refresh-materialized-view.md +++ b/docs/sql-stmts/refresh-materialized-view.md @@ -60,7 +60,7 @@ REFRESH MATERIALIZED VIEW annual_statistics_basis WITH NO DATA; ## Compatibility -`REFRESH MATERIALIZED VIEW` is a Cloudberry Database extension of the SQL standard. +`REFRESH MATERIALIZED VIEW` is an Apache Cloudberry extension of the SQL standard.
## See also diff --git a/docs/sql-stmts/reindex.md b/docs/sql-stmts/reindex.md index 65e24108ef..6aca1410f1 100644 --- a/docs/sql-stmts/reindex.md +++ b/docs/sql-stmts/reindex.md @@ -17,7 +17,7 @@ REINDEX [ (VERBOSE) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } `REINDEX` rebuilds an index using the data stored in the index's table, replacing the old copy of the index. There are several scenarios in which to use `REINDEX`: - An index has become corrupted, and no longer contains valid data. Although in theory this should never happen, in practice indexes can become corrupted due to software bugs or hardware failures. `REINDEX` provides a recovery method. -- An index has become bloated, that is, it contains many empty or nearly-empty pages. This can occur with B-tree indexes in Cloudberry Database under certain uncommon access patterns. `REINDEX` provides a way to reduce the space consumption of the index by writing a new version of the index without the dead pages. +- An index has become bloated, that is, it contains many empty or nearly-empty pages. This can occur with B-tree indexes in Apache Cloudberry under certain uncommon access patterns. `REINDEX` provides a way to reduce the space consumption of the index by writing a new version of the index without the dead pages. - You have altered a storage parameter (such as `fillfactor`) for an index, and wish to ensure that the change has taken full effect. ## Parameters @@ -56,7 +56,7 @@ Prints a progress report as each index is reindexed. `REINDEX` is similar to a drop and recreate of the index in that the index contents are rebuilt from scratch. However, the locking considerations are rather different. `REINDEX` locks out writes but not reads of the index's parent table. It also takes an `ACCESS EXCLUSIVE` lock on the specific index being processed, which will block reads that attempt to use that index. 
In contrast, `DROP INDEX` momentarily takes an `ACCESS EXCLUSIVE` lock on the parent table, blocking both writes and reads. The subsequent `CREATE INDEX` locks out writes but not reads; since the index is not there, no read will attempt to use it, meaning that there will be no blocking but reads may be forced into expensive sequential scans. -Reindexing a single index or table requires being the owner of that index or table. Reindexing a schema or database requires being the owner of the schema or database. Note that it is therefore sometimes possible for non-superusers to rebuild indexes of tables owned by other users. However, as a special exception, when a non-superuser issues `REINDEX DATABASE`, `REINDEX SCHEMA` or `REINDEX SYSTEM`, Cloudberry Database skips indexes on shared catalogs unless the user owns the catalog (which typically won't be the case). Of course, superusers can always reindex anything. +Reindexing a single index or table requires being the owner of that index or table. Reindexing a schema or database requires being the owner of the schema or database. Note that it is therefore sometimes possible for non-superusers to rebuild indexes of tables owned by other users. However, as a special exception, when a non-superuser issues `REINDEX DATABASE`, `REINDEX SCHEMA` or `REINDEX SYSTEM`, Apache Cloudberry skips indexes on shared catalogs unless the user owns the catalog (which typically won't be the case). Of course, superusers can always reindex anything. REINDEX` does not update the `reltuples` and `relpages` statistics for the index. To update those statistics, run `ANALYZE` on the table after reindexing. diff --git a/docs/sql-stmts/release-savepoint.md b/docs/sql-stmts/release-savepoint.md index 3eae7bba73..4f68e055b3 100644 --- a/docs/sql-stmts/release-savepoint.md +++ b/docs/sql-stmts/release-savepoint.md @@ -32,7 +32,7 @@ Specifying a savepoint name that was not previously defined is an error. 
It is not possible to release a savepoint when the transaction is in an aborted state. -If multiple savepoints have the same name, Cloudberry Database releases only the most recently defined unreleased savepoint. Repeated commands release progressively older savepoints. +If multiple savepoints have the same name, Apache Cloudberry releases only the most recently defined unreleased savepoint. Repeated commands release progressively older savepoints. ## Examples @@ -51,7 +51,7 @@ The above transaction inserts both 3 and 4. ## Compatibility -This command conforms to the SQL standard. The standard specifies that the key word `SAVEPOINT` is mandatory, but Cloudberry Database allows it to be omitted. +This command conforms to the SQL standard. The standard specifies that the key word `SAVEPOINT` is mandatory, but Apache Cloudberry allows it to be omitted. ## See also diff --git a/docs/sql-stmts/reset.md b/docs/sql-stmts/reset.md index 038338a776..fe0c1d3ef5 100644 --- a/docs/sql-stmts/reset.md +++ b/docs/sql-stmts/reset.md @@ -48,7 +48,7 @@ RESET statement_mem; ## Compatibility -`RESET` is a Cloudberry Database extension. +`RESET` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/retrieve.md b/docs/sql-stmts/retrieve.md index 3ab61706d5..e642183fa8 100644 --- a/docs/sql-stmts/retrieve.md +++ b/docs/sql-stmts/retrieve.md @@ -20,7 +20,7 @@ A parallel retrieve cursor has an associated position, which is used by `RETRIEV > **Note:** > -> Because Cloudberry Database does not support scrollable cursors, the `RETRIEVE` command moves a parallel retrieve cursor only forward in position. +> Because Apache Cloudberry does not support scrollable cursors, the `RETRIEVE` command moves a parallel retrieve cursor only forward in position. When it is created, a parallel retrieve cursor is positioned before the first row. After retrieving some rows, the cursor is positioned on the row most recently retrieved.
@@ -101,7 +101,7 @@ COMMIT; ## Compatibility -`RETRIEVE` is a Cloudberry Database extension. The SQL standard makes no provisions for parallel retrieve cursors. +`RETRIEVE` is an Apache Cloudberry extension. The SQL standard makes no provisions for parallel retrieve cursors. ## See also diff --git a/docs/sql-stmts/revoke.md b/docs/sql-stmts/revoke.md index da8e1618b0..f5e8ad6242 100644 --- a/docs/sql-stmts/revoke.md +++ b/docs/sql-stmts/revoke.md @@ -107,9 +107,9 @@ If `GRANT OPTION FOR` is specified, only the grant option for the privilege is r If a role holds a privilege with grant option and has granted it to other roles then the privileges held by those other roles are called dependent privileges. If the privilege or the grant option held by the first role is being revoked and dependent privileges exist, those dependent privileges are also revoked if `CASCADE` is specified, else the revoke action will fail. This recursive revocation only affects privileges that were granted through a chain of roles that is traceable to the role that is the subject of this `REVOKE` command. Thus, the affected roles may effectively keep the privilege if it was also granted through other roles. -When you revoke privileges on a table, Cloudberry Database revokes the corresponding column privileges (if any) on each column of the table, as well. On the other hand, if a role has been granted privileges on a table, then revoking the same privileges from individual columns will have no effect. +When you revoke privileges on a table, Apache Cloudberry revokes the corresponding column privileges (if any) on each column of the table, as well. On the other hand, if a role has been granted privileges on a table, then revoking the same privileges from individual columns will have no effect. -By default, when you revoke privileges on a partitioned table, Cloudberry Database recurses the operation to its child tables.
To direct Cloudberry Database to perform the `REVOKE` on the partitioned table only, specify the `ONLY ` clause. +By default, when you revoke privileges on a partitioned table, Apache Cloudberry recurses the operation to its child tables. To direct Apache Cloudberry to perform the `REVOKE` on the partitioned table only, specify the `ONLY ` clause. When revoking membership in a role, `GRANT OPTION` is instead called `ADMIN OPTION`, but the behavior is similar. This form of the command also allows a `GRANTED BY` option, but that option is currently ignored (except for checking the existence of the named role). Note also that this form of the command does not allow the noise word `GROUP` in role_specification. @@ -121,11 +121,11 @@ See [GRANT](/docs/sql-stmts/grant.md). A user may revoke only those privileges directly granted by that user. If, for example, user A grants a privilege with grant option to user B, and user B has in turn granted it to user C, then user A cannot revoke the privilege directly from C. Instead, user A could revoke the grant option from user B and use the `CASCADE` option so that the privilege is in turn revoked from user C. For another example, if both A and B grant the same privilege to C, A can revoke their own grant but not B's grant, so C effectively still has the privilege. -When a non-owner of an object attempts to `REVOKE` privileges on the object, the command fails outright if the user has no privileges whatsoever on the object. As long as some privilege is available, the command proceeds, but it will revoke only those privileges for which the user has grant options. The `REVOKE ALL PRIVILEGES` forms issue a warning message if no grant options are held, while the other forms issue a warning if grant options for any of the privileges specifically named in the command are not held. 
(In principle these statements apply to the object owner as well, but since Cloudberry Database always treats the owner as holding all grant options, the cases can never occur.) +When a non-owner of an object attempts to `REVOKE` privileges on the object, the command fails outright if the user has no privileges whatsoever on the object. As long as some privilege is available, the command proceeds, but it will revoke only those privileges for which the user has grant options. The `REVOKE ALL PRIVILEGES` forms issue a warning message if no grant options are held, while the other forms issue a warning if grant options for any of the privileges specifically named in the command are not held. (In principle these statements apply to the object owner as well, but since Apache Cloudberry always treats the owner as holding all grant options, the cases can never occur.) -If a superuser chooses to issue a `GRANT` or `REVOKE` command, Cloudberry Database performs the command as though it were issued by the owner of the affected object. Since all privileges ultimately come from the object owner (possibly indirectly via chains of grant options), it is possible for a superuser to revoke all privileges, but this might require use of `CASCADE` as stated above. +If a superuser chooses to issue a `GRANT` or `REVOKE` command, Apache Cloudberry performs the command as though it were issued by the owner of the affected object. Since all privileges ultimately come from the object owner (possibly indirectly via chains of grant options), it is possible for a superuser to revoke all privileges, but this might require use of `CASCADE` as stated above. -`REVOKE` may also be invoked by a role that is not the owner of the affected object, but is a member of the role that owns the object, or is a member of a role that holds privileges `WITH GRANT OPTION` on the object. 
In this case, Cloudberry Database performs the command as though it were issued by the containing role that actually owns the object or holds the privileges `WITH GRANT OPTION`. For example, if table `t1` is owned by role `g1`, of which role `u1` is a member, then `u1` can revoke privileges on `t1` that are recorded as being granted by `g1`. This includes grants made by `u1` as well as by other members of role `g1`. +`REVOKE` may also be invoked by a role that is not the owner of the affected object, but is a member of the role that owns the object, or is a member of a role that holds privileges `WITH GRANT OPTION` on the object. In this case, Apache Cloudberry performs the command as though it were issued by the containing role that actually owns the object or holds the privileges `WITH GRANT OPTION`. For example, if table `t1` is owned by role `g1`, of which role `u1` is a member, then `u1` can revoke privileges on `t1` that are recorded as being granted by `g1`. This includes grants made by `u1` as well as by other members of role `g1`. If the role that runs `REVOKE` holds privileges indirectly via more than one role membership path, it is unspecified which containing role will be used to perform the command. In such cases it is best practice to use `SET ROLE` to become the specific role as which you want to do the `REVOKE`. Failure to do so may lead to revoking privileges other than the ones you intended, or not revoking any privileges at all. @@ -155,7 +155,7 @@ REVOKE admins FROM joe; The compatibility notes of the [GRANT](/docs/sql-stmts/grant.md) command also apply to `REVOKE`. -Either `RESTRICT` or `CASCADE` is required according to the standard, but Cloudberry Database assumes `RESTRICT` by default. +Either `RESTRICT` or `CASCADE` is required according to the standard, but Apache Cloudberry assumes `RESTRICT` by default. 
## See also diff --git a/docs/sql-stmts/rollback-to-savepoint.md b/docs/sql-stmts/rollback-to-savepoint.md index 1adf8c8180..6d2585fabf 100644 --- a/docs/sql-stmts/rollback-to-savepoint.md +++ b/docs/sql-stmts/rollback-to-savepoint.md @@ -67,7 +67,7 @@ COMMIT; ## Compatibility -The SQL standard specifies that the key word `SAVEPOINT` is mandatory, but Cloudberry Database (and Oracle) allow it to be omitted. SQL allows only `WORK`, not `TRANSACTION`, as a noise word after `ROLLBACK`. Also, SQL has an optional clause `AND [NO] CHAIN` which is not currently supported by Cloudberry Database. Otherwise, this command conforms to the SQL standard. +The SQL standard specifies that the key word `SAVEPOINT` is mandatory, but Apache Cloudberry (and Oracle) allow it to be omitted. SQL allows only `WORK`, not `TRANSACTION`, as a noise word after `ROLLBACK`. Also, SQL has an optional clause `AND [NO] CHAIN` which is not currently supported by Apache Cloudberry. Otherwise, this command conforms to the SQL standard. ## See also diff --git a/docs/sql-stmts/rollback.md b/docs/sql-stmts/rollback.md index 950c586583..a8f1cbf5d2 100644 --- a/docs/sql-stmts/rollback.md +++ b/docs/sql-stmts/rollback.md @@ -43,7 +43,7 @@ ROLLBACK; ## Compatibility -The command `ROLLBACK` conforms to the SQL standard. The form `ROLLBACK TRANSACTION` is a Cloudberry Database extension. +The command `ROLLBACK` conforms to the SQL standard. The form `ROLLBACK TRANSACTION` is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/savepoint.md b/docs/sql-stmts/savepoint.md index f6a4abcc19..a047675037 100644 --- a/docs/sql-stmts/savepoint.md +++ b/docs/sql-stmts/savepoint.md @@ -86,7 +86,7 @@ The above transaction shows row 3 being rolled back first, then row 2. ## Compatibility -SQL requires a savepoint to be destroyed automatically when another savepoint with the same name is established.
In Cloudberry Database, the old savepoint is kept, though only the more recent one is used when rolling back or releasing. (Releasing the newer savepoint will cause the older one to again become accessible to [ROLLBACK TO SAVEPOINT](/docs/sql-stmts/rollback-to-savepoint.md) and [RELEASE SAVEPOINT](/docs/sql-stmts/release-savepoint.md).) Otherwise, `SAVEPOINT` is fully SQL conforming. +SQL requires a savepoint to be destroyed automatically when another savepoint with the same name is established. In Apache Cloudberry, the old savepoint is kept, though only the more recent one is used when rolling back or releasing. (Releasing the newer savepoint will cause the older one to again become accessible to [ROLLBACK TO SAVEPOINT](/docs/sql-stmts/rollback-to-savepoint.md) and [RELEASE SAVEPOINT](/docs/sql-stmts/release-savepoint.md).) Otherwise, `SAVEPOINT` is fully SQL conforming. ## See also diff --git a/docs/sql-stmts/select-into.md b/docs/sql-stmts/select-into.md index 9bcab024fe..0742d926a5 100644 --- a/docs/sql-stmts/select-into.md +++ b/docs/sql-stmts/select-into.md @@ -64,7 +64,7 @@ SELECT * INTO films_recent FROM films WHERE date_prod >= '2016-01-01'; ## Compatibility -The SQL standard uses `SELECT INTO` to represent selecting values into scalar variables of a host program, rather than creating a new table. The Cloudberry Database usage of `SELECT INTO` to represent table creation is historical. It is best to use [CREATE TABLE AS](/docs/sql-stmts/create-table-as.md) for this purpose in new applications. +The SQL standard uses `SELECT INTO` to represent selecting values into scalar variables of a host program, rather than creating a new table. The Apache Cloudberry usage of `SELECT INTO` to represent table creation is historical. It is best to use [CREATE TABLE AS](/docs/sql-stmts/create-table-as.md) for this purpose in new applications. 
## See also diff --git a/docs/sql-stmts/select.md b/docs/sql-stmts/select.md index 0252eedf01..69ccb5e03f 100644 --- a/docs/sql-stmts/select.md +++ b/docs/sql-stmts/select.md @@ -134,9 +134,9 @@ A key property of `WITH` queries is that they are evaluated only once per execut However, a `WITH` query can be marked `NOT MATERIALIZED` to remove this guarantee. In that case, the `WITH` query can be folded into the primary query much as though it were a simple sub-`SELECT` in the primary query's `FROM` clause. This results in duplicate computations if the primary query refers to that `WITH` query more than once; but if each such use requires only a few rows of the `WITH` query's total output, `NOT MATERIALIZED` can provide a net savings by allowing the queries to be optimized jointly. `NOT MATERIALIZED` is ignored if it is attached to a `WITH` query that is recursive or is not side-effect-free (for example, is not a plain `SELECT` containing no volatile functions). -By default, a side-effect-free `WITH` query is folded into the primary query if it is used exactly once in the primary query's `FROM` clause. This allows joint optimization of the two query levels in situations where that should be semantically invisible. However, such folding can be prevented by marking the `WITH` query as `MATERIALIZED`. That might be useful, for example, if the `WITH` query is being used as an optimization fence to prevent the planner from choosing a bad plan. Cloudberry Database versions before 7 never did such folding, so queries written for older versions might rely on `WITH` to act as an optimization fence. +By default, a side-effect-free `WITH` query is folded into the primary query if it is used exactly once in the primary query's `FROM` clause. This allows joint optimization of the two query levels in situations where that should be semantically invisible. However, such folding can be prevented by marking the `WITH` query as `MATERIALIZED`. 
That might be useful, for example, if the `WITH` query is being used as an optimization fence to prevent the planner from choosing a bad plan. Apache Cloudberry versions before 7 never did such folding, so queries written for older versions might rely on `WITH` to act as an optimization fence. -See WITH Queries (Common Table Expressions) in the *Cloudberry Database Administrator Guide* for additional information. +See WITH Queries (Common Table Expressions) in the *Apache Cloudberry Administrator Guide* for additional information. ### The `FROM` clause @@ -155,15 +155,15 @@ A substitute name for the `FROM` item containing the alias. An alias is used for **`TABLESAMPLE sampling_method ( argument [, ...] ) [ REPEATABLE ( seed ) ]`** -A `TABLESAMPLE` clause after a table_name indicates that the specified sampling_method should be used to retrieve a subset of the rows in that table. This sampling precedes the application of any other filters such as `WHERE` clauses. The standard Cloudberry Database distribution includes two sampling methods, `BERNOULLI` and `SYSTEM`. You can install other sampling methods in the database via extensions. +A `TABLESAMPLE` clause after a table_name indicates that the specified sampling_method should be used to retrieve a subset of the rows in that table. This sampling precedes the application of any other filters such as `WHERE` clauses. The standard Apache Cloudberry distribution includes two sampling methods, `BERNOULLI` and `SYSTEM`. You can install other sampling methods in the database via extensions. The `BERNOULLI` and `SYSTEM` sampling methods each accept a single argument which is the fraction of the table to sample, expressed as a percentage between 0 and 100. This argument can be any real-valued expression. (Other sampling methods might accept more or different arguments.) These two methods each return a randomly-chosen sample of the table that will contain approximately the specified percentage of the table's rows. 
The `BERNOULLI` method scans the whole table and selects or ignores individual rows independently with the specified probability. The `SYSTEM` method does block-level sampling with each block having the specified chance of being selected; all rows in each selected block are returned. The `SYSTEM` method is significantly faster than the `BERNOULLI` method when small sampling percentages are specified, but it may return a less-random sample of the table as a result of clustering effects. -The optional `REPEATABLE` clause specifies a seed number or expression to use for generating random numbers within the sampling method. The seed value can be any non-null floating-point value. Two queries that specify the same seed and argument values will select the same sample of the table, if the table has not been changed meanwhile. But different seed values usually produce different samples. If `REPEATABLE` is not specified, then Cloudberry Database selects a new random sample for each query, based upon a system-generated seed. Note that some add-on sampling methods do not accept `REPEATABLE`, and will always produce new samples on each use. +The optional `REPEATABLE` clause specifies a seed number or expression to use for generating random numbers within the sampling method. The seed value can be any non-null floating-point value. Two queries that specify the same seed and argument values will select the same sample of the table, if the table has not been changed meanwhile. But different seed values usually produce different samples. If `REPEATABLE` is not specified, then Apache Cloudberry selects a new random sample for each query, based upon a system-generated seed. Note that some add-on sampling methods do not accept `REPEATABLE`, and will always produce new samples on each use. **`select`** -A sub-`SELECT` can appear in the `FROM` clause. This acts as though its output were created as a temporary table for the duration of this single `SELECT` command. 
Note that the sub-`SELECT` must be surrounded by parentheses, and an alias *must* be provided for it. A [VALUES](/docs/sql-stmts/values.md) command can also be used here. See "Non-standard Clauses" in the [Compatibility](#compatibility) section for limitations of using correlated sub-selects in Cloudberry Database. +A sub-`SELECT` can appear in the `FROM` clause. This acts as though its output were created as a temporary table for the duration of this single `SELECT` command. Note that the sub-`SELECT` must be surrounded by parentheses, and an alias *must* be provided for it. A [VALUES](/docs/sql-stmts/values.md) command can also be used here. See "Non-standard Clauses" in the [Compatibility](#compatibility) section for limitations of using correlated sub-selects in Apache Cloudberry. **`with_query_name`** @@ -173,7 +173,7 @@ The `WITH` query hides a table of the same name for the purposes of the primary **`function_name`** -Function calls can appear in the `FROM` clause. (This is especially useful for functions that return result sets, but any function can be used.) This acts as though the function's output were created as a temporary table for the duration of this single `SELECT` command. When you add the optional `WITH ORDINALITY` clause to the function call, Cloudberry Database appends a new column after all of the function's output columns with numbering for each row. +Function calls can appear in the `FROM` clause. (This is especially useful for functions that return result sets, but any function can be used.) This acts as though the function's output were created as a temporary table for the duration of this single `SELECT` command. When you add the optional `WITH ORDINALITY` clause to the function call, Apache Cloudberry appends a new column after all of the function's output columns with numbering for each row. You can provide an alias in the same way as for a table. 
If an alias is specified, you can also specify a column alias list to provide substitute names for one or more attributes of the function's composite return type, including the column added by `ORDINALITY` if present. @@ -222,7 +222,7 @@ A clause of the form `USING ( a, b, ... )` is shorthand for `ON left_table.a = r **`LATERAL`** -The `LATERAL` key word can precede a sub-`SELECT FROM` item. This allows the sub-`SELECT` to refer to columns of `FROM` items that appear before it in the `FROM` list. (Without `LATERAL`, Cloudberry Database evaluates each sub-`SELECT` independently and so cannot cross-reference any other `FROM` item.) +The `LATERAL` key word can precede a sub-`SELECT FROM` item. This allows the sub-`SELECT` to refer to columns of `FROM` items that appear before it in the `FROM` list. (Without `LATERAL`, Apache Cloudberry evaluates each sub-`SELECT` independently and so cannot cross-reference any other `FROM` item.) `LATERAL` can also precede a function-call `FROM` item. In this case it is a noise word, because the function expression can refer to earlier `FROM` items. @@ -230,7 +230,7 @@ A `LATERAL` item can appear at top level in the `FROM` list, or within a `JOIN` When a `FROM` item contains `LATERAL` cross-references, evaluation proceeds as follows: for each row of the `FROM` item providing the cross-referenced column(s), or set of rows of multiple `FROM` items providing the columns, the `LATERAL` item is evaluated using that row or row set's values of the columns. The resulting row(s) are joined as usual with the rows they were computed from. This is repeated for each row or set of rows from the column source table(s). -The column source table(s) must be `INNER` or `LEFT` joined to the `LATERAL` item, else there would not be a well-defined set of rows from which to compute each set of rows for the `LATERAL` item. Thus, although a construct such as ` RIGHT JOIN LATERAL ` is syntactically valid, Cloudberry Database does not permit `` to reference ``. 
+The column source table(s) must be `INNER` or `LEFT` joined to the `LATERAL` item, else there would not be a well-defined set of rows from which to compute each set of rows for the `LATERAL` item. Thus, although a construct such as ` RIGHT JOIN LATERAL ` is syntactically valid, Apache Cloudberry does not permit `` to reference ``. ### The `WHERE` clause @@ -272,7 +272,7 @@ Keep in mind that all aggregate functions are evaluated before evaluating any "s Currently, `FOR NO KEY UPDATE`, `FOR UPDATE`, `FOR SHARE`, and `FOR KEY SHARE` cannot be specified with `GROUP BY`. -Cloudberry Database has the following additional OLAP grouping extensions (often referred to as *supergroups*): +Apache Cloudberry has the following additional OLAP grouping extensions (often referred to as *supergroups*): **`ROLLUP`** @@ -436,17 +436,17 @@ Currently, `FOR NO KEY UPDATE`, `FOR UPDATE`, `FOR SHARE`, and `FOR KEY SHARE` c The `SELECT` list (between the key words `SELECT` and `FROM`) specifies expressions that form the output rows of the `SELECT` statement. The expressions can (and usually do) refer to columns computed in the `FROM` clause. -An expression in the `SELECT` list can be a constant value, a column reference, an operator invocation, a function call, an aggregate expression, a window expression, a scalar subquery, and so on. A number of constructs can be classified as an expression but do not follow any general syntax rules. These generally have the semantics of a function or operator. For information about SQL value expressions and function calls, see Querying Data in the *Cloudberry Database Administrator Guide*. +An expression in the `SELECT` list can be a constant value, a column reference, an operator invocation, a function call, an aggregate expression, a window expression, a scalar subquery, and so on. A number of constructs can be classified as an expression but do not follow any general syntax rules. These generally have the semantics of a function or operator. 
For information about SQL value expressions and function calls, see Querying Data in the *Apache Cloudberry Administrator Guide*. -Just as in a table, every output column of a `SELECT` has a name. In a simple `SELECT` this name is just used to label the column for display, but when the `SELECT` is a sub-query of a larger query, the name is seen by the larger query as the column name of the virtual table produced by the sub-query. To specify the name to use for an output column, write `AS ` after the column's expression. (You can omit `AS`, but only if the desired output name does not match any SQL keyword. For protection against possible future keyword additions, you can always either write `AS` or double-quote the output name.) If you do not specify a column name, Cloudberry Database chooses a name automatically. If the column's expression is a simple column reference then the chosen name is the same as that column's name. In more complex cases, a function or type name may be used, or the system may fall back on a generated name such as `?column?` or `columnN`. +Just as in a table, every output column of a `SELECT` has a name. In a simple `SELECT` this name is just used to label the column for display, but when the `SELECT` is a sub-query of a larger query, the name is seen by the larger query as the column name of the virtual table produced by the sub-query. To specify the name to use for an output column, write `AS ` after the column's expression. (You can omit `AS`, but only if the desired output name does not match any SQL keyword. For protection against possible future keyword additions, you can always either write `AS` or double-quote the output name.) If you do not specify a column name, Apache Cloudberry chooses a name automatically. If the column's expression is a simple column reference then the chosen name is the same as that column's name. 
In more complex cases, a function or type name may be used, or the system may fall back on a generated name such as `?column?` or `columnN`. An output column's name can be used to refer to the column's value in `ORDER BY` and `GROUP BY` clauses, but not in the `WHERE` or `HAVING` clauses; there you must specify the expression instead. Instead of an expression, you can specify `*` in the output list as a shorthand for all the columns of the selected rows. Also, you can specify `.*` as a shorthand for the columns coming from just that table. In these cases it is not possible to specify new names with `AS`; the output column names will be the same as the table columns' names. -According to the SQL standard, the expressions in the output list should be computed before applying `DISTINCT`, `ORDER BY`, or `LIMIT`. This is obviously necessary when using `DISTINCT`, since otherwise it's not clear what values are being made distinct. However, in many cases it is convenient if output expressions are computed after `ORDER BY` and `LIMIT`; particularly if the output list contains any volatile or expensive functions. With that behavior, the order of function evaluations is more intuitive and there will not be evaluations corresponding to rows that never appear in the output. Cloudberry Database effectively evaluates output expressions after sorting and limiting, so long as those expressions are not referenced in `DISTINCT`, `ORDER BY`, or `GROUP BY`. (As a counterexample, `SELECT f(x) FROM tab ORDER BY 1` clearly must evaluate `f(x)` before sorting.) Output expressions that contain set-returning functions are effectively evaluated after sorting and before limiting, so that `LIMIT` will act to cut off the output from a set-returning function. +According to the SQL standard, the expressions in the output list should be computed before applying `DISTINCT`, `ORDER BY`, or `LIMIT`. 
This is obviously necessary when using `DISTINCT`, since otherwise it's not clear what values are being made distinct. However, in many cases it is convenient if output expressions are computed after `ORDER BY` and `LIMIT`; particularly if the output list contains any volatile or expensive functions. With that behavior, the order of function evaluations is more intuitive and there will not be evaluations corresponding to rows that never appear in the output. Apache Cloudberry effectively evaluates output expressions after sorting and limiting, so long as those expressions are not referenced in `DISTINCT`, `ORDER BY`, or `GROUP BY`. (As a counterexample, `SELECT f(x) FROM tab ORDER BY 1` clearly must evaluate `f(x)` before sorting.) Output expressions that contain set-returning functions are effectively evaluated after sorting and before limiting, so that `LIMIT` will act to cut off the output from a set-returning function. -> **Note** Cloudberry Database versions prior to 7 did not provide any guarantees about the timing of evaluation of output expressions versus sorting and limiting; it depended on the form of the chosen query plan. +> **Note** Apache Cloudberry versions prior to 7 did not provide any guarantees about the timing of evaluation of output expressions versus sorting and limiting; it depended on the form of the chosen query plan. ### The `DISTINCT` clause @@ -568,20 +568,20 @@ OFFSET If the `` expression evaluates to NULL, it is treated as `LIMIT ALL`, that is, no limit. If `` evaluates to NULL, it is treated the same as `OFFSET 0`. -SQL:2008 introduced a different syntax to achieve the same result, which Cloudberry Database also supports. It is: +SQL:2008 introduced a different syntax to achieve the same result, which Apache Cloudberry also supports. 
It is:

```sql
OFFSET  [ ROW | ROWS ]
FETCH { FIRST | NEXT } [  ] { ROW | ROWS } ONLY
```

-In this syntax, the `` or `` value is required by the standard to be a literal constant, a parameter, or a variable name; as a Cloudberry Database extension, other expressions are allowed, but will generally need to be enclosed in parentheses to avoid ambiguity. If `` is omitted in a `FETCH` clause, it defaults to 1. `ROW` and `ROWS` as well as `FIRST` and `NEXT` are noise words that don't influence the effects of these clauses. According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present; but Cloudberry Database allows either order.
+In this syntax, the `` or `` value is required by the standard to be a literal constant, a parameter, or a variable name; as an Apache Cloudberry extension, other expressions are allowed, but will generally need to be enclosed in parentheses to avoid ambiguity. If `` is omitted in a `FETCH` clause, it defaults to 1. `ROW` and `ROWS` as well as `FIRST` and `NEXT` are noise words that don't influence the effects of these clauses. According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present; but Apache Cloudberry allows either order.

When using `LIMIT`, it is a good idea to use an `ORDER BY` clause that constrains the result rows into a unique order. Otherwise you will get an unpredictable subset of the query's rows — you may be asking for the tenth through twentieth rows, but tenth through twentieth in what ordering? You don't know what ordering unless you specify `ORDER BY`. The query optimizer takes `LIMIT` into account when generating a query plan, so you are very likely to get different plans (yielding different row orders) depending on what you use for `LIMIT` and `OFFSET`. Thus, using different `LIMIT/OFFSET` values to select different subsets of a query result will give inconsistent results unless you enforce a predictable result ordering with `ORDER BY`. 
This is not a defect; it is an inherent consequence of the fact that SQL does not promise to deliver the results of a query in any particular order unless `ORDER BY` is used to constrain the order. -It is even possible for repeated executions of the same `LIMIT` query to return different subsets of the rows of a table, if there is not an `ORDER BY` to enforce selection of a deterministic subset. Again, this is not a bug; Cloudberry Database does not guarantee determinism of the results in such a case. +It is even possible for repeated executions of the same `LIMIT` query to return different subsets of the rows of a table, if there is not an `ORDER BY` to enforce selection of a deterministic subset. Again, this is not a bug; Apache Cloudberry does not guarantee determinism of the results in such a case. ### The `LOCKING` clause @@ -600,7 +600,7 @@ FOR [OF [ , ... ] ] [ NOWAIT | SKIP LOCKED ] - `SHARE` - Locks the table with a `ROW SHARE` lock. - `KEY SHARE` - Locks the table with a `ROW SHARE` lock. -When the Global Deadlock Detector is deactivated (the default), Cloudberry Database uses the specified lock. +When the Global Deadlock Detector is deactivated (the default), Apache Cloudberry uses the specified lock. When the Global Deadlock Detector is enabled, a `ROW SHARE` lock is used to lock the table for simple `SELECT` queries that contain a locking clause, and the query plans contain a `lockrows` node. Simple `SELECT` queries that contain a locking clause fulfill all the following conditions: @@ -611,7 +611,7 @@ When the Global Deadlock Detector is enabled, a `ROW SHARE` lock is used to lock Otherwise, table locking for a `SELECT` query that contains a locking clause behaves as if the Global Deadlock Detector is deactivated. -> **Note** The Global Deadlock Detector also affects the locking used by `DELETE` and `UPDATE` operations. By default, Cloudberry Database acquires an `EXCLUSIVE` lock on tables for `DELETE` and `UPDATE` operations on heap tables. 
When the Global Deadlock Detector is enabled, the lock mode for `DELETE` and `UPDATE` operations on heap tables is `ROW EXCLUSIVE`. +> **Note** The Global Deadlock Detector also affects the locking used by `DELETE` and `UPDATE` operations. By default, Apache Cloudberry acquires an `EXCLUSIVE` lock on tables for `DELETE` and `UPDATE` operations on heap tables. When the Global Deadlock Detector is enabled, the lock mode for `DELETE` and `UPDATE` operations on heap tables is `ROW EXCLUSIVE`. For more information on each row-level lock mode, refer to [Explicit Locking](https://www.postgresql.org/docs/12/explicit-locking.html) in the PostgreSQL documentation. @@ -807,7 +807,7 @@ The `SELECT` statement is compatible with the SQL standard, but there are some e **Omitted FROM Clauses** -Cloudberry Database allows one to omit the `FROM` clause. It has a straightforward use to compute the results of simple expressions. For example: +Apache Cloudberry allows one to omit the `FROM` clause. It has a straightforward use to compute the results of simple expressions. For example: ```sql SELECT 2+2; @@ -821,19 +821,19 @@ Note that if a `FROM` clause is not specified, the query cannot reference any da SELECT distributors.* WHERE distributors.name = 'Westward'; ``` -In earlier releases, setting a server configuration parameter, `add_missing_from`, to true allowed Cloudberry Database to add an implicit entry to the query's `FROM` clause for each table referenced by the query. This is no longer allowed. +In earlier releases, setting a server configuration parameter, `add_missing_from`, to true allowed Apache Cloudberry to add an implicit entry to the query's `FROM` clause for each table referenced by the query. This is no longer allowed. **Empty SELECT Lists** -The list of output expressions after `SELECT` can be empty, producing a zero-column result table. This is not valid syntax according to the SQL standard. 
Cloudberry Database allows it to be consistent with allowing zero-column tables. However, an empty list is not allowed when `DISTINCT` is used. +The list of output expressions after `SELECT` can be empty, producing a zero-column result table. This is not valid syntax according to the SQL standard. Apache Cloudberry allows it to be consistent with allowing zero-column tables. However, an empty list is not allowed when `DISTINCT` is used. **Omitting the AS Key Word** -In the SQL standard, the optional key word `AS` can be omitted before an output column name whenever the new column name is a valid column name (that is, not the same as any reserved keyword). Cloudberry Database is slightly more restrictive: `AS` is required if the new column name matches any keyword at all, reserved or not. Recommended practice is to use `AS` or double-quote output column names, to prevent any possible conflict against future keyword additions. +In the SQL standard, the optional key word `AS` can be omitted before an output column name whenever the new column name is a valid column name (that is, not the same as any reserved keyword). Apache Cloudberry is slightly more restrictive: `AS` is required if the new column name matches any keyword at all, reserved or not. Recommended practice is to use `AS` or double-quote output column names, to prevent any possible conflict against future keyword additions. -In `FROM` items, both the standard and Cloudberry Database allow `AS` to be omitted before an alias that is an unreserved keyword. But this is impractical for output column names, because of syntactic ambiguities. +In `FROM` items, both the standard and Apache Cloudberry allow `AS` to be omitted before an alias that is an unreserved keyword. But this is impractical for output column names, because of syntactic ambiguities. 
**ONLY and Inheritance** @@ -843,9 +843,9 @@ The SQL standard requires parentheses around the table name when writing `ONLY`, SELECT * FROM ONLY (tab1), ONLY (tab2) WHERE ... ``` -Cloudberry Database considers these parentheses to be optional. +Apache Cloudberry considers these parentheses to be optional. -Cloudberry Database allows a trailing `*` to be written to explicitly specify the non-`ONLY` behavior of including child tables. The standard does not allow this. +Apache Cloudberry allows a trailing `*` to be written to explicitly specify the non-`ONLY` behavior of including child tables. The standard does not allow this. Note: The above points apply equally to all SQL commands supporting the `ONLY` option. @@ -855,29 +855,29 @@ The `TABLESAMPLE` clause is currently accepted only on regular tables and materi **Function Calls in FROM** -Cloudberry Database allows you to write a function call directly as a member of the `FROM` list. In the SQL standard it would be necessary to wrap such a function call in a sub-`SELECT`; that is, the syntax `FROM func(...) alias` is approximately equivalent to `FROM LATERAL (SELECT func(...)) alias`. Note that `LATERAL` is considered to be implicit; this is because the standard requires `LATERAL` semantics for an `UNNEST()` item in `FROM`. Cloudberry Database treats `UNNEST()` the same as other set-returning functions. +Apache Cloudberry allows you to write a function call directly as a member of the `FROM` list. In the SQL standard it would be necessary to wrap such a function call in a sub-`SELECT`; that is, the syntax `FROM func(...) alias` is approximately equivalent to `FROM LATERAL (SELECT func(...)) alias`. Note that `LATERAL` is considered to be implicit; this is because the standard requires `LATERAL` semantics for an `UNNEST()` item in `FROM`. Apache Cloudberry treats `UNNEST()` the same as other set-returning functions. 
**Namespace Available to GROUP BY and ORDER BY** -In the SQL-92 standard, an `ORDER BY` clause may only use output column names or numbers, while a `GROUP BY` clause may only use expressions based on input column names. Cloudberry Database extends each of these clauses to allow the other choice as well (but it uses the standard's interpretation if there is ambiguity). Cloudberry Database also allows both clauses to specify arbitrary expressions. Note that names appearing in an expression are always taken as input-column names, not as output column names. +In the SQL-92 standard, an `ORDER BY` clause may only use output column names or numbers, while a `GROUP BY` clause may only use expressions based on input column names. Apache Cloudberry extends each of these clauses to allow the other choice as well (but it uses the standard's interpretation if there is ambiguity). Apache Cloudberry also allows both clauses to specify arbitrary expressions. Note that names appearing in an expression are always taken as input-column names, not as output column names. -SQL:1999 and later use a slightly different definition which is not entirely upward compatible with SQL-92. In most cases, however, Cloudberry Database interprets an `ORDER BY` or `GROUP BY` expression the same way SQL:1999 does. +SQL:1999 and later use a slightly different definition which is not entirely upward compatible with SQL-92. In most cases, however, Apache Cloudberry interprets an `ORDER BY` or `GROUP BY` expression the same way SQL:1999 does. **Functional Dependencies** -Cloudberry Database recognizes functional dependency (allowing columns to be omitted from `GROUP BY`) only when a table's primary key is included in the `GROUP BY` list. The SQL standard specifies additional conditions that should be recognized. +Apache Cloudberry recognizes functional dependency (allowing columns to be omitted from `GROUP BY`) only when a table's primary key is included in the `GROUP BY` list. 
The SQL standard specifies additional conditions that should be recognized. **LIMIT and OFFSET** -The clauses `LIMIT` and `OFFSET` are Cloudberry Database-specific syntax, also used by MySQL. The SQL:2008 standard has introduced the clauses `OFFSET .. FETCH {FIRST|NEXT} ...` for the same functionality, as shown above in [LIMIT Clause](#limitclause). This syntax is also used by IBM DB2. (Applications for Oracle frequently use a workaround involving the automatically generated `rownum` column, which is not available in Cloudberry Database, to implement the effects of these clauses.) +The clauses `LIMIT` and `OFFSET` are Apache Cloudberry-specific syntax, also used by MySQL. The SQL:2008 standard has introduced the clauses `OFFSET .. FETCH {FIRST|NEXT} ...` for the same functionality, as shown above in [LIMIT Clause](#limitclause). This syntax is also used by IBM DB2. (Applications for Oracle frequently use a workaround involving the automatically generated `rownum` column, which is not available in Apache Cloudberry, to implement the effects of these clauses.) **FOR NO KEY UPDATE, FOR UPDATE, FOR SHARE, and FOR KEY SHARE** -Although `FOR UPDATE` appears in the SQL standard, the standard allows it only as an option of `DECLARE CURSOR`. Cloudberry Database allows it in any `SELECT` query as well as in sub-`SELECT`s, but this is an extension. The `FOR NO KEY UPDATE`, `FOR SHARE`, and `FOR KEY SHARE` variants, as well as the `NOWAIT` and `SKIP LOCKED` options, do not appear in the standard. +Although `FOR UPDATE` appears in the SQL standard, the standard allows it only as an option of `DECLARE CURSOR`. Apache Cloudberry allows it in any `SELECT` query as well as in sub-`SELECT`s, but this is an extension. The `FOR NO KEY UPDATE`, `FOR SHARE`, and `FOR KEY SHARE` variants, as well as the `NOWAIT` and `SKIP LOCKED` options, do not appear in the standard. 
**Data-Modifying Statements in WITH** -Cloudberry Database allows `INSERT`, `UPDATE`, and `DELETE` to be used as `WITH` queries. This is not found in the SQL standard. +Apache Cloudberry allows `INSERT`, `UPDATE`, and `DELETE` to be used as `WITH` queries. This is not found in the SQL standard. **Nonstandard Clauses** @@ -889,7 +889,7 @@ The `MATERIALIZED` and `NOT MATERIALIZED` options of `WITH` are extensions of th **Limited Use of STABLE and VOLATILE Functions** -To prevent data from becoming out-of-sync across the segments in Cloudberry Database, any function classified as `STABLE` or `VOLATILE` cannot be run at the segment database level if it contains SQL or modifies the database in any way. See [CREATE FUNCTION](/docs/sql-stmts/create-function.md) for more information. +To prevent data from becoming out-of-sync across the segments in Apache Cloudberry, any function classified as `STABLE` or `VOLATILE` cannot be run at the segment database level if it contains SQL or modifies the database in any way. See [CREATE FUNCTION](/docs/sql-stmts/create-function.md) for more information. ## See also diff --git a/docs/sql-stmts/set-constraints.md b/docs/sql-stmts/set-constraints.md index fe68c16776..ae01e34481 100644 --- a/docs/sql-stmts/set-constraints.md +++ b/docs/sql-stmts/set-constraints.md @@ -30,10 +30,10 @@ The firing of triggers that are declared as "constraint triggers" is also contro ## Notes -Because Cloudberry Database does not require constraint names to be unique within a schema (but only per-table), it is possible that there is more than one match for a specified constraint name. In this case `SET CONSTRAINTS` will act on all matches. For a non-schema-qualified name, once a match or matches have been found in some schema in the search path, schemas appearing later in the path are not searched. 
+Because Apache Cloudberry does not require constraint names to be unique within a schema (but only per-table), it is possible that there is more than one match for a specified constraint name. In this case `SET CONSTRAINTS` will act on all matches. For a non-schema-qualified name, once a match or matches have been found in some schema in the search path, schemas appearing later in the path are not searched. This command only alters the behavior of constraints within the current transaction. Issuing this outside of a transaction block emits a warning and otherwise has no effect. ## Compatibility -This command complies with the behavior defined in the SQL standard, except for the limitation that, in Cloudberry Database, it does not apply to `NOT NULL` and `CHECK` constraints. Also, Cloudberry Database checks non-deferrable uniqueness constraints immediately, not at end of statement as the standard would suggest. +This command complies with the behavior defined in the SQL standard, except for the limitation that, in Apache Cloudberry, it does not apply to `NOT NULL` and `CHECK` constraints. Also, Apache Cloudberry checks non-deferrable uniqueness constraints immediately, not at end of statement as the standard would suggest. diff --git a/docs/sql-stmts/set-role.md b/docs/sql-stmts/set-role.md index 70db74d7a9..5e8540168a 100644 --- a/docs/sql-stmts/set-role.md +++ b/docs/sql-stmts/set-role.md @@ -70,7 +70,7 @@ SELECT SESSION_USER, CURRENT_USER; ## Compatibility -Cloudberry Database allows identifier syntax (rolename), while the SQL standard requires the role name to be written as a string literal. SQL does not allow this command during a transaction; Cloudberry Database does not make this restriction. The `SESSION` and `LOCAL` modifiers are a Cloudberry Database extension, as is the `RESET` syntax. +Apache Cloudberry allows identifier syntax (rolename), while the SQL standard requires the role name to be written as a string literal. 
SQL does not allow this command during a transaction; Apache Cloudberry does not make this restriction. The `SESSION` and `LOCAL` modifiers are an Apache Cloudberry extension, as is the `RESET` syntax.

## See also

diff --git a/docs/sql-stmts/set-session-authorization.md b/docs/sql-stmts/set-session-authorization.md
index fce5facd3f..6019006c70 100644
--- a/docs/sql-stmts/set-session-authorization.md
+++ b/docs/sql-stmts/set-session-authorization.md
@@ -63,7 +63,7 @@ SELECT SESSION_USER, CURRENT_USER;

## Compatibility

-The SQL standard allows some other expressions to appear in place of the literal rolename, but these options are not important in practice. Cloudberry Database allows identifier syntax (rolename), which SQL does not. SQL does not allow this command during a transaction; Cloudberry Database does not make this restriction. The `SESSION` and `LOCAL` modifiers are a Cloudberry Database extension, as is the `RESET` syntax.
+The SQL standard allows some other expressions to appear in place of the literal rolename, but these options are not important in practice. Apache Cloudberry allows identifier syntax (rolename), which SQL does not. SQL does not allow this command during a transaction; Apache Cloudberry does not make this restriction. The `SESSION` and `LOCAL` modifiers are an Apache Cloudberry extension, as is the `RESET` syntax.

## See also

diff --git a/docs/sql-stmts/set-transaction.md b/docs/sql-stmts/set-transaction.md
index 5bfa4c60f5..ad848eaf04 100644
--- a/docs/sql-stmts/set-transaction.md
+++ b/docs/sql-stmts/set-transaction.md
@@ -31,7 +31,7 @@ The `SET TRANSACTION` command sets the characteristics of the current transactio

The available transaction characteristics are the transaction isolation level, the transaction access mode (read/write or read-only), and the deferrable mode. In addition, a snapshot can be selected, though only for the current transaction, not as a session default. 
:::info -Deferrable transactions require the transaction to be serializable. Cloudberry Database does not support serializable transactions, so including the `DEFERRABLE` clause has no effect. +Deferrable transactions require the transaction to be serializable. Apache Cloudberry does not support serializable transactions, so including the `DEFERRABLE` clause has no effect. ::: The isolation level of a transaction determines what data the transaction can see when other transactions are running concurrently. @@ -39,13 +39,13 @@ The isolation level of a transaction determines what data the transaction can se - **READ COMMITTED** — A statement can only see rows committed before it began. This is the default. - **REPEATABLE READ** — All statements in the current transaction can only see rows committed before the first query or data-modification statement run in the transaction. -The SQL standard defines two additional levels, `READ UNCOMMITTED` and `SERIALIZABLE`. In Cloudberry Database, `READ UNCOMMITTED` is treated as `READ COMMITTED`. If you specify `SERIALIZABLE`, Cloudberry Database falls back to `REPEATABLE READ`. +The SQL standard defines two additional levels, `READ UNCOMMITTED` and `SERIALIZABLE`. In Apache Cloudberry, `READ UNCOMMITTED` is treated as `READ COMMITTED`. If you specify `SERIALIZABLE`, Apache Cloudberry falls back to `REPEATABLE READ`. The transaction isolation level cannot be changed after the first query or data-modification statement (`SELECT`, `INSERT`, `DELETE`, `UPDATE`, `FETCH`, or `COPY`) of a transaction has been run. The transaction access mode determines whether the transaction is read/write or read-only. Read/write is the default. 
When a transaction is read-only, the following SQL commands are disallowed: `INSERT`, `UPDATE`, `DELETE`, and `COPY FROM` if the table they would write to is not a temporary table; all `CREATE`, `ALTER`, and `DROP` commands; `COMMENT`, `GRANT`, `REVOKE`, `TRUNCATE`; and `EXPLAIN ANALYZE` and `EXECUTE` if the command they would run is among those listed. This is a high-level notion of read-only that does not prevent all writes to disk. -The `DEFERRABLE` transaction property has no effect unless the transaction is also `SERIALIZABLE` and `READ ONLY`. When all of these properties are set on a transaction, the transaction may block when first acquiring its snapshot, after which it is able to run without the normal overhead of a `SERIALIZABLE` transaction and without any risk of contributing to or being cancelled by a serialization failure. This mode is well suited for long-running reports or backups. *Because Cloudberry Database does not support serializable transactions, the `DEFERRABLE` transaction property has no effect in Cloudberry Database.* +The `DEFERRABLE` transaction property has no effect unless the transaction is also `SERIALIZABLE` and `READ ONLY`. When all of these properties are set on a transaction, the transaction may block when first acquiring its snapshot, after which it is able to run without the normal overhead of a `SERIALIZABLE` transaction and without any risk of contributing to or being cancelled by a serialization failure. This mode is well suited for long-running reports or backups. *Because Apache Cloudberry does not support serializable transactions, the `DEFERRABLE` transaction property has no effect in Apache Cloudberry.* The `SET TRANSACTION SNAPSHOT` command allows a new transaction to run with the same snapshot as an existing transaction. The pre-existing transaction must have exported its snapshot with the `pg_export_snapshot()` function. 
That function returns a snapshot identifier, which must be given to `SET TRANSACTION SNAPSHOT` to specify which snapshot is to be imported. The identifier must be written as a string literal in this command, for example `'000003A1-1'`. `SET TRANSACTION SNAPSHOT` can only be executed at the start of a transaction, before the first query or data-modification statement (`SELECT`, `INSERT`, `DELETE`, `UPDATE`, `FETCH`, or `COPY`) of the transaction. Furthermore, the transaction must already be set to `SERIALIZABLE` or `REPEATABLE READ` isolation level (otherwise, the snapshot would be discarded immediately, since `READ COMMITTED` mode takes a new snapshot for each command). If the importing transaction uses `SERIALIZABLE` isolation level, then the transaction that exported the snapshot must also use that isolation level. Also, a non-read-only serializable transaction cannot import a snapshot from a read-only transaction. @@ -81,13 +81,13 @@ SET TRANSACTION SNAPSHOT '00000003-0000001B-1'; ## Compatibility -These commands are defined in the SQL standard, except for the `DEFERRABLE` transaction mode and the `SET TRANSACTION SNAPSHOT` form, which are Cloudberry Database extensions. +These commands are defined in the SQL standard, except for the `DEFERRABLE` transaction mode and the `SET TRANSACTION SNAPSHOT` form, which are Apache Cloudberry extensions. -`SERIALIZABLE` is the default transaction isolation level in the standard. In Cloudberry Database, the default is `READ COMMITTED`. Due to lack of predicate locking, Cloudberry Database does not fully support the `SERIALIZABLE` level, so it falls back to the `REPEATABLE READ` level when `SERIALIZABLE` is specified. Essentially, a predicate-locking system prevents phantom reads by restricting what is written, whereas a multi-version concurrency control model (MVCC) as used in Cloudberry Database prevents them by restricting what is read. +`SERIALIZABLE` is the default transaction isolation level in the standard. 
In Apache Cloudberry, the default is `READ COMMITTED`. Due to lack of predicate locking, Apache Cloudberry does not fully support the `SERIALIZABLE` level, so it falls back to the `REPEATABLE READ` level when `SERIALIZABLE` is specified. Essentially, a predicate-locking system prevents phantom reads by restricting what is written, whereas a multi-version concurrency control model (MVCC) as used in Apache Cloudberry prevents them by restricting what is read. -In the SQL standard, there is one other transaction characteristic that can be set with these commands: the size of the diagnostics area. This concept is specific to embedded SQL, and therefore is not implemented in the Cloudberry Database server. +In the SQL standard, there is one other transaction characteristic that can be set with these commands: the size of the diagnostics area. This concept is specific to embedded SQL, and therefore is not implemented in the Apache Cloudberry server. -The SQL standard requires commas between successive transaction_modes, but for historical reasons Cloudberry Database allows the commas to be omitted. +The SQL standard requires commas between successive transaction_modes, but for historical reasons Apache Cloudberry allows the commas to be omitted. ## See also diff --git a/docs/sql-stmts/set.md b/docs/sql-stmts/set.md index b33b3b599b..85d0ec9c8a 100644 --- a/docs/sql-stmts/set.md +++ b/docs/sql-stmts/set.md @@ -4,7 +4,7 @@ title: SET # SET -Changes the value of a run-time Cloudberry Database configuration parameter. +Changes the value of a run-time Apache Cloudberry configuration parameter. ## Synopsis @@ -40,7 +40,7 @@ Specifies that the command takes effect for only the current transaction. After **`configuration_parameter`** -The name of a settable Cloudberry Database run-time configuration parameter. Only parameters classified as *session* can be changed with `SET`. See Server Configuration Parameters for details. 
+The name of a settable Apache Cloudberry run-time configuration parameter. Only parameters classified as *session* can be changed with `SET`. See Server Configuration Parameters for details. **`value`** @@ -80,7 +80,7 @@ SELECT setseed(value); Set the time zone to your local time zone (that is, server's default value of timezone). -See the [Time Zones](https://www.postgresql.org/docs/12/datatype-datetime.html#DATATYPE-TIMEZONES) section of the PostgreSQL documentation for more information about time zones in Cloudberry Database. +See the [Time Zones](https://www.postgresql.org/docs/12/datatype-datetime.html#DATATYPE-TIMEZONES) section of the PostgreSQL documentation for more information about time zones in Apache Cloudberry. ## Examples @@ -116,7 +116,7 @@ SET TIME ZONE 'Europe/Rome'; ## Compatibility -`SET TIME ZONE` extends syntax defined in the SQL standard. The standard allows only numeric time zone offsets while Cloudberry Database allows more flexible time-zone specifications. All other `SET` features are Cloudberry Database extensions. +`SET TIME ZONE` extends syntax defined in the SQL standard. The standard allows only numeric time zone offsets while Apache Cloudberry allows more flexible time-zone specifications. All other `SET` features are Apache Cloudberry extensions. ## See also diff --git a/docs/sql-stmts/show.md b/docs/sql-stmts/show.md index 9b9b5e912b..b57a971c98 100644 --- a/docs/sql-stmts/show.md +++ b/docs/sql-stmts/show.md @@ -16,7 +16,7 @@ SHOW ALL ## Description -`SHOW` displays the current settings of Cloudberry Database run-time system configuration parameters. You can set these parameters with the `SET` statement, by editing the `postgresql.conf` configuration file of the Cloudberry Database coordinator, through the `PGOPTIONS` environment variable (when using libpq or a libpq-based application), or through command-line flags when starting the Cloudberry Database server. 
+`SHOW` displays the current settings of Apache Cloudberry run-time system configuration parameters. You can set these parameters with the `SET` statement, by editing the `postgresql.conf` configuration file of the Apache Cloudberry coordinator, through the `PGOPTIONS` environment variable (when using libpq or a libpq-based application), or through command-line flags when starting the Apache Cloudberry server. ## Parameters @@ -29,7 +29,7 @@ Some parameters viewable by `SHOW` are read-only — you can view their values b **`SERVER_VERSION`** -Shows the version number of the Cloudberry Database server. +Shows the version number of the Apache Cloudberry server. **`SERVER_ENCODING`** @@ -94,7 +94,7 @@ SHOW ALL; ## Compatibility -The `SHOW` command is a Cloudberry Database extension. +The `SHOW` command is an Apache Cloudberry extension. ## See also diff --git a/docs/sql-stmts/start-transaction.md b/docs/sql-stmts/start-transaction.md index a9f5fee73d..2ffcaa9986 100644 --- a/docs/sql-stmts/start-transaction.md +++ b/docs/sql-stmts/start-transaction.md @@ -29,11 +29,11 @@ Refer to [SET TRANSACTION](/docs/sql-stmts/set-transaction.md) for information o ## Compatibility -In the standard, it is not necessary to issue `START TRANSACTION` to start a transaction block: any SQL command implicitly begins a block. Cloudberry Database's behavior can be seen as implicitly issuing a `COMMIT` after each command that does not follow `START TRANSACTION` (or `BEGIN`), and it is therefore often called 'autocommit'. Other relational database systems may offer an autocommit feature as a convenience. +In the standard, it is not necessary to issue `START TRANSACTION` to start a transaction block: any SQL command implicitly begins a block. Apache Cloudberry's behavior can be seen as implicitly issuing a `COMMIT` after each command that does not follow `START TRANSACTION` (or `BEGIN`), and it is therefore often called 'autocommit'.
Other relational database systems may offer an autocommit feature as a convenience. -The `DEFERRABLE` transaction_mode is a Cloudberry Database language extension. +The `DEFERRABLE` transaction_mode is an Apache Cloudberry language extension. -The SQL standard requires commas between successive transaction_modes, but for historical reasons Cloudberry Database allows the commas to be omitted. +The SQL standard requires commas between successive transaction_modes, but for historical reasons Apache Cloudberry allows the commas to be omitted. See also the compatibility section of [SET TRANSACTION](/docs/sql-stmts/set-transaction.md). diff --git a/docs/sql-stmts/truncate.md b/docs/sql-stmts/truncate.md index 1aa087823f..80a18c8117 100644 --- a/docs/sql-stmts/truncate.md +++ b/docs/sql-stmts/truncate.md @@ -7,7 +7,7 @@ title: TRUNCATE Empties a table or set of tables of all rows. :::info -Cloudberry Database does not enforce referential integrity syntax (foreign key constraints). `TRUNCATE` truncates a table that is referenced in a foreign key constraint even if the `CASCADE` option is omitted. +Apache Cloudberry does not enforce referential integrity syntax (foreign key constraints). `TRUNCATE` truncates a table that is referenced in a foreign key constraint even if the `CASCADE` option is omitted. ::: ## Synopsis @@ -39,11 +39,11 @@ Do not change the values of sequences. This is the default. **`CASCADE`** -Because this key word applies to foreign key references (which are not supported in Cloudberry Database) it has no effect. +Because this key word applies to foreign key references (which are not supported in Apache Cloudberry) it has no effect. **`RESTRICT`** -Because this key word applies to foreign key references (which are not supported in Cloudberry Database) it has no effect. +Because this key word applies to foreign key references (which are not supported in Apache Cloudberry) it has no effect.
## Notes diff --git a/docs/sql-stmts/unlisten.md b/docs/sql-stmts/unlisten.md index 5d9a245279..c1c11fc6c5 100644 --- a/docs/sql-stmts/unlisten.md +++ b/docs/sql-stmts/unlisten.md @@ -14,7 +14,7 @@ UNLISTEN { | * } ## Description -`UNLISTEN` is used to remove an existing registration for `NOTIFY` events. `UNLISTEN` cancels any existing registration of the current Cloudberry Database session as a listener on the notification channel named channel. The special wildcard `*` cancels all listener registrations for the current session. +`UNLISTEN` is used to remove an existing registration for `NOTIFY` events. `UNLISTEN` cancels any existing registration of the current Apache Cloudberry session as a listener on the notification channel named channel. The special wildcard `*` cancels all listener registrations for the current session. [NOTIFY](/docs/sql-stmts/notify.md) contains a more extensive discussion of the use of `LISTEN` and `NOTIFY`. diff --git a/docs/sql-stmts/update.md b/docs/sql-stmts/update.md index bd776f6925..9488985584 100644 --- a/docs/sql-stmts/update.md +++ b/docs/sql-stmts/update.md @@ -26,11 +26,11 @@ UPDATE [ ONLY ] [ [ AS ] ] There are two ways to modify a table using information contained in other tables in the database: using sub-selects, or specifying additional tables in the `FROM` clause. Which technique is more appropriate depends on the specific circumstances. -The optional `RETURNING` clause causes `UPDATE` to compute and return value(s) based on each row actually updated. Cloudberry Database can compute any expression using the table's columns, and/or columns of other tables mentioned in `FROM`. The new (post-update) values of the table's columns are used. The syntax of the `RETURNING` list is identical to that of the output list of `SELECT`. +The optional `RETURNING` clause causes `UPDATE` to compute and return value(s) based on each row actually updated. 
Apache Cloudberry can compute any expression using the table's columns, and/or columns of other tables mentioned in `FROM`. The new (post-update) values of the table's columns are used. The syntax of the `RETURNING` list is identical to that of the output list of `SELECT`. You must have the `UPDATE` privilege on the table, or at least on the column(s) that are listed to be updated. You must also have the `SELECT` privilege on any column whose values are read in the expressions or condition. -> **Note** As the default, Cloudberry Database acquires an `EXCLUSIVE` lock on tables for `UPDATE` operations on heap tables. When the Global Deadlock Detector is enabled, the lock mode for `UPDATE` operations on heap tables is `ROW EXCLUSIVE`. +> **Note** As the default, Apache Cloudberry acquires an `EXCLUSIVE` lock on tables for `UPDATE` operations on heap tables. When the Global Deadlock Detector is enabled, the lock mode for `UPDATE` operations on heap tables is `ROW EXCLUSIVE`. ## Parameters @@ -148,7 +148,7 @@ temp_lo+15, DEFAULT) WHERE city = 'San Francisco' AND date = '2016-07-03'; ``` -Increment the sales count of the salesperson who manages the account for Acme Corporation, using the `FROM` clause syntax (assuming both tables being joined are distributed in Cloudberry Database on the `id` column): +Increment the sales count of the salesperson who manages the account for Acme Corporation, using the `FROM` clause syntax (assuming both tables being joined are distributed in Apache Cloudberry on the `id` column): ```sql UPDATE employees SET sales_count = sales_count + 1 FROM @@ -208,11 +208,11 @@ COMMIT; ## Compatibility -This command conforms to the SQL standard, except that the `FROM` and `RETURNING` clauses are Cloudberry Database extensions, as is the ability to use `WITH` with `UPDATE`. +This command conforms to the SQL standard, except that the `FROM` and `RETURNING` clauses are Apache Cloudberry extensions, as is the ability to use `WITH` with `UPDATE`. 
-Some other database systems offer a `FROM` option in which the target table is supposed to be listed again within `FROM`. That is not how Cloudberry Database interprets `FROM`. Be careful when porting applications that use this extension. +Some other database systems offer a `FROM` option in which the target table is supposed to be listed again within `FROM`. That is not how Apache Cloudberry interprets `FROM`. Be careful when porting applications that use this extension. -According to the standard, the source value for a parenthesized sub-list of target column names can be any row-valued expression yielding the correct number of columns. Cloudberry Database only allows the source value to be a [row constructor](https://www.postgresql.org/docs/12/sql-expressions.html#SQL-SYNTAX-ROW-CONSTRUCTORS) or a sub-`SELECT`. You can specify an individual column's updated value as `DEFAULT` in the row-constructor case, but not inside a sub-`SELECT`. +According to the standard, the source value for a parenthesized sub-list of target column names can be any row-valued expression yielding the correct number of columns. Apache Cloudberry only allows the source value to be a [row constructor](https://www.postgresql.org/docs/12/sql-expressions.html#SQL-SYNTAX-ROW-CONSTRUCTORS) or a sub-`SELECT`. You can specify an individual column's updated value as `DEFAULT` in the row-constructor case, but not inside a sub-`SELECT`. ## See also diff --git a/docs/sql-stmts/vacuum.md b/docs/sql-stmts/vacuum.md index c93bc30201..15b654b36e 100644 --- a/docs/sql-stmts/vacuum.md +++ b/docs/sql-stmts/vacuum.md @@ -21,7 +21,7 @@ VACUUM AO_AUX_ONLY ## Description -`VACUUM` reclaims storage occupied by deleted tuples. In normal Cloudberry Database operation, tuples that are deleted or obsoleted by an update are not physically removed from their table; they remain present on disk until a `VACUUM` is done. Therefore it is necessary to do `VACUUM` periodically, especially on frequently-updated tables. 
+`VACUUM` reclaims storage occupied by deleted tuples. In normal Apache Cloudberry operation, tuples that are deleted or obsoleted by an update are not physically removed from their table; they remain present on disk until a `VACUUM` is done. Therefore it is necessary to do `VACUUM` periodically, especially on frequently-updated tables. With no parameter, `VACUUM` processes every table in the current database. With a parameter, `VACUUM` processes only that table. @@ -33,7 +33,7 @@ With append-optimized tables, `VACUUM` compacts a table by first vacuuming the i `VACUUM FULL` does more extensive processing, including moving of tuples across blocks to try to compact the table to the minimum number of disk blocks. This form is much slower and requires an Access Exclusive lock on each table while it is being processed. The Access Exclusive lock guarantees that the holder is the only transaction accessing the table in any way. -When the option list is surrounded by parentheses, the options can be written in any order. Without parentheses, options must be specified in exactly the order shown above. The parenthesized syntax was added in Cloudberry Database 6.0; the unparenthesized syntax is deprecated. +When the option list is surrounded by parentheses, the options can be written in any order. Without parentheses, options must be specified in exactly the order shown above. The parenthesized syntax was added in Apache Cloudberry 6.0; the unparenthesized syntax is deprecated. > **Important**: For information on the use of `VACUUM`, `VACUUM FULL`, and `VACUUM ANALYZE`, see [Notes](#notes). @@ -91,9 +91,9 @@ The name of a specific column to analyze. Defaults to all columns. If a column l `VACUUM` cannot be run inside a transaction block. -Vacuum active databases frequently (at least nightly), in order to remove expired rows. After adding or deleting a large number of rows, running the `VACUUM ANALYZE` command for the affected table might be useful. 
This updates the system catalogs with the results of all recent changes, and allows the Cloudberry Database query optimizer to make better choices in planning queries. +Vacuum active databases frequently (at least nightly), in order to remove expired rows. After adding or deleting a large number of rows, running the `VACUUM ANALYZE` command for the affected table might be useful. This updates the system catalogs with the results of all recent changes, and allows the Apache Cloudberry query optimizer to make better choices in planning queries. -> **Important** PostgreSQL has a separate optional server process called the *autovacuum daemon*, whose purpose is to automate the execution of `VACUUM` and `ANALYZE` commands. Cloudberry Database enables the autovacuum daemon to perform `VACUUM` operations only on the Cloudberry Database template database `template0`. Autovacuum is enabled for `template0` because connections are not allowed to `template0`. The autovacuum daemon performs `VACUUM` operations on `template0` to manage transaction IDs (XIDs) and help avoid transaction ID wraparound issues in `template0`. +> **Important** PostgreSQL has a separate optional server process called the *autovacuum daemon*, whose purpose is to automate the execution of `VACUUM` and `ANALYZE` commands. Apache Cloudberry enables the autovacuum daemon to perform `VACUUM` operations only on the Apache Cloudberry template database `template0`. Autovacuum is enabled for `template0` because connections are not allowed to `template0`. The autovacuum daemon performs `VACUUM` operations on `template0` to manage transaction IDs (XIDs) and help avoid transaction ID wraparound issues in `template0`. Manual `VACUUM` operations must be performed in user-defined databases to manage transaction IDs (XIDs) in those databases. @@ -101,7 +101,7 @@ Manual `VACUUM` operations must be performed in user-defined databases to manage `VACUUM` commands skip external and foreign tables. 
-`VACUUM FULL` reclaims all expired row space, however it requires an exclusive lock on each table being processed, is a very expensive operation, and might take a long time to complete on large, distributed Cloudberry Database tables. Perform `VACUUM FULL` operations during database maintenance periods. +`VACUUM FULL` reclaims all expired row space, however it requires an exclusive lock on each table being processed, is a very expensive operation, and might take a long time to complete on large, distributed Apache Cloudberry tables. Perform `VACUUM FULL` operations during database maintenance periods. The `FULL` option is not recommended for routine use, but might be useful in special cases. An example is when you have deleted or updated most of the rows in a table and would like the table to physically shrink to occupy less disk space and allow faster table scans. `VACUUM FULL` will usually shrink the table more than a plain `VACUUM` would. diff --git a/docs/sql-stmts/values.md b/docs/sql-stmts/values.md index f43c0016f3..25ec1a1356 100644 --- a/docs/sql-stmts/values.md +++ b/docs/sql-stmts/values.md @@ -93,7 +93,7 @@ target, increase) WHERE employees.depno = v.depno AND employees.sales >= v.target; ``` -Note that an `AS` clause is required when `VALUES` is used in a `FROM` clause, just as is true for `SELECT`. It is not required that the `AS` clause specify names for all the columns, but it is good practice to do so. The default column names for `VALUES` are `column1`, `column2`, etc. in Cloudberry Database, but these names might be different in other database systems. +Note that an `AS` clause is required when `VALUES` is used in a `FROM` clause, just as is true for `SELECT`. It is not required that the `AS` clause specify names for all the columns, but it is good practice to do so. The default column names for `VALUES` are `column1`, `column2`, etc. in Apache Cloudberry, but these names might be different in other database systems. 
When `VALUES` is used in `INSERT`, the values are all automatically coerced to the data type of the corresponding destination column. When it is used in other contexts, it may be necessary to specify the correct data type. If the entries are all quoted literal constants, coercing the first is sufficient to determine the assumed type for all: @@ -107,7 +107,7 @@ SELECT * FROM machines WHERE ip_address IN ## Compatibility -`VALUES` conforms to the SQL standard. `LIMIT` and `OFFSET` are Cloudberry Database extensions; see also under [SELECT](/docs/sql-stmts/select.md). +`VALUES` conforms to the SQL standard. `LIMIT` and `OFFSET` are Apache Cloudberry extensions; see also under [SELECT](/docs/sql-stmts/select.md). ## See also diff --git a/docs/start-and-stop-cbdb-database.md b/docs/start-and-stop-cbdb-database.md index 302cda653b..0fdb4e3f55 100644 --- a/docs/start-and-stop-cbdb-database.md +++ b/docs/start-and-stop-cbdb-database.md @@ -2,13 +2,13 @@ title: Start and Stop Database --- -# Start and Stop Cloudberry Database +# Start and Stop Apache Cloudberry -In a Cloudberry Database DBMS, the database server instances (the coordinator and all segments) are started or stopped across all of the hosts in the system in such a way that they can work together as a unified DBMS. +In an Apache Cloudberry DBMS, the database server instances (the coordinator and all segments) are started or stopped across all of the hosts in the system in such a way that they can work together as a unified DBMS. -Because a Cloudberry Database system is distributed across many machines, the process for starting and stopping a Cloudberry Database system is different than the process for starting and stopping a regular PostgreSQL DBMS. +Because an Apache Cloudberry system is distributed across many machines, the process for starting and stopping an Apache Cloudberry system is different than the process for starting and stopping a regular PostgreSQL DBMS.
-Use the `gpstart` and `gpstop` utilities to start and stop Cloudberry Database, respectively. These utilities are located in the `$GPHOME/bin` directory on your Cloudberry Database coordinator host. +Use the `gpstart` and `gpstop` utilities to start and stop Apache Cloudberry, respectively. These utilities are located in the `$GPHOME/bin` directory on your Apache Cloudberry coordinator host. :::tip Do not issue a `kill` command to end any Postgres process. Instead, use the database command `pg_cancel_backend()`. @@ -16,25 +16,25 @@ Do not issue a `kill` command to end any Postgres process. Instead, use the data Issuing a `kill -9` or `kill -11` can introduce database corruption and prevent root cause analysis from being performed. -## Start Cloudberry Database +## Start Apache Cloudberry -Start an initialized Cloudberry Database system by running the `gpstart` utility on the coordinator instance. +Start an initialized Apache Cloudberry system by running the `gpstart` utility on the coordinator instance. -Use the `gpstart` utility to start a Cloudberry Database system that has already been initialized by the `gpinitsystem` utility, but has been stopped by the `gpstop` utility. The `gpstart` utility starts Cloudberry Database by starting all the `postgres` instances on the Cloudberry Database cluster. `gpstart` orchestrates this process and performs the process in parallel. +Use the `gpstart` utility to start an Apache Cloudberry system that has already been initialized by the `gpinitsystem` utility, but has been stopped by the `gpstop` utility. The `gpstart` utility starts Apache Cloudberry by starting all the `postgres` instances on the Apache Cloudberry cluster. `gpstart` orchestrates this process and performs the process in parallel.
-Run `gpstart` on the coordinator host to start Cloudberry Database: +Run `gpstart` on the coordinator host to start Apache Cloudberry: ```shell $ gpstart ``` -## Restart Cloudberry Database +## Restart Apache Cloudberry -Stop the Cloudberry Database system and then restart it. +Stop the Apache Cloudberry system and then restart it. -The `gpstop` utility with the `-r` option can stop and then restart Cloudberry Database after the shutdown completes. +The `gpstop` utility with the `-r` option can stop and then restart Apache Cloudberry after the shutdown completes. -To restart Cloudberry Database, enter the following command on the coordinator host: +To restart Apache Cloudberry, enter the following command on the coordinator host: ```shell $ gpstop -r @@ -42,11 +42,11 @@ $ gpstop -r ## Reload configuration file changes only -Reload changes to Cloudberry Database configuration files without interrupting the system. +Reload changes to Apache Cloudberry configuration files without interrupting the system. The `gpstop` utility can reload changes to the `pg_hba.conf` configuration file and to *runtime* parameters in the coordinator `postgresql.conf` file without service interruption. Active sessions pick up changes when they reconnect to the database. Many server configuration parameters require a full system restart (`gpstop -r`) to activate. -Reload configuration file changes without shutting down the Cloudberry Database system using the `gpstop` utility: +Reload configuration file changes without shutting down the Apache Cloudberry system using the `gpstop` utility: ```shell $ gpstop -u @@ -79,31 +79,31 @@ Start only the coordinator to perform maintenance or administrative tasks withou Incorrect use of maintenance mode connections can result in an inconsistent system state. It is recommended that Technical Support perform this operation. 
::: -## Stop Cloudberry Database +## Stop Apache Cloudberry -The `gpstop` utility stops or restarts your Cloudberry Database system and always runs on the coordinator host. When activated, `gpstop` stops all `postgres` processes in the system, including the coordinator and all segment instances. The `gpstop` utility uses a default of up to multiple parallel worker threads to bring down the Postgres instances that make up the Cloudberry Database cluster. To stop Cloudberry Database immediately, use the fast mode. +The `gpstop` utility stops or restarts your Apache Cloudberry system and always runs on the coordinator host. When activated, `gpstop` stops all `postgres` processes in the system, including the coordinator and all segment instances. The `gpstop` utility uses a default of up to multiple parallel worker threads to bring down the Postgres instances that make up the Apache Cloudberry cluster. To stop Apache Cloudberry immediately, use the fast mode. :::tip Immediate shut down mode is not recommended. This mode stops all database processes without allowing the database server to complete transaction processing or clean up any temporary or in-process work files. ::: -- To stop Cloudberry Database: +- To stop Apache Cloudberry: ```shell $ gpstop ``` -- To stop Cloudberry Database in fast mode: +- To stop Apache Cloudberry in fast mode: ```shell $ gpstop -M fast ``` - By default, you are not allowed to shut down Cloudberry Database if there are any client connections to the database. Use the `-M fast` option to roll back all in progress transactions and terminate any connections before shutting down. + By default, you are not allowed to shut down Apache Cloudberry if there are any client connections to the database. Use the `-M fast` option to roll back all in progress transactions and terminate any connections before shutting down. ## Stop client processes -Cloudberry Database launches a new backend process for each client connection. 
A Cloudberry Database user with `SUPERUSER` privileges can cancel and terminate these client backend processes. +Apache Cloudberry launches a new backend process for each client connection. An Apache Cloudberry user with `SUPERUSER` privileges can cancel and terminate these client backend processes. Canceling a backend process with the `pg_cancel_backend()` function ends a specific queued or active client query. Terminating a backend process with the `pg_terminate_backend()` function terminates a client connection to a database. @@ -117,7 +117,7 @@ The `pg_terminate_backend()` function has two similar signatures: - `pg_terminate_backend( pid int4 )` - `pg_terminate_backend( pid int4, msg text )` -If you provide a `msg`, Cloudberry Database includes the text in the cancel message returned to the client. `msg` is limited to 128 bytes; Cloudberry Database truncates anything longer. +If you provide a `msg`, Apache Cloudberry includes the text in the cancel message returned to the client. `msg` is limited to 128 bytes; Apache Cloudberry truncates anything longer. The `pg_cancel_backend()` and `pg_terminate_backend()` functions return `true` if successful, and `false` otherwise. diff --git a/docs/sys-admin/backup-and-restore/index.md b/docs/sys-admin/backup-and-restore/index.md index 8aa2f2b0b4..f36dfd6e64 100644 --- a/docs/sys-admin/backup-and-restore/index.md +++ b/docs/sys-admin/backup-and-restore/index.md @@ -4,11 +4,11 @@ title: Backup and Restore Overview # Backup and Restore Overview -Cloudberry Database offers both parallel and non-parallel methods for database backups and restores. Parallel operations handle large systems efficiently because each segment host writes data to its local disk at the same time. Non-parallel operations, however, transfer all data over the network to the coordinator, which then writes it to its storage.
This method not only concentrates I/O on a single host but also requires the coordinator to have enough local disk space for the entire database. +Apache Cloudberry offers both parallel and non-parallel methods for database backups and restores. Parallel operations handle large systems efficiently because each segment host writes data to its local disk at the same time. Non-parallel operations, however, transfer all data over the network to the coordinator, which then writes it to its storage. This method not only concentrates I/O on a single host but also requires the coordinator to have enough local disk space for the entire database. ## Parallel backup with `gpbackup` and `gprestore` -Cloudberry Database provides `gpbackup` and `gprestore` for parallel backup and restore utilities. `gpbackup` uses table-level `ACCESS SHARE` locks instead of `EXCLUSIVE` locks on the `pg_class` catalog table. This enables you to execute DDL statements such as `CREATE`, `ALTER`, `DROP`, and `TRUNCATE` during backups, as long as these statements do not target the current backup set. +Apache Cloudberry provides `gpbackup` and `gprestore` for parallel backup and restore utilities. `gpbackup` uses table-level `ACCESS SHARE` locks instead of `EXCLUSIVE` locks on the `pg_class` catalog table. This enables you to execute DDL statements such as `CREATE`, `ALTER`, `DROP`, and `TRUNCATE` during backups, as long as these statements do not target the current backup set. Backup files created with `gpbackup` are designed to provide future capabilities for restoring individual database objects along with their dependencies, such as functions and required user-defined data types. @@ -18,13 +18,13 @@ For details about backup and restore using `gpbackup` and `gprestore`, see [Perf You can also use the PostgreSQL non-parallel backup utilitiesm`pg_dump` and `pg_dumpall` to create a single dump file on the coordinator host that contains all data from all active segments. 
-The PostgreSQL non-parallel utilities should be used only for special cases. They are much slower than using `gpbackup` and `gprestore` because all of the data must pass through the coordinator. In addition, it is often the case that the coordinator host has insufficient disk space to save a backup of an entire distributed Cloudberry Database. +The PostgreSQL non-parallel utilities should be used only for special cases. They are much slower than using `gpbackup` and `gprestore` because all of the data must pass through the coordinator. In addition, it is often the case that the coordinator host has insufficient disk space to save a backup of an entire distributed Apache Cloudberry. -The `pg_restore` utility requires compressed dump files created by `pg_dump` or `pg_dumpall`. Before starting the restore, you should modify the `CREATE TABLE` statements in the dump files to include the Cloudberry Database `DISTRIBUTED` clause. If you do not include the `DISTRIBUTED` clause, Cloudberry Database assigns default values, which might not be optimal. +The `pg_restore` utility requires compressed dump files created by `pg_dump` or `pg_dumpall`. Before starting the restore, you should modify the `CREATE TABLE` statements in the dump files to include the Apache Cloudberry `DISTRIBUTED` clause. If you do not include the `DISTRIBUTED` clause, Apache Cloudberry assigns default values, which might not be optimal. To perform a non-parallel restore using parallel backup files, you can copy the backup files from each segment host to the coordinator host, and then load them through the coordinator. -Another non-parallel method for backing up Cloudberry Database data is to use the `COPY TO` SQL command to copy all or a portion of a table out of the database to a delimited text file on the coordinator host. 
+Another non-parallel method for backing up Apache Cloudberry data is to use the `COPY TO` SQL command to copy all or a portion of a table out of the database to a delimited text file on the coordinator host. ```mdx-code-block import DocCardList from '@theme/DocCardList'; diff --git a/docs/sys-admin/backup-and-restore/perform-full-backup-and-restore.md b/docs/sys-admin/backup-and-restore/perform-full-backup-and-restore.md index 29ddf07a5a..a01f402774 100644 --- a/docs/sys-admin/backup-and-restore/perform-full-backup-and-restore.md +++ b/docs/sys-admin/backup-and-restore/perform-full-backup-and-restore.md @@ -4,36 +4,36 @@ title: Perform Full Backup and Restore # Perform Full Backup and Restore -Cloudberry Database supports backing up and restoring the full database in parallel. Parallel operations scale regardless of the number of segments in your system, because segment hosts each write their data to local disk storage at the same time. +Apache Cloudberry supports backing up and restoring the full database in parallel. Parallel operations scale regardless of the number of segments in your system, because segment hosts each write their data to local disk storage at the same time. -`gpbackup` and `gprestore` are Cloudberry Database command-line utilities that create and restore backup sets for Cloudberry Database. By default, `gpbackup` stores only the object metadata files and DDL files for a backup in the Cloudberry Database coordinator data directory. Cloudberry Database segments use the `COPY ... ON SEGMENT` command to store their data for backed-up tables in compressed CSV data files, located in each segment's backups directory. +`gpbackup` and `gprestore` are Apache Cloudberry command-line utilities that create and restore backup sets for Apache Cloudberry. By default, `gpbackup` stores only the object metadata files and DDL files for a backup in the Apache Cloudberry coordinator data directory. Apache Cloudberry segments use the `COPY ... 
ON SEGMENT` command to store their data for backed-up tables in compressed CSV data files, located in each segment's backups directory. -The backup metadata files contain all of the information that `gprestore` needs to restore a full backup set in parallel. Each `gpbackup` task uses a single transaction in Cloudberry Database. During this transaction, metadata is backed up on the coordinator host, and data for each table on each segment host is written to CSV backup files using `COPY ... ON SEGMENT` commands in parallel. The backup process acquires an `ACCESS SHARE` lock on each table that is backed up. +The backup metadata files contain all of the information that `gprestore` needs to restore a full backup set in parallel. Each `gpbackup` task uses a single transaction in Apache Cloudberry. During this transaction, metadata is backed up on the coordinator host, and data for each table on each segment host is written to CSV backup files using `COPY ... ON SEGMENT` commands in parallel. The backup process acquires an `ACCESS SHARE` lock on each table that is backed up. ## Install the `gpbackup` and `gprestore` utilities Before installing the `gpbackup` and `gprestore` utilities, make sure that you have the [Golang](https://golang.org/doc/) (v1.11 or later) installed and that you have set the [Go `PATH` environment variable](https://go.dev/doc/install). -1. Pull the `cloudberrydb/gpbackup` GitHub repository to the target machine. +1. Pull the `apache/cloudberry-gpbackup` GitHub repository to the target machine. ```bash - go install github.com/cloudberrydb/gpbackup@latest + go install github.com/apache/cloudberry-gpbackup@latest ``` - The repository is placed in `$GOPATH/pkg/mod/github.com/cloudberrydb/gpbackup`. + The repository is placed in `$GOPATH/pkg/mod/github.com/apache/cloudberry-gpbackup`. -2. Enter the `cloudberrydb/gpbackup` directory. Then, build and install the source code: +2. Enter the `apache/cloudberry-gpbackup` directory. 
Then, build and install the source code: ```bash - cd <$GOPATH/pkg/mod/github.com/cloudberrydb/gpbackup> + cd <$GOPATH/pkg/mod/github.com/apache/cloudberry-gpbackup> make depend make build ``` You might encounter the `fatal: Not a git repository (or any of the parent directories): .git` prompt after running `make depend`. Ignore this prompt, because this does not affect the building. - The `build` target will put the `gpbackup` and `gprestore` binaries in `$HOME/go/bin`. This operation will also try to copy `gpbackup_helper` to the Cloudberry Database segments (by retrieving hostnames from `gp_segment_configuration`). + The `build` target will put the `gpbackup` and `gprestore` binaries in `$HOME/go/bin`. This operation will also try to copy `gpbackup_helper` to the Apache Cloudberry segments (by retrieving hostnames from `gp_segment_configuration`). 3. Check whether the build is successful by checking whether your `$HOME/go/bin` directory contains `gpback`, `gprestore`, and `gpbackup_helper`. @@ -50,7 +50,7 @@ variable](https://go.dev/doc/install). 
## Back up the full database -To perform a complete backup of a database, as well as Cloudberry Database system metadata, use the command: +To perform a complete backup of a database, as well as Apache Cloudberry system metadata, use the command: ```bash gpbackup --dbname @@ -78,12 +78,12 @@ $ gpbackup --dbname test_04 20240108:17:17:18 gpbackup:gpadmin:cbdb-coordinator:001945-[INFO]:-Pre-data metadata metadata backup complete 20240108:17:17:18 gpbackup:gpadmin:cbdb-coordinator:001945-[INFO]:-Writing post-data metadata 20240108:17:17:18 gpbackup:gpadmin:cbdb-coordinator:001945-[INFO]:-Post-data metadata backup complete -20240108:17:17:19 gpbackup:gpadmin:cbdb-coordinator:001945-[INFO]:-Found neither /usr/local/cloudberry-db-1.0.0/bin/gp_email_contacts.yaml nor /home/gpadmin//gp_email_contacts.yaml +20240108:17:17:19 gpbackup:gpadmin:cbdb-coordinator:001945-[INFO]:-Found neither /usr/local/cloudberry-1.0.0/bin/gp_email_contacts.yaml nor /home/gpadmin//gp_email_contacts.yaml 20240108:17:17:19 gpbackup:gpadmin:cbdb-coordinator:001945-[INFO]:-Email containing gpbackup report /data0/coordinator/gpseg-1/backups/20240108/20240108171718/gpbackup_20240108171718_report will not be sent 20240108:17:17:19 gpbackup:gpadmin:cbdb-coordinator:001945-[INFO]:-Backup completed successfully ``` -The above command creates a file that contains global and database-specific metadata on the Cloudberry Database coordinator host in the default directory, `$COORDINATOR_DATA_DIRECTORY/backups///`. For example: +The above command creates a file that contains global and database-specific metadata on the Apache Cloudberry coordinator host in the default directory, `$COORDINATOR_DATA_DIRECTORY/backups///`. 
For example: ```bash ls $COORDINATOR_DATA_DIRECTORY/backups/20240108/20240108171718 @@ -146,7 +146,7 @@ Pre-data objects restored: 3 / 3 [=================================] 100.00% 0s 20240108:17:42:26 gprestore:gpadmin:cbdb-coordinator:004115-[INFO]:-Pre-data metadata restore complete 20240108:17:42:26 gprestore:gpadmin:cbdb-coordinator:004115-[INFO]:-Restoring post-data metadata 20240108:17:42:26 gprestore:gpadmin:cbdb-coordinator:004115-[INFO]:-Post-data metadata restore complete -20240108:17:42:26 gprestore:gpadmin:cbdb-coordinator:004115-[INFO]:-Found neither /usr/local/cloudberry-db-1.0.0/bin/gp_email_contacts.yaml nor /home/gpadmin//gp_email_contacts.yaml +20240108:17:42:26 gprestore:gpadmin:cbdb-coordinator:004115-[INFO]:-Found neither /usr/local/cloudberry-1.0.0/bin/gp_email_contacts.yaml nor /home/gpadmin//gp_email_contacts.yaml 20240108:17:42:26 gprestore:gpadmin:cbdb-coordinator:004115-[INFO]:-Email containing gprestore report /data0/coordinator/gpseg-1/backups/20240108/20240108171718/gprestore_20240108171718_20240108174226_report will not be sent 20240108:17:42:26 gprestore:gpadmin:cbdb-coordinator:004115-[INFO]:-Restore completed successfully ``` @@ -162,7 +162,7 @@ $ gprestore --backup-dir /home/gpadmin/backups/ --timestamp 20240109102646 --cre 20240109:10:33:17 gprestore:gpadmin:cbdb-coordinator:017112-[INFO]:-Restore completed successfully ``` -`gprestore` does not attempt to restore global metadata for the Cloudberry Database system by default. If this is required, include the `--with-globals` argument. +`gprestore` does not attempt to restore global metadata for the Apache Cloudberry system by default. If this is required, include the `--with-globals` argument. By default, `gprestore` uses 1 connection to restore table data and metadata. If you have a large backup set, you can improve performance of the restore by increasing the number of parallel connections with the `--jobs` option. 
For example: @@ -250,7 +250,7 @@ After creating a backup set with `gpbackup`, you can filter the schemas and tabl - The tables that you attempt to restore must not already exist in the database. - If you attempt to restore a schema or table that does not exist in the backup set, the `gprestore` does not execute. - If you use the `--include-schema` option, `gprestore` cannot restore objects that have dependencies on multiple schemas. -- If you use the `--include-table-file` option, `gprestore` does not create roles or set the owner of the tables. The utility restores table indexes and rules. Triggers are also restored but are not supported in Cloudberry Database. +- If you use the `--include-table-file` option, `gprestore` does not create roles or set the owner of the tables. The utility restores table indexes and rules. Triggers are also restored but are not supported in Apache Cloudberry. - The file that you specify with `--include-table-file` cannot include a leaf partition name, as it can when you specify this option with `gpbackup`. If you specified leaf partitions in the backup set, specify the partitioned table to restore the leaf partition data. When restoring a backup set that contains data from some leaf partitions of a partitioned table, the partitioned table is restored along with the data for the leaf partitions. For example, you create a backup with the `gpbackup` option `--include-table-file` and the text file lists some leaf partitions of a partitioned table. Restoring the backup creates the partitioned table and restores the data only for the leaf partitions listed in the file. @@ -318,7 +318,7 @@ When leaf partitions are backed up, the leaf partition data is backed up along w When performing a backup or restore operation, `gpbackup` and `gprestore` generate a report file that contains the detailed information of the operations. When email notification is configured, the email sent contains the contents of the report file. 
For information about email notification, see [Configure email notifications](#configure-email-notifications). -The report file is placed in the Cloudberry Database coordinator backup directory. The report file name contains the timestamp of the operation. Thes following are the formats of the `gpbackup` and `gprestore` report file names. +The report file is placed in the Apache Cloudberry coordinator backup directory. The report file name contains the timestamp of the operation. The following are the formats of the `gpbackup` and `gprestore` report file names. ``` gpbackup__report @@ -332,7 +332,7 @@ gpbackup_20240109111719_report gprestore_20240109111719_20240109112545_report ``` -This backup directory on a Cloudberry Database coordinator host contains both a `gpbackup` and `gprestore` report file. +This backup directory on an Apache Cloudberry coordinator host contains both a `gpbackup` and `gprestore` report file. ```bash $ ls -l /data0/coordinator/gpseg-1/backups/20240109/20240109111719/ @@ -388,7 +388,7 @@ gpbackup 20200925140738 on mdw completed: Failure The email contains summary information about the operation including options, duration, and number of objects backed up or restored. For information about the contents of a notification email, see Report Files. :::tip -The UNIX mail utility must be running on the Cloudberry Database host and must be configured to allow the Cloudberry Database superuser (`gpadmin`) to send email. Also ensure that the mail program executable is locatable via the `gpadmin` user's `$PATH`. +The UNIX mail utility must be running on the Apache Cloudberry host and must be configured to allow the Apache Cloudberry superuser (`gpadmin`) to send email. Also ensure that the mail program executable is locatable via the `gpadmin` user's `$PATH`. 
::: ### gpbackup and gprestore email file format diff --git a/docs/sys-admin/backup-and-restore/perform-incremental-backup-and-restore.md b/docs/sys-admin/backup-and-restore/perform-incremental-backup-and-restore.md index 13b7f3b4f0..6b1cc02706 100644 --- a/docs/sys-admin/backup-and-restore/perform-incremental-backup-and-restore.md +++ b/docs/sys-admin/backup-and-restore/perform-incremental-backup-and-restore.md @@ -75,7 +75,7 @@ You created incremental backups with this command: gpbackup --dbname mytest --backup-dir /mybackup --leaf-partition-data --incremental ``` -When you specify the `--backup-dir` option, the backups are created in the `/mybackup` directory on each Cloudberry Database host. +When you specify the `--backup-dir` option, the backups are created in the `/mybackup` directory on each Apache Cloudberry host. In the example, the full backups have the timestamp keys `20230514054532` and `20231114064330`. The other backups are incremental backups. The example consists of two backup sets, the first with two incremental backups, and second with one incremental backup. The backups are listed from earliest to most recent. @@ -191,5 +191,5 @@ The incremental back up set, a full backup and associated incremental backups, m If you specify the `gprestore` option `--incremental` to restore data from a specific incremental backup, you must also specify the `--data-only` option. Before performing the restore operation, `gprestore` ensures that the tables being restored exist. If a table does not exist, `gprestore` returns an error and exits. :::warning -Changes to the Cloudberry Database segment configuration invalidate incremental backups. After you change the segment configuration (add or remove segment instances), you must create a full backup before you can create an incremental backup. +Changes to the Apache Cloudberry segment configuration invalidate incremental backups. 
After you change the segment configuration (add or remove segment instances), you must create a full backup before you can create an incremental backup. ::: \ No newline at end of file diff --git a/docs/sys-admin/check-database-system.md b/docs/sys-admin/check-database-system.md index b2fb78855e..8cb7035ebd 100644 --- a/docs/sys-admin/check-database-system.md +++ b/docs/sys-admin/check-database-system.md @@ -3,14 +3,14 @@ title: Check Database System toc_max_heading_level: 5 --- -# Check Cloudberry Database System +# Check Apache Cloudberry System -You can check a Cloudberry Database system using a variety of tools included with the system or available as plugins. +You can check an Apache Cloudberry system using a variety of tools included with the system or available as plugins. -Observing the Cloudberry Database system day-to-day performance helps administrators understand the system behavior, plan workflow, and troubleshoot problems. This document introduces scenarios for diagnosing database performance and activity. +Observing the Apache Cloudberry system day-to-day performance helps administrators understand the system behavior, plan workflow, and troubleshoot problems. This document introduces scenarios for diagnosing database performance and activity. -As a Cloudberry Database administrator, you need to check the system for problem events such as a segment going down or running out of disk space on a segment host. The following topics describe how to check the health of a Cloudberry Database system and examine certain state information for a Cloudberry Database system. +As an Apache Cloudberry administrator, you need to check the system for problem events such as a segment going down or running out of disk space on a segment host. The following topics describe how to check the health of an Apache Cloudberry system and examine certain state information for an Apache Cloudberry system. 
- [Check system state](#check-system-state) - [Check disk space usage](#check-disk-space-usage) @@ -23,17 +23,17 @@ As a Cloudberry Database administrator, you need to check the system for problem ## Check system state -A Cloudberry Database system is comprised of multiple PostgreSQL instances (the coordinator and segments) spanning multiple machines. To check a Cloudberry Database system, you need to know information about the system as a whole, as well as status information of the individual instances. The `gpstate` utility provides status information about a Cloudberry Database system. +An Apache Cloudberry system is comprised of multiple PostgreSQL instances (the coordinator and segments) spanning multiple machines. To check an Apache Cloudberry system, you need to know information about the system as a whole, as well as status information of the individual instances. The `gpstate` utility provides status information about an Apache Cloudberry system. ### View coordinator and segment status and configuration -The default `gpstate` action is to check segment instances and show a brief status of the valid and failed segments. 
For example, to see a quick status of your Apache Cloudberry system: ```shell gpstate ``` -To see more detailed information about your Cloudberry Database array configuration, use `gpstate` with the `-s` option: +To see more detailed information about your Apache Cloudberry array configuration, use `gpstate` with the `-s` option: ```shell gpstate -s @@ -71,7 +71,7 @@ SELECT * FROM gp_toolkit.gp_disk_free ORDER BY dfsegment; ### Check the sizing of distributed databases and tables -The `gp_toolkit` administrative schema contains several views that you can use to determine the disk space usage for a distributed Cloudberry Database database, schema, table, or index. +The `gp_toolkit` administrative schema contains several views that you can use to determine the disk space usage for a distributed Apache Cloudberry database, schema, table, or index. #### View disk space usage for a database @@ -125,7 +125,7 @@ AND pg_class.relname = 'test_index'; ## Check for data distribution skew -All tables in Cloudberry Database are distributed, meaning their data is divided across all of the segments in the system. Unevenly distributed data might diminish query processing performance. A table's distribution policy, set at table creation time, determines how the table's rows are distributed. For information about choosing the table distribution policy, see the following topics: +All tables in Apache Cloudberry are distributed, meaning their data is divided across all of the segments in the system. Unevenly distributed data might diminish query processing performance. A table's distribution policy, set at table creation time, determines how the table's rows are distributed. 
For information about choosing the table distribution policy, see the following topics: - [View a table's distribution key](#view-a-tables-distribution-key) - [View data distribution](#view-data-distribution) @@ -150,7 +150,7 @@ Has OIDs: no Distributed by: (sale_id) ``` -When you create a replicated table, Cloudberry Database stores all rows in the table on every segment. Replicated tables have no distribution key. Where the `\d+` meta-command reports the distribution key for a normally distributed table, it shows `Distributed Replicated` for a replicated table. +When you create a replicated table, Apache Cloudberry stores all rows in the table on every segment. Replicated tables have no distribution key. Where the `\d+` meta-command reports the distribution key for a normally distributed table, it shows `Distributed Replicated` for a replicated table. ### View data distribution @@ -164,7 +164,7 @@ FROM GROUP BY gp_segment_id; A table is considered to have a balanced distribution if all segments have roughly the same number of rows. :::tip -If you run this query on a replicated table, it fails because Cloudberry Database does not permit user queries to reference the system column `gp_segment_id` (or the system columns `ctid`, `cmin`, `cmax`, `xmin`, and `xmax`) in replicated tables. Because every segment has all of the tables' rows, replicated tables are evenly distributed by definition. +If you run this query on a replicated table, it fails because Apache Cloudberry does not permit user queries to reference the system column `gp_segment_id` (or the system columns `ctid`, `cmin`, `cmax`, `xmin`, and `xmax`) in replicated tables. Because every segment has all of the tables' rows, replicated tables are evenly distributed by definition. ::: ### Check for query processing skew @@ -202,9 +202,9 @@ This occurs when the input to a hash join operator is skewed. 
It does not preven +When you invoke `gp_log_backend_memory_contexts()` on the Apache Cloudberry coordinator host, it invokes `pg_log_backend_memory_contexts()` on the individual segments, which in turn triggers a memory usage dump to each segment log. The functions return an integer identifying the number of segments on which memory context logging was successfully activated. --> ### Sample log messages @@ -394,7 +394,7 @@ triggered the dumping of the following (subset of) memory context messages to th ## View query workfile usage information -The Cloudberry Database administrative schema `gp_toolkit` contains views that display information about Cloudberry Database workfiles. Cloudberry Database creates workfiles on disk if it does not have sufficient memory to run the query in memory. This information can be used for troubleshooting and tuning queries. The information in the views can also be used to specify the values for the Cloudberry Database configuration parameters `gp_workfile_limit_per_query` and `gp_workfile_limit_per_segment`. +The Apache Cloudberry administrative schema `gp_toolkit` contains views that display information about Apache Cloudberry workfiles. Apache Cloudberry creates workfiles on disk if it does not have sufficient memory to run the query in memory. This information can be used for troubleshooting and tuning queries. The information in the views can also be used to specify the values for the Apache Cloudberry configuration parameters `gp_workfile_limit_per_query` and `gp_workfile_limit_per_segment`. These are the views in the schema `gp_toolkit`: @@ -406,7 +406,7 @@ For information about using `gp_toolkit`, see [Using `gp_toolkit`](#use-gp_toolk ## View the database server log files -Every database instance in Cloudberry Database (coordinator and segments) runs a PostgreSQL database server with its own server log file. Log files are created in the `log` directory of the coordinator and each segment data directory. 
+Every database instance in Apache Cloudberry (coordinator and segments) runs a PostgreSQL database server with its own server log file. Log files are created in the `log` directory of the coordinator and each segment data directory. ### Log file format @@ -447,9 +447,9 @@ The following fields are written to the log: |29|file\_line|int|The line of the code file where the message originated| |30|stack\_trace|text|Stack trace text associated with this message| -### Search the Cloudberry Database server log files +### Search the Apache Cloudberry server log files -Cloudberry Database provides a utility called `gplogfilter` can search through a Cloudberry Database log file for entries matching the specified criteria. By default, this utility searches through the Cloudberry Database coordinator log file in the default logging location. For example, to display the last three lines of each of the log files under the coordinator directory: +Apache Cloudberry provides a utility called `gplogfilter` that can search through an Apache Cloudberry log file for entries matching the specified criteria. By default, this utility searches through the Apache Cloudberry coordinator log file in the default logging location. For example, to display the last three lines of each of the log files under the coordinator directory: ```shell gplogfilter -n 3 @@ -462,13 +462,13 @@ gpssh -f ``` ```shell -=> source /usr/local/cloudberry-db/greenplum_path.sh +=> source /usr/local/cloudberry/greenplum_path.sh => gplogfilter -n 3 ``` --> ## Use `gp_toolkit` -Use the Cloudberry Database administrative schema `gp_toolkit` to query the system catalogs, log files, and operating environment for system status information. The `gp_toolkit` schema contains several views you can access using SQL commands. The `gp_toolkit` schema is accessible to all database users. Some objects require superuser permissions. 
Use a command similar to the following to add the `gp_toolkit` schema to your schema search path: +Use the Apache Cloudberry administrative schema `gp_toolkit` to query the system catalogs, log files, and operating environment for system status information. The `gp_toolkit` schema contains several views you can access using SQL commands. The `gp_toolkit` schema is accessible to all database users. Some objects require superuser permissions. Use a command similar to the following to add the `gp_toolkit` schema to your schema search path: ```sql => ALTER ROLE myrole SET search_path TO myschema,gp_toolkit; @@ -483,7 +483,7 @@ The PL/pgSQL condition name for each error code is the same as the phrase shown :::tip **How to view error codes** -When you execute SQL queries or perform other database operations in Cloudberry Database, and an error occurs, the system returns an error message. However, this standard error message may not directly display the SQLSTATE error code. Here are some methods to view these error codes: +When you execute SQL queries or perform other database operations in Apache Cloudberry, and an error occurs, the system returns an error message. However, this standard error message may not directly display the SQLSTATE error code. Here are some methods to view these error codes: - Use PL/pgSQL exception handling. For example: @@ -498,7 +498,7 @@ When you execute SQL queries or perform other database operations in Cloudberry $$; ``` -- Check the database log. Cloudberry Database records detailed error information, including error codes, in its log files. Depending on your system setup, you can check the log files on the database server for this information. +- Check the database log. Apache Cloudberry records detailed error information, including error codes, in its log files. Depending on your system setup, you can check the log files on the database server for this information. - Use advanced database client tools. 
Some advanced database client or management tools may offer more detailed error reporting features that can directly display SQLSTATE error codes. @@ -722,7 +722,7 @@ PL/pgSQL does not recognize warning, as opposed to error, condition names; those |57P01|ADMIN SHUTDOWN|admin\_shutdown| |57P02|CRASH SHUTDOWN|crash\_shutdown| |57P03|CANNOT CONNECT NOW|cannot\_connect\_now| -|**Class 58** — System Error (errors external to Cloudberry Database )| +|**Class 58** — System Error (errors external to Apache Cloudberry)| |58030|IO ERROR|io\_error| |58P01|UNDEFINED FILE|undefined\_file| |58P02|DUPLICATE FILE|duplicate\_file| diff --git a/docs/sys-admin/configure-database-system.md b/docs/sys-admin/configure-database-system.md index e65ca86940..b99ec3bf9f 100644 --- a/docs/sys-admin/configure-database-system.md +++ b/docs/sys-admin/configure-database-system.md @@ -5,11 +5,11 @@ toc_max_heading_level: 5 # Configure Database System -Server configuration parameters affect the behavior of Cloudberry Database. They are part of the PostgreSQL "Grand Unified Configuration" system, so they are sometimes called "GUCs". Most of the Cloudberry Database server configuration parameters are the same as the PostgreSQL configuration parameters, but some are specific to Cloudberry Database. +Server configuration parameters affect the behavior of Apache Cloudberry. They are part of the PostgreSQL "Grand Unified Configuration" system, so they are sometimes called "GUCs". Most of the Apache Cloudberry server configuration parameters are the same as the PostgreSQL configuration parameters, but some are specific to Apache Cloudberry. ## Coordinator and local parameters -Server configuration files contain parameters that configure server behavior. 
The Apache Cloudberry configuration file, `postgresql.conf`, resides in the data directory of the database instance. The coordinator and each segment instance have their own `postgresql.conf` file. Some parameters are local: each segment instance examines its `postgresql.conf` file to get the value of that parameter. Set local parameters on the coordinator and on each segment instance. @@ -17,19 +17,19 @@ Other parameters are coordinator parameters that you set on the coordinator inst ## Set configuration parameters -Many configuration parameters limit who can change them and where or when they can be set. For example, to change certain parameters, you must be a Cloudberry Database superuser. Other parameters can be set only at the system level in the `postgresql.conf` file or require a system restart to take effect. +Many configuration parameters limit who can change them and where or when they can be set. For example, to change certain parameters, you must be an Apache Cloudberry superuser. Other parameters can be set only at the system level in the `postgresql.conf` file or require a system restart to take effect. Many configuration parameters are session parameters. You can set session parameters at the system level, the database level, the role level or the session level. Database users can change most session parameters within their session, but some require superuser permissions. ### Set a local configuration parameter -To change a local configuration parameter across multiple segments, update the parameter in the `postgresql.conf` file of each targeted segment, both primary and mirror. Use the `gpconfig` utility to set a parameter in all Cloudberry Database `postgresql.conf` files. For example: +To change a local configuration parameter across multiple segments, update the parameter in the `postgresql.conf` file of each targeted segment, both primary and mirror. Use the `gpconfig` utility to set a parameter in all Apache Cloudberry `postgresql.conf` files. 
For example: ```shell $ gpconfig -c gp_vmem_protect_limit -v 4096 ``` -Restart Cloudberry Database to make the configuration changes effective: +Restart Apache Cloudberry to make the configuration changes effective: ```shell $ gpstop -r @@ -37,7 +37,7 @@ $ gpstop -r ### Set a coordinator configuration parameter -To set a coordinator configuration parameter, set it at the Cloudberry Database coordinator instance. If it is also a session parameter, you can set the parameter for a particular database, role or session. If a parameter is set at multiple levels, the most granular level takes precedence. For example, session overrides role, role overrides database, and database overrides system. +To set a coordinator configuration parameter, set it at the Apache Cloudberry coordinator instance. If it is also a session parameter, you can set the parameter for a particular database, role or session. If a parameter is set at multiple levels, the most granular level takes precedence. For example, session overrides role, role overrides database, and database overrides system. 
#### Set parameters at the system level @@ -52,7 +52,7 @@ Coordinator parameter settings in the coordinator `postgresql.conf` file are the $ gpstop -u ``` - For parameter changes that require a server restart, restart Cloudberry Database as follows: + For parameter changes that require a server restart, restart Apache Cloudberry as follows: ```shell $ gpstop -r diff --git a/docs/sys-admin/enable-coordinator-mirroring.md b/docs/sys-admin/enable-coordinator-mirroring.md index c4646a939a..74d8e339f9 100644 --- a/docs/sys-admin/enable-coordinator-mirroring.md +++ b/docs/sys-admin/enable-coordinator-mirroring.md @@ -1,11 +1,11 @@ --- -title: Enable Coordinator Mirroring for Cloudberry Database +title: Enable Coordinator Mirroring for Apache Cloudberry toc_max_heading_level: 5 --- -# Enable Coordinator Mirroring for Cloudberry Database +# Enable Coordinator Mirroring for Apache Cloudberry -Cloudberry Database provides a series of high-availability features to make sure your database system can tolerate unexpected incidents such as a hardware platform failure and can be quickly recovered from such incidents. +Apache Cloudberry provides a series of high-availability features to make sure your database system can tolerate unexpected incidents such as a hardware platform failure and can be quickly recovered from such incidents. This topic describes how to configure coordinator mirroring to ensure a smooth coordinator node failover. @@ -33,7 +33,7 @@ Make sure that you have already configured a standby coordinator on a different :::note -If you follow the steps described in the [Prepare to Deploy](https://cloudberrydb.org/docs/cbdb-op-prepare-to-deploy) and [Deploy Cloudberry Database Manually Using RPM Package](https://cloudberrydb.org/docs/cbdb-op-deploy-guide) topics to deploy the cluster, a host for the standby coordinator ( `cbdb-standbycoordinator`) is already configured in the cluster. 
+If you follow the steps described in the [Prepare to Deploy](https://cloudberry.apache.org/docs/cbdb-op-prepare-to-deploy) and [Deploy Apache Cloudberry Manually Using RPM Package](https://cloudberry.apache.org/docs/cbdb-op-deploy-guide) topics to deploy the cluster, a host for the standby coordinator ( `cbdb-standbycoordinator`) is already configured in the cluster. ::: diff --git a/docs/sys-admin/recommended-maintenance-monitoring-tasks.md b/docs/sys-admin/recommended-maintenance-monitoring-tasks.md index 0d8fa096cd..8b4d1e7a77 100644 --- a/docs/sys-admin/recommended-maintenance-monitoring-tasks.md +++ b/docs/sys-admin/recommended-maintenance-monitoring-tasks.md @@ -4,9 +4,9 @@ title: Recommended Monitoring and Maintenance Tasks # Recommended Monitoring and Maintenance Tasks -This section lists monitoring and maintenance operations recommended to ensure high availability and consistent performance of your Cloudberry Database cluster. +This section lists monitoring and maintenance operations recommended to ensure high availability and consistent performance of your Apache Cloudberry cluster. -The tables in the following sections suggest operations that a Cloudberry Database system administrator can perform periodically to ensure that all components of the system are operating optimally. Monitoring operations help you to detect and diagnose problems early. Maintenance operations help you to keep the system up-to-date and avoid deteriorating performance, for example, from bloated system tables or diminishing free disk space. +The tables in the following sections suggest operations that an Apache Cloudberry system administrator can perform periodically to ensure that all components of the system are operating optimally. Monitoring operations help you to detect and diagnose problems early. Maintenance operations help you to keep the system up-to-date and avoid deteriorating performance, for example, from bloated system tables or diminishing free disk space. 
It is not necessary to implement all of these suggestions in every cluster; use the frequency and severity recommendations as a guide to implement measures according to your service requirements. @@ -132,7 +132,7 @@ GROUP BY 1;
@@ -330,7 +330,7 @@ psql <dbname> -c

Severity: IMPORTANT

@@ -342,7 +342,7 @@ psql <dbname> -c

Severity: IMPORTANT

@@ -354,7 +354,7 @@ psql <dbname> -c

Severity: IMPORTANT

@@ -521,17 +521,17 @@ psql <dbname> -c
diff --git a/docs/sys-utilities/analyzedb.md b/docs/sys-utilities/analyzedb.md index ad341d8770..65553deb0d 100644 --- a/docs/sys-utilities/analyzedb.md +++ b/docs/sys-utilities/analyzedb.md @@ -32,7 +32,7 @@ analyzedb { -? | -h | --help } ## Description -The analyzedb utility updates statistics on table data for the specified tables in a Cloudberry Database incrementally and concurrently. +The analyzedb utility updates statistics on table data for the specified tables in an Apache Cloudberry incrementally and concurrently. While performing [`ANALYZE`](/docs/sql-stmts/analyze.md) operations, analyzedb creates a snapshot of the table metadata and stores it on disk on the coordinator host. An `ANALYZE` operation is performed only if the table has been modified. If a table or partition has not been modified since the last time it was analyzed, analyzedb automatically skips the table or partition because it already contains up-to-date statistics. @@ -51,7 +51,7 @@ For a partitioned, append-optimized table, analyzedb checks the root partitioned ## Notes -The analyzedb utility updates append optimized table statistics if the table has been modified by DML or DDL commands, including INSERT, DELETE, UPDATE, CREATE TABLE, ALTER TABLE and TRUNCATE. The utility determines if a table has been modified by comparing catalog metadata of tables with the previous snapshot of metadata taken during a previous analyzedb operation. The snapshots of table metadata are stored as state files in the directory `db_analyze//` in the Cloudberry Database coordinator data directory. +The analyzedb utility updates append optimized table statistics if the table has been modified by DML or DDL commands, including INSERT, DELETE, UPDATE, CREATE TABLE, ALTER TABLE and TRUNCATE. The utility determines if a table has been modified by comparing catalog metadata of tables with the previous snapshot of metadata taken during a previous analyzedb operation. 
The snapshots of table metadata are stored as state files in the directory `db_analyze//` in the Apache Cloudberry coordinator data directory. The utility preserves old snapshot information from the past 8 days, and the 3 most recent state directories regardless of age, while all other directories are automatically removed. You can also specify the `--clean_last` or `--clean_all` option to remove state files generated by analyzedb. @@ -194,7 +194,7 @@ If you do not specify a table, set of tables, or schema, the analyzedb utility c analyzedb -d mytest ``` -You can create a PL/Python function to run the `analyzedb` utility as a Cloudberry Database function. This example `CREATE FUNCTION` command creates a user defined PL/Python function that runs the `analyzedb` utility and displays output on the command line. Specify `analyzedb` options as the function parameter. +You can create a PL/Python function to run the `analyzedb` utility as an Apache Cloudberry function. This example `CREATE FUNCTION` command creates a user defined PL/Python function that runs the `analyzedb` utility and displays output on the command line. Specify `analyzedb` options as the function parameter. ```sql CREATE OR REPLACE FUNCTION analyzedb(params TEXT) diff --git a/docs/sys-utilities/clusterdb.md b/docs/sys-utilities/clusterdb.md index 0a05056450..5a79383908 100644 --- a/docs/sys-utilities/clusterdb.md +++ b/docs/sys-utilities/clusterdb.md @@ -118,7 +118,7 @@ Default connection parameters. Specifies whether to use color in diagnostic messages. Possible values are `always`, `auto`, and `never`. -This utility, like most other Cloudberry Database utilities, also uses the environment variables supported by `libpq`. +This utility, like most other Apache Cloudberry utilities, also uses the environment variables supported by `libpq`. 
## Diagnostics diff --git a/docs/sys-utilities/createdb.md b/docs/sys-utilities/createdb.md index c794e6f28b..1b023b9817 100644 --- a/docs/sys-utilities/createdb.md +++ b/docs/sys-utilities/createdb.md @@ -18,7 +18,7 @@ createdb -V | --version ## Description -`createdb` creates a new database in a Cloudberry Database system. +`createdb` creates a new database in an Apache Cloudberry system. Normally, the database user who runs this command becomes the owner of the new database. However, a different owner can be specified via the `-O` option, if the executing user has appropriate privileges. @@ -28,7 +28,7 @@ Normally, the database user who runs this command becomes the owner of the new d **`dbname`** -The name of the database to be created. The name must be unique among all other databases in the Cloudberry Database system. If not specified, reads from the environment variable `PGDATABASE`, then `PGUSER` or defaults to the current system user. +The name of the database to be created. The name must be unique among all other databases in the Apache Cloudberry system. If not specified, reads from the environment variable `PGDATABASE`, then `PGUSER` or defaults to the current system user. **`description`** @@ -80,11 +80,11 @@ The options `-D`, `-l`, `-E`, `-O`, and `-T` correspond to options of the underl **`-h host | --host=HOSTNAME`** -The host name of the machine on which the Cloudberry Database coordinator server is running. If not specified, reads from the environment variable `PGHOST` or defaults to localhost. +The host name of the machine on which the Apache Cloudberry coordinator server is running. If not specified, reads from the environment variable `PGHOST` or defaults to localhost. **`-p port | --port=PORT`** -The TCP port on which the Cloudberry Database coordinator server is listening for connections. If not specified, reads from the environment variable `PGPORT` or defaults to 5432. 
+The TCP port on which the Apache Cloudberry coordinator server is listening for connections. If not specified, reads from the environment variable `PGPORT` or defaults to 5432. **`-U username | --username=USERNAME`** @@ -110,7 +110,7 @@ To create the database `test` using the default options: createdb test ``` -To create the database `demo` using the Cloudberry Database coordinator on host `gpcoord`, port `54321`, using the `LATIN1` encoding scheme: +To create the database `demo` using the Apache Cloudberry coordinator on host `gpcoord`, port `54321`, using the `LATIN1` encoding scheme: ```shell createdb -p 54321 -h gpcoord -E LATIN1 demo diff --git a/docs/sys-utilities/createuser.md b/docs/sys-utilities/createuser.md index 5cf0e2a21a..5d7079b286 100644 --- a/docs/sys-utilities/createuser.md +++ b/docs/sys-utilities/createuser.md @@ -18,7 +18,7 @@ createuser -V | --version ## Description -`createuser` creates a new Cloudberry Database role. You must be a superuser or have the `CREATEROLE` privilege to create new roles. You must connect to the database as a superuser to create new superusers. +`createuser` creates a new Apache Cloudberry role. You must be a superuser or have the `CREATEROLE` privilege to create new roles. You must connect to the database as a superuser to create new superusers. Superusers can bypass all access permission checks within the database, so superuser privileges should not be granted lightly. @@ -28,7 +28,7 @@ Superusers can bypass all access permission checks within the database, so super **`role_name`** -The name of the role to be created. This name must be different from all existing roles in this Cloudberry Database installation. +The name of the role to be created. This name must be different from all existing roles in this Apache Cloudberry installation. 
**`-c number | --connection-limit=number`** @@ -64,7 +64,7 @@ Prompt for the user name if none is specified on the command line, and also prom **`-l | --login`** -The new role will be allowed to log in to Cloudberry Database. This is the default. +The new role will be allowed to log in to Apache Cloudberry. This is the default. **`-L | --no-login`** diff --git a/docs/sys-utilities/dropuser.md b/docs/sys-utilities/dropuser.md index 99218f0386..d074a55cbf 100644 --- a/docs/sys-utilities/dropuser.md +++ b/docs/sys-utilities/dropuser.md @@ -18,7 +18,7 @@ dropuser -V | --version ## Description -`dropuser` removes an existing role from Cloudberry Database. Only superusers and users with the `CREATEROLE` privilege can remove roles. To remove a superuser role, you must yourself be a superuser. +`dropuser` removes an existing role from Apache Cloudberry. Only superusers and users with the `CREATEROLE` privilege can remove roles. To remove a superuser role, you must yourself be a superuser. `dropuser` is a wrapper around the SQL command `DROP ROLE`. diff --git a/docs/sys-utilities/gpactivatestandby.md b/docs/sys-utilities/gpactivatestandby.md index f476aeb8c5..b09e3b45c3 100644 --- a/docs/sys-utilities/gpactivatestandby.md +++ b/docs/sys-utilities/gpactivatestandby.md @@ -4,7 +4,7 @@ title: gpactivatestandby # gpactivatestandby -Activates a standby coordinator host and makes it the active coordinator for the Cloudberry Database system. +Activates a standby coordinator host and makes it the active coordinator for the Apache Cloudberry system. ## Synopsis @@ -19,7 +19,7 @@ gpactivatestandby -? | -h | --help ## Description -The `gpactivatestandby` utility activates a backup, standby coordinator host and brings it into operation as the active coordinator instance for a Cloudberry Database system. The activated standby coordinator effectively becomes the Cloudberry Database coordinator, accepting client connections on the coordinator port. 
+The `gpactivatestandby` utility activates a backup, standby coordinator host and brings it into operation as the active coordinator instance for an Apache Cloudberry system. The activated standby coordinator effectively becomes the Apache Cloudberry coordinator, accepting client connections on the coordinator port. >**NOTE** >Before running `gpactivatestandby`, be sure to run `gpstate -f` to confirm that the standby coordinator is synchronized with the current coordinator node. If synchronized, the final line of the `gpstate -f` output will look similar to this: `20230607:06:50:06:004205 gpstate:test1-m:gpadmin-[INFO]:--Sync state: sync` @@ -33,7 +33,7 @@ The utility will perform the following steps: - Stops the synchronization process (`walreceiver`) on the standby coordinator - Updates the system catalog tables of the standby coordinator using the logs - Activates the standby coordinator to be the new active coordinator for the system -- Restarts the Cloudberry Database system with the new coordinator host +- Restarts the Apache Cloudberry system with the new coordinator host A backup, standby Cloudberry coordinator host serves as a 'warm standby' in the event of the primary Cloudberry coordinator host becoming non-operational. The standby coordinator is kept up to date by transaction log replication processes (the `walsender` and `walreceiver`), which run on the primary coordinator and standby coordinator hosts and keep the data between the primary and standby coordinator hosts synchronized. @@ -47,7 +47,7 @@ After activating a standby coordinator, run `ANALYZE` to update the database que psql -c 'ANALYZE;' ``` -After you activate the standby coordinator as the primary coordinator, the Cloudberry Database system no longer has a standby coordinator configured. You might want to specify another host to be the new standby with the [gpinitstandby](/docs/sys-utilities/gpinitstandby.md) utility. 
+After you activate the standby coordinator as the primary coordinator, the Apache Cloudberry system no longer has a standby coordinator configured. You might want to specify another host to be the new standby with the [gpinitstandby](/docs/sys-utilities/gpinitstandby.md) utility. ## Options @@ -85,7 +85,7 @@ Displays the online help. ## Example -Activate the standby coordinator host and make it the active coordinator instance for a Cloudberry Database system (run from backup coordinator host you are activating): +Activate the standby coordinator host and make it the active coordinator instance for an Apache Cloudberry system (run from backup coordinator host you are activating): ```shell gpactivatestandby -d /gpdata diff --git a/docs/sys-utilities/gpaddmirrors.md b/docs/sys-utilities/gpaddmirrors.md index 241c2d68ed..75bc7ca587 100644 --- a/docs/sys-utilities/gpaddmirrors.md +++ b/docs/sys-utilities/gpaddmirrors.md @@ -4,7 +4,7 @@ title: gpaddmirrors # gpaddmirrors -Adds mirror segments to a Cloudberry Database system that was initially configured without mirroring. +Adds mirror segments to an Apache Cloudberry system that was initially configured without mirroring. ## Synopsis @@ -25,9 +25,9 @@ gpaddmirrors --version ## Description -The `gpaddmirrors` utility configures mirror segment instances for an existing Cloudberry Database system that was initially configured with primary segment instances only. The utility will create the mirror instances and begin the online replication process between the primary and mirror segment instances. Once all mirrors are synchronized with their primaries, your Cloudberry Database system is fully data redundant. +The `gpaddmirrors` utility configures mirror segment instances for an existing Apache Cloudberry system that was initially configured with primary segment instances only. The utility will create the mirror instances and begin the online replication process between the primary and mirror segment instances. 
Once all mirrors are synchronized with their primaries, your Apache Cloudberry system is fully data redundant. -> **Important** During the online replication process, Cloudberry Database should be in a quiescent state, workloads and other queries should not be running. +> **Important** During the online replication process, Apache Cloudberry should be in a quiescent state, workloads and other queries should not be running. By default, the utility will prompt you for the file system location(s) where it will create the mirror segment data directories. If you do not want to be prompted, you can pass in a file containing the file system locations using the `-m` option. @@ -70,11 +70,11 @@ The `gp_segment_configuration` system catalog table can help you determine your    ORDER BY dbid; ``` -If you are creating mirrors on alternate mirror hosts, the new mirror segment hosts must be pre-installed with the Cloudberry Database software and configured exactly the same as the existing primary segment hosts. +If you are creating mirrors on alternate mirror hosts, the new mirror segment hosts must be pre-installed with the Apache Cloudberry software and configured exactly the same as the existing primary segment hosts. You must make sure that the user who runs `gpaddmirrors` (the `gpadmin` user) has permissions to write to the data directory locations specified. You may want to create these directories on the segment hosts and `chown` them to the appropriate user before running `gpaddmirrors`. -> **Note** This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Cloudberry Database deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` configuration parameter to increase this threshold. 
For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. +> **Note** This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Apache Cloudberry deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` configuration parameter to increase this threshold. For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. ## Options @@ -96,7 +96,7 @@ The coordinator data directory. If not specified, the value set for `$COORDINATO **`--hba-hostnames boolean`** -Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Cloudberry Database. When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Cloudberry Database system was initialized. +Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Apache Cloudberry. When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Apache Cloudberry system was initialized. **`-i mirror_config_file`** @@ -139,7 +139,7 @@ For example, if a primary segment has port 50001, then its mirror will use a dat **`-s (spread mirrors)`** -Spreads the mirror segments across the available hosts. 
The default is to group a set of mirror segments together on an alternate host from their primary segment set. Mirror spreading will place each mirror on a different host within the Cloudberry Database array. Spreading is only allowed if there is a sufficient number of hosts in the array (number of hosts is greater than the number of segment instances per host). +Spreads the mirror segments across the available hosts. The default is to group a set of mirror segments together on an alternate host from their primary segment set. Mirror spreading will place each mirror on a different host within the Apache Cloudberry array. Spreading is only allowed if there is a sufficient number of hosts in the array (number of hosts is greater than the number of segment instances per host). **`-v (verbose)`** @@ -160,13 +160,13 @@ When specifying a mirroring configuration using the `gpaddmirrors` option `-i`, - If you specify a hostname, the resolution of the hostname to an IP address should be done locally for security. For example, you should use entries in a local `/etc/hosts` file to map the hostname to an IP address. The resolution of a hostname to an IP address should not be performed by an external service such as a public DNS server. You must stop the Cloudberry system before you change the mapping of a hostname to a different IP address. - If you specify an IP address, the address should not be changed after the initial configuration. When segment mirroring is enabled, replication from the primary to the mirror segment will fail if the IP address changes from the configured value. For this reason, you should use a hostname when enabling mirroring using the `-i` option unless you have a specific requirement to use IP addresses. -When enabling a mirroring configuration that adds hosts to the Cloudberry system, `gpaddmirrors` populates the `gp_segment_configuration` catalog table with the mirror segment instance information. 
Cloudberry Database uses the address value of the `gp_segment_configuration` catalog table when looking up host systems for Cloudberry interconnect (internal) communication between the coordinator and segment instances and between segment instances, and for other internal communication. +When enabling a mirroring configuration that adds hosts to the Cloudberry system, `gpaddmirrors` populates the `gp_segment_configuration` catalog table with the mirror segment instance information. Apache Cloudberry uses the address value of the `gp_segment_configuration` catalog table when looking up host systems for Cloudberry interconnect (internal) communication between the coordinator and segment instances and between segment instances, and for other internal communication. ## Use Host Systems with Multiple NICs -If hosts systems are configured with multiple NICs, you can initialize a Cloudberry Database system to use each NIC as a Cloudberry host system. You must ensure that the host systems are configured with sufficient resources to support all the segment instances being added to the host. Also, if you enable segment mirroring, you must ensure that the Cloudberry system configuration supports failover if a host system fails. +If host systems are configured with multiple NICs, you can initialize an Apache Cloudberry system to use each NIC as a Cloudberry host system. You must ensure that the host systems are configured with sufficient resources to support all the segment instances being added to the host. Also, if you enable segment mirroring, you must ensure that the Cloudberry system configuration supports failover if a host system fails. -For example, this is a segment instance configuration for a simple Cloudberry system. The segment host `gp7c` is configured with two NICs, `gp7c-1` and `gp7c-2`, where the Cloudberry Database system uses `gp7c-1` for the coordinator segment and `gp7c-2` for segment instances. 
+For example, this is a segment instance configuration for a simple Cloudberry system. The segment host `gp7c` is configured with two NICs, `gp7c-1` and `gp7c-2`, where the Apache Cloudberry system uses `gp7c-1` for the coordinator segment and `gp7c-2` for segment instances. ```sql select content, role, port, hostname, address from gp_segment_configuration ; @@ -183,7 +183,7 @@ select content, role, port, hostname, address from gp_segment_configuration ; ## Examples -Add mirroring to an existing Cloudberry Database system using the same set of hosts as your primary data. Calculate the mirror database ports by adding 100 to the current primary segment port numbers: +Add mirroring to an existing Apache Cloudberry system using the same set of hosts as your primary data. Calculate the mirror database ports by adding 100 to the current primary segment port numbers: ```shell $ gpaddmirrors -p 100 @@ -195,7 +195,7 @@ Generate a sample mirror configuration file with the `-o` option to use with `gp $ gpaddmirrors -o /home/gpadmin/sample_mirror_config ``` -Add mirroring to an existing Cloudberry Database system using a different set of hosts from your primary data: +Add mirroring to an existing Apache Cloudberry system using a different set of hosts from your primary data: ```shell $ gpaddmirrors -i mirror_config_file diff --git a/docs/sys-utilities/gpbackup.md b/docs/sys-utilities/gpbackup.md index 715879c8a2..3927d0dc68 100644 --- a/docs/sys-utilities/gpbackup.md +++ b/docs/sys-utilities/gpbackup.md @@ -4,7 +4,7 @@ title: gpbackup # gpbackup -Create a Cloudberry Database backup for use with the `gprestore` utility. +Create an Apache Cloudberry backup for use with the `gprestore` utility. ## Synopsis @@ -44,17 +44,17 @@ gpbackup --help The `gpbackup` utility backs up the contents of a database into a collection of metadata files and data files that can be used to restore the database at a later time using `gprestore`. 
When you back up a database, you can specify table level and schema level filter options to back up specific tables. For example, you can combine schema level and table level options to back up all the tables in a schema except for a single table. -By default, `gpbackup` backs up objects in the specified database as well as global Cloudberry Database system objects. Use `--without-globals` to omit global objects. `gprestore` does not restore global objects by default; use `--with-globals` to restore them. +By default, `gpbackup` backs up objects in the specified database as well as global Apache Cloudberry system objects. Use `--without-globals` to omit global objects. `gprestore` does not restore global objects by default; use `--with-globals` to restore them. For materialized views, data is not backed up, only the materialized view definition is backed up. -`gpbackup` stores the object metadata files and DDL files for a backup in the Cloudberry Database master data directory by default. Cloudberry Database segments use the `COPY ... ON SEGMENT` command to store their data for backed-up tables in compressed CSV data files, located in each segment's data directory. +`gpbackup` stores the object metadata files and DDL files for a backup in the Apache Cloudberry master data directory by default. Apache Cloudberry segments use the `COPY ... ON SEGMENT` command to store their data for backed-up tables in compressed CSV data files, located in each segment's data directory. -You can add the `--backup-dir` option to copy all backup files from the Cloudberry Database master and segment hosts to an absolute path for later use. Additional options are provided to filter the backup set in order to include or exclude specific tables. +You can add the `--backup-dir` option to copy all backup files from the Apache Cloudberry master and segment hosts to an absolute path for later use. 
Additional options are provided to filter the backup set in order to include or exclude specific tables. You can create an incremental backup with the `--incremental` option. Incremental backups are efficient when the total amount of data in append-optimized tables or table partitions that changed is small compared to the data has not changed. -With the default `--jobs` option (1 job), each `gpbackup` operation uses a single transaction on the Cloudberry Database master host. The `COPY ... ON SEGMENT` command performs the backup task in parallel on each segment host. The backup process acquires an `ACCESS SHARE` lock on each table that is backed up. During the table locking process, the database should be in a quiescent state. +With the default `--jobs` option (1 job), each `gpbackup` operation uses a single transaction on the Apache Cloudberry master host. The `COPY ... ON SEGMENT` command performs the backup task in parallel on each segment host. The backup process acquires an `ACCESS SHARE` lock on each table that is backed up. During the table locking process, the database should be in a quiescent state. When a back up operation completes, `gpbackup` returns a status code. @@ -62,13 +62,13 @@ The `gpbackup` utility cannot be run while `gpexpand` is initializing new segmen `gpbackup` can send status email notifications after a back up operation completes. You specify when the utility sends the mail and the email recipients in a configuration file. -**Note**: This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Cloudberry Database deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. 
For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. +**Note**: This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Apache Cloudberry deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. ## Options **--dbname database_name:** Required. Specifies the database to back up. -**--backup-dir directory:** Optional. Copies all required backup files (metadata files and data files) to the specified directory. You must specify directory as an absolute path (not relative). If you do not supply this option, metadata files are created on the Cloudberry Database master host in the $MASTER_DATA_DIRECTORY/backups/YYYYMMDD/YYYYMMDDhhmmss/ directory. Segment hosts create CSV data files in the `/backups/YYYYMMDD/YYYYMMDDhhmmss/` directory. When you specify a custom backup directory, files are copied to these paths in subdirectories of the backup directory. +**--backup-dir directory:** Optional. Copies all required backup files (metadata files and data files) to the specified directory. You must specify directory as an absolute path (not relative). If you do not supply this option, metadata files are created on the Apache Cloudberry master host in the $MASTER_DATA_DIRECTORY/backups/YYYYMMDD/YYYYMMDDhhmmss/ directory. Segment hosts create CSV data files in the `/backups/YYYYMMDD/YYYYMMDDhhmmss/` directory. When you specify a custom backup directory, files are copied to these paths in subdirectories of the backup directory. You cannot combine this option with the option `--plugin-config`. 
@@ -156,7 +156,7 @@ You cannot combine this option with the option `--backup-dir`. **--with-stats**: Optional. Include query plan statistics in the backup set. -**--without-globals**: Optional. Omit the global Cloudberry Database system objects during backup. +**--without-globals**: Optional. Omit the global Apache Cloudberry system objects during backup. **--help:** Displays the online help. @@ -207,7 +207,7 @@ my#1schema.my_$590_Table ## Examples -Backup all schemas and tables in the "demo" database, including global Cloudberry Database system objects statistics: +Backup all schemas and tables in the "demo" database, including global Apache Cloudberry system objects statistics: ``` $ gpbackup --dbname demo @@ -225,7 +225,7 @@ Backup only the "twitter" schema in the "demo" database: $ gpbackup --dbname demo --include-schema twitter ``` -Backup all schemas and tables in the "demo" database, including global Cloudberry Database system objects and query statistics, and copy all backup files to the /home/gpadmin/backup directory: +Backup all schemas and tables in the "demo" database, including global Apache Cloudberry system objects and query statistics, and copy all backup files to the /home/gpadmin/backup directory: ``` $ gpbackup --dbname demo --with-stats --backup-dir /home/gpadmin/backup diff --git a/docs/sys-utilities/gpcheckcat.md b/docs/sys-utilities/gpcheckcat.md index 54ac24820a..d185422f0d 100644 --- a/docs/sys-utilities/gpcheckcat.md +++ b/docs/sys-utilities/gpcheckcat.md @@ -4,7 +4,7 @@ title: gpcheckcat # gpcheckcat -The `gpcheckcat` utility tests Cloudberry Database catalog tables for inconsistencies. +The `gpcheckcat` utility tests Apache Cloudberry catalog tables for inconsistencies. The utility is in `$GPHOME/bin/lib`. 
@@ -39,7 +39,7 @@ The `gpcheckcat` utility runs multiple tests that check for database catalog inc > **Note** Any time you run the utility, it checks for and deletes orphaned, temporary database schemas (temporary schemas without a session ID) in the specified databases. The utility displays the results of the orphaned, temporary schema check on the command line and also logs the results. -Catalog inconsistencies are inconsistencies that occur between Cloudberry Database system tables. In general, there are three types of inconsistencies: +Catalog inconsistencies are inconsistencies that occur between Apache Cloudberry system tables. In general, there are three types of inconsistencies: - Inconsistencies in system tables at the segment level. For example, an inconsistency between a system table that contains table data and a system table that contains column data. As another, a system table that contains duplicates in a column that should to be unique. @@ -50,13 +50,13 @@ Catalog inconsistencies are inconsistencies that occur between Cloudberry Databa **`-A`** -Run `gpcheckcat` on all databases in the Cloudberry Database installation. +Run `gpcheckcat` on all databases in the Apache Cloudberry installation. **`-B `** The number of processes to run in parallel. -The `gpcheckcat` utility attempts to determine the number of simultaneous processes (the batch size) to use. The utility assumes it can use a buffer with a minimum of 20MB for each process. The maximum number of parallel processes is the number of Cloudberry Database segment instances. The utility displays the number of parallel processes that it uses when it starts checking the catalog. +The `gpcheckcat` utility attempts to determine the number of simultaneous processes (the batch size) to use. The utility assumes it can use a buffer with a minimum of 20MB for each process. The maximum number of parallel processes is the number of Apache Cloudberry segment instances. 
The utility displays the number of parallel processes that it uses when it starts checking the catalog. > **Note** The utility might run out of memory if the number of errors returned exceeds the buffer size. If an out of memory error occurs, you can lower the batch size with the `-B` option. For example, if the utility displays a batch size of 936 and runs out of memory, you can specify `-B 468` to run 468 processes in parallel. @@ -78,17 +78,17 @@ Run only the `gpcheckcat` tests that can be run in online (not restricted) mode. **`-p port`** -This option specifies the port that is used by the Cloudberry Database. +This option specifies the port that is used by the Apache Cloudberry. **`-P password`** -The password of the user connecting to Cloudberry Database. +The password of the user connecting to Apache Cloudberry. **`-R test_name | 'test_name1,test_name2 [, ...]'`** Specify one or more tests to run. Specify multiple tests as a comma-delimited list of test names enclosed in quotes. -Some tests can be run only when Cloudberry Database is in restricted mode. +Some tests can be run only when Apache Cloudberry is in restricted mode. These are the tests that can be performed: @@ -128,13 +128,13 @@ Specify one ore more tests to skip. Specify multiple tests as a comma-delimited **`-S {none | only}`** -Specify this option to control the testing of catalog tables that are shared across all databases in the Cloudberry Database installation, such as *pg_database*. +Specify this option to control the testing of catalog tables that are shared across all databases in the Apache Cloudberry installation, such as *pg_database*. The value `none` deactivates testing of shared catalog tables. The value `only` tests only the shared catalog tables. **`-U user_name`** -The user connecting to Cloudberry Database. +The user connecting to Apache Cloudberry. **`-? 
| --help`** diff --git a/docs/sys-utilities/gpcheckperf.md b/docs/sys-utilities/gpcheckperf.md index e9f5419796..4be44d9c3f 100644 --- a/docs/sys-utilities/gpcheckperf.md +++ b/docs/sys-utilities/gpcheckperf.md @@ -27,8 +27,8 @@ gpcheckperf --version The `gpcheckperf` utility starts a session on the specified hosts and runs the following performance tests: - **Disk I/O Test (dd test)** — To test the sequential throughput performance of a logical disk or file system, the utility uses the **dd** command, which is a standard UNIX utility. It times how long it takes to write and read a large file to and from disk and calculates your disk I/O performance in megabytes (MB) per second. By default, the file size that is used for the test is calculated at two times the total random access memory (RAM) on the host. This ensures that the test is truly testing disk I/O and not using the memory cache. -- **Memory Bandwidth Test (stream)** — To test memory bandwidth, the utility uses the STREAM benchmark program to measure sustainable memory bandwidth (in MB/s). This tests that your system is not limited in performance by the memory bandwidth of the system in relation to the computational performance of the CPU. In applications where the data set is large (as in Cloudberry Database), low memory bandwidth is a major performance issue. If memory bandwidth is significantly lower than the theoretical bandwidth of the CPU, then it can cause the CPU to spend significant amounts of time waiting for data to arrive from system memory. -- **Network Performance Test (gpnetbench*)** — To test network performance (and thereby the performance of the Cloudberry Database interconnect), the utility runs a network benchmark program that transfers a 5 second stream of data from the current host to each remote host included in the test. 
The data is transferred in parallel to each remote host and the minimum, maximum, average and median network transfer rates are reported in megabytes (MB) per second. If the summary transfer rate is slower than expected (less than 100 MB/s), you can run the network test serially using the `-r n` option to obtain per-host results. To run a full-matrix bandwidth test, you can specify `-r M` which will cause every host to send and receive data from every other host specified. This test is best used to validate if the switch fabric can tolerate a full-matrix workload. +- **Memory Bandwidth Test (stream)** — To test memory bandwidth, the utility uses the STREAM benchmark program to measure sustainable memory bandwidth (in MB/s). This tests that your system is not limited in performance by the memory bandwidth of the system in relation to the computational performance of the CPU. In applications where the data set is large (as in Apache Cloudberry), low memory bandwidth is a major performance issue. If memory bandwidth is significantly lower than the theoretical bandwidth of the CPU, then it can cause the CPU to spend significant amounts of time waiting for data to arrive from system memory. +- **Network Performance Test (gpnetbench*)** — To test network performance (and thereby the performance of the Apache Cloudberry interconnect), the utility runs a network benchmark program that transfers a 5 second stream of data from the current host to each remote host included in the test. The data is transferred in parallel to each remote host and the minimum, maximum, average and median network transfer rates are reported in megabytes (MB) per second. If the summary transfer rate is slower than expected (less than 100 MB/s), you can run the network test serially using the `-r n` option to obtain per-host results. To run a full-matrix bandwidth test, you can specify `-r M` which will cause every host to send and receive data from every other host specified. 
This test is best used to validate if the switch fabric can tolerate a full-matrix workload. To specify the hosts to test, use the `-f` option to specify a file containing a list of host names, or use the `-h` option to name single host names on the command-line. If running the network performance test, all entries in the host file must be for network interfaces within the same subnet. If your segment hosts have multiple network interfaces configured on different subnets, run the network test once for each subnet. @@ -40,7 +40,7 @@ Before using `gpcheckperf`, you must have a trusted host setup between the hosts **`-B block_size`** -Specifies the block size (in KB or MB) to use for disk I/O test. The default is 32KB, which is the same as the Cloudberry Database page size. The maximum block size is 1 MB. +Specifies the block size (in KB or MB) to use for disk I/O test. The default is 32KB, which is the same as the Apache Cloudberry page size. The maximum block size is 1 MB. **`-d test_directory`** diff --git a/docs/sys-utilities/gpconfig.md b/docs/sys-utilities/gpconfig.md index fd1a129e17..741f03bbb0 100644 --- a/docs/sys-utilities/gpconfig.md +++ b/docs/sys-utilities/gpconfig.md @@ -4,7 +4,7 @@ title: gpconfig # gpconfig -Sets server configuration parameters on all segments within a Cloudberry Database system. +Sets server configuration parameters on all segments within a Apache Cloudberry system. ## Synopsis @@ -21,7 +21,7 @@ gpconfig --help ## Description -The `gpconfig` utility allows you to set, unset, or view configuration parameters from the `postgresql.conf` files of all instances (coordinator, segments, and mirrors) in your Cloudberry Database system. When setting a parameter, you can also specify a different value for the coordinator if necessary. For example, parameters such as `max_connections` require a different setting on the coordinator than what is used for the segments. 
If you want to set or unset a global or coordinator only parameter, use the `--coordinatoronly` option. +The `gpconfig` utility allows you to set, unset, or view configuration parameters from the `postgresql.conf` files of all instances (coordinator, segments, and mirrors) in your Apache Cloudberry system. When setting a parameter, you can also specify a different value for the coordinator if necessary. For example, parameters such as `max_connections` require a different setting on the coordinator than what is used for the segments. If you want to set or unset a global or coordinator only parameter, use the `--coordinatoronly` option. > **Note** For configuration parameters of vartype `string`, you may not pass values enclosed in single quotes to `gpconfig -c`. @@ -29,11 +29,11 @@ The `gpconfig` utility allows you to set, unset, or view configuration parameter When `gpconfig` sets a configuration parameter in a segment `postgresql.conf` file, the new parameter setting always displays at the bottom of the file. When you use `gpconfig` to remove a configuration parameter setting, `gpconfig` comments out the parameter in all segment `postgresql.conf` files, thereby restoring the system default setting. For example, if you use `gpconfig`to remove (comment out) a parameter and later add it back (set a new value), there will be two instances of the parameter; one that is commented out, and one that is enabled and inserted at the bottom of the `postgresql.conf` file. -After setting a parameter, you must restart your Cloudberry Database system or reload the `postgresql.conf` files in order for the change to take effect. Whether you require a restart or a reload depends on the parameter. +After setting a parameter, you must restart your Apache Cloudberry system or reload the `postgresql.conf` files in order for the change to take effect. Whether you require a restart or a reload depends on the parameter. 
To show the currently set values for a parameter across the system, use the `-s` option. -`gpconfig` uses the following environment variables to connect to the Cloudberry Database coordinator instance and obtain system configuration information: +`gpconfig` uses the following environment variables to connect to the Apache Cloudberry coordinator instance and obtain system configuration information: - `PGHOST` - `PGPORT` @@ -73,11 +73,11 @@ Lists all configuration parameters supported by the `gpconfig` utility. **`-s | --show param_name`** -Shows the value for a configuration parameter used on all instances (coordinator and segments) in the Cloudberry Database system. If there is a difference in a parameter value among the instances, the utility displays an error message. Running `gpconfig` with the `-s` option reads parameter values directly from the database, and not the `postgresql.conf` file. If you are using `gpconfig` to set configuration parameters across all segments, then running `gpconfig -s` to verify the changes, you might still see the previous (old) values. You must reload the configuration files (`gpstop -u`) or restart the system (`gpstop -r`) for changes to take effect. +Shows the value for a configuration parameter used on all instances (coordinator and segments) in the Apache Cloudberry system. If there is a difference in a parameter value among the instances, the utility displays an error message. Running `gpconfig` with the `-s` option reads parameter values directly from the database, and not the `postgresql.conf` file. If you are using `gpconfig` to set configuration parameters across all segments, then running `gpconfig -s` to verify the changes, you might still see the previous (old) values. You must reload the configuration files (`gpstop -u`) or restart the system (`gpstop -r`) for changes to take effect. 
**`--file`** -For a configuration parameter, shows the value from the `postgresql.conf` file on all instances (coordinator and segments) in the Cloudberry Database system. If there is a difference in a parameter value among the instances, the utility displays a message. Must be specified with the `-s` option. +For a configuration parameter, shows the value from the `postgresql.conf` file on all instances (coordinator and segments) in the Apache Cloudberry system. If there is a difference in a parameter value among the instances, the utility displays a message. Must be specified with the `-s` option. For example, the configuration parameter `statement_mem` is set to 64MB for a user with the `ALTER ROLE` command, and the value in the `postgresql.conf` file is 128MB. Running the command `gpconfig -s statement_mem --file` displays 128MB. The command `gpconfig -s statement_mem` run by the user displays 64MB. @@ -85,7 +85,7 @@ Not valid with the `--file-compare` option. **`--file-compare`** -For a configuration parameter, compares the current Cloudberry Database value with the value in the `postgresql.conf` files on hosts (coordinator and segments). The values in the `postgresql.conf files` represent the value when Cloudberry Database is restarted. +For a configuration parameter, compares the current Apache Cloudberry value with the value in the `postgresql.conf` files on hosts (coordinator and segments). The values in the `postgresql.conf files` represent the value when Apache Cloudberry is restarted. If the values are not the same, the utility displays the values from all hosts. If all hosts have the same value, the utility displays a summary report. 
diff --git a/docs/sys-utilities/gpdeletesystem.md b/docs/sys-utilities/gpdeletesystem.md index 13058e8b95..7d0f1db05d 100644 --- a/docs/sys-utilities/gpdeletesystem.md +++ b/docs/sys-utilities/gpdeletesystem.md @@ -4,7 +4,7 @@ title: gpdeletesystem # gpdeletesystem -Deletes a Cloudberry Database system that was initialized using `gpinitsystem`. +Deletes a Apache Cloudberry system that was initialized using `gpinitsystem`. ## Synopsis @@ -27,10 +27,10 @@ The `gpdeletesystem` utility performs the following actions: Before running `gpdeletesystem`: - Move any backup files out of the coordinator and segment data directories. -- Make sure that Cloudberry Database is running. +- Make sure that Apache Cloudberry is running. - If you are currently in a segment data directory, change directory to another location. The utility fails with an error when run from within a segment data directory. -This utility will not uninstall the Cloudberry Database software. +This utility will not uninstall the Apache Cloudberry software. ## Options @@ -44,7 +44,7 @@ The number of segments to delete in parallel. If not specified, the utility will **`-f (force)`** -Force a delete even if backup files are found in the data directories. The default is to not delete Cloudberry Database instances if backup files are present. +Force a delete even if backup files are found in the data directories. The default is to not delete Apache Cloudberry instances if backup files are present. **`-l logfile_directory`** @@ -64,13 +64,13 @@ Displays the version, status, last updated date, and check sum of this utility. 
## Examples -Delete a Cloudberry Database system: +Delete a Apache Cloudberry system: ```shell gpdeletesystem -d /gpdata/gp-1 ``` -Delete a Cloudberry Database system even if backup files are present: +Delete a Apache Cloudberry system even if backup files are present: ```shell gpdeletesystem -d /gpdata/gp-1 -f diff --git a/docs/sys-utilities/gpdemo.md b/docs/sys-utilities/gpdemo.md index 0e0313953f..39a5e7b84a 100644 --- a/docs/sys-utilities/gpdemo.md +++ b/docs/sys-utilities/gpdemo.md @@ -4,7 +4,7 @@ title: gpdemo # gpdemo (New in v1.5.0) -Before v1.5.0, if you want to deploy a small Cloudberry Database cluster with segments on a single node to make a demo, you need to spend time writing configuration files and parameters. Starting from v1.5.0, you can use the built-in `gpdemo` script to quickly deploy a cluster with a specified number of segments only with a single command. +Before v1.5.0, if you want to deploy a small Apache Cloudberry cluster with segments on a single node to make a demo, you need to spend time writing configuration files and parameters. Starting from v1.5.0, you can use the built-in `gpdemo` script to quickly deploy a cluster with a specified number of segments only with a single command. `gpdemo` is installed with other system utilities (such as `gpinitsystem`, `gpstart`, and `gpstop`) in the `GPHOME/bin` directory. @@ -52,7 +52,7 @@ NUM_PRIMARY_MIRROR_PAIRS=3 gpdemo :::info - Each segment node consists of a primary node and a mirror node. So every time the parameter value increases by `1`, 2 more nodes will be created. To better capture data distribution issues, it is recommended to set the value to an odd number. -- When the parameter value is set to 0, a single-computing-node cluster is deployed. See [Deploy Cloudberry Database with a Single Computing Node](/docs/deploy-cbdb-with-single-node.md) for details. +- When the parameter value is set to 0, a single-computing-node cluster is deployed. 
See [Deploy Apache Cloudberry with a Single Computing Node](/docs/deploy-cbdb-with-single-node.md) for details. ::: #### Specify the data directory of a node diff --git a/docs/sys-utilities/gpexpand.md b/docs/sys-utilities/gpexpand.md index a7cd5ce8e5..4c7d23b5a3 100644 --- a/docs/sys-utilities/gpexpand.md +++ b/docs/sys-utilities/gpexpand.md @@ -4,7 +4,7 @@ title: gpexpand # gpexpand -Expands an existing Cloudberry Database across new hosts in the system. +Expands an existing Apache Cloudberry across new hosts in the system. ## Synopsis @@ -27,14 +27,14 @@ gpexpand --version ## Prerequisites -- You are logged in as the Cloudberry Database superuser (`gpadmin`). +- You are logged in as the Apache Cloudberry superuser (`gpadmin`). - The new segment hosts have been installed and configured as per the existing segment hosts. This involves: - Configuring the hardware and OS - Installing the Cloudberry software - Creating the `gpadmin` user account - Exchanging SSH keys. - Enough disk space on your segment hosts to temporarily hold a copy of your largest table. -- When redistributing data, Cloudberry Database must be running in production mode. Cloudberry Database cannot be running in restricted mode or in coordinator mode. The `gpstart` options `-R` or `-m` cannot be specified to start Cloudberry Database. +- When redistributing data, Apache Cloudberry must be running in production mode. Apache Cloudberry cannot be running in restricted mode or in coordinator mode. The `gpstart` options `-R` or `-m` cannot be specified to start Apache Cloudberry. > **Note** These utilities cannot be run while `gpexpand` is performing segment initialization. 
> @@ -44,7 +44,7 @@ gpexpand --version > - `gppkg` > - `gprestore` -> **Important** When expanding a Cloudberry Database system, you must deactivate Cloudberry interconnect proxies before adding new hosts and segment instances to the system, and you must update the `gp_interconnect_proxy_addresses` parameter with the newly-added segment instances before you re-enable interconnect proxies. +> **Important** When expanding a Apache Cloudberry system, you must deactivate Cloudberry interconnect proxies before adding new hosts and segment instances to the system, and you must update the `gp_interconnect_proxy_addresses` parameter with the newly-added segment instances before you re-enable interconnect proxies. ## Description @@ -64,7 +64,7 @@ In the table data redistribution phase, `gpexpand` redistributes table data to r To begin the redistribution phase, run `gpexpand` with no options or with the `-d` (duration), `-e` (end time), or `-i` options. If you specify an end time or duration, then the utility redistributes tables in the expansion schema until the specified end time or duration is reached. If you specify `-i` or no options, then the utility redistribution phase continues until all tables in the expansion schema are reorganized. Each table is reorganized using `ALTER TABLE` commands to rebalance the tables across new segments, and to set tables to their original distribution policy. If `gpexpand` completes the reorganization of all tables, it displays a success message and ends. -> **Note** This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Cloudberry Database deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. 
For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. +> **Note** This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Apache Cloudberry deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. ## Options @@ -98,7 +98,7 @@ Specifies the name of a file that contains a list of new hosts for system expans This file can contain hostnames with or without network interfaces specified. The `gpexpand` utility handles either case, adding interface numbers to end of the hostname if the original nodes are configured with multiple network interfaces. -> **Note** The Cloudberry Database segment host naming convention is `sdwN` where `sdw` is a prefix and `N` is an integer. For example, `sdw1`, `sdw2` and so on. For hosts with multiple interfaces, the convention is to append a dash (`-`) and number to the host name. For example, `sdw1-1` and `sdw1-2` are the two interface names for host `sdw1`. +> **Note** The Apache Cloudberry segment host naming convention is `sdwN` where `sdw` is a prefix and `N` is an integer. For example, `sdw1`, `sdw2` and so on. For hosts with multiple interfaces, the convention is to append a dash (`-`) and number to the host name. For example, `sdw1-1` and `sdw1-2` are the two interface names for host `sdw1`. For information about using a hostname or IP address, see [Specifying Hosts using Hostnames or IP Addresses](#specify-hosts-using-hostnames-or-ip-addresses). Also, see [Using Host Systems with Multiple NICs](#using-host-systems-with-multiple-nics). @@ -124,13 +124,13 @@ Runs in silent mode. 
Does not prompt for confirmation to proceed on warnings. **`-S | --simple-progress`** -If specified, the `gpexpand` utility records only the minimum progress information in the Cloudberry Database table *gpexpand.expansion_progress*. The utility does not record the relation size information and status information in the table *gpexpand.status_detail*. +If specified, the `gpexpand` utility records only the minimum progress information in the Apache Cloudberry table *gpexpand.expansion_progress*. The utility does not record the relation size information and status information in the table *gpexpand.status_detail*. Specifying this option can improve performance by reducing the amount of progress information written to the *gpexpand* tables. **`[-t | --tardir] directory`** -The fully qualified path to a directory on segment hosts where the `gpexpand` utility copies a temporary tar file. The file contains Cloudberry Database files that are used to create segment instances. The default directory is the user home directory. +The fully qualified path to a directory on segment hosts where the `gpexpand` utility copies a temporary tar file. The file contains Apache Cloudberry files that are used to create segment instances. The default directory is the user home directory. **`-v | --verbose`** @@ -146,18 +146,18 @@ Displays the online help. ## Specify hosts using hostnames or IP addresses -When expanding a Cloudberry Database system, you can specify either a hostname or an IP address for the value. +When expanding a Apache Cloudberry system, you can specify either a hostname or an IP address for the value. - If you specify a hostname, the resolution of the hostname to an IP address should be done locally for security. For example, you should use entries in a local `/etc/hosts` file to map a hostname to an IP address. The resolution of a hostname to an IP address should not be performed by an external service such as a public DNS server. 
You must stop the Cloudberry system before you change the mapping of a hostname to a different IP address. -- If you specify an IP address, the address should not be changed after the initial configuration. When segment mirroring is enabled, replication from the primary to the mirror segment will fail if the IP address changes from the configured value. For this reason, you should use a hostname when expanding a Cloudberry Database system unless you have a specific requirement to use IP addresses. +- If you specify an IP address, the address should not be changed after the initial configuration. When segment mirroring is enabled, replication from the primary to the mirror segment will fail if the IP address changes from the configured value. For this reason, you should use a hostname when expanding a Apache Cloudberry system unless you have a specific requirement to use IP addresses. -When expanding a Cloudberry system, `gpexpand` populates `gp_segment_configuration` catalog table with the new segment instance information. Cloudberry Database uses the `address` value of the `gp_segment_configuration` catalog table when looking up host systems for Cloudberry interconnect (internal) communication between the coordinator and segment instances and between segment instances, and for other internal communication. +When expanding a Cloudberry system, `gpexpand` populates `gp_segment_configuration` catalog table with the new segment instance information. Apache Cloudberry uses the `address` value of the `gp_segment_configuration` catalog table when looking up host systems for Cloudberry interconnect (internal) communication between the coordinator and segment instances and between segment instances, and for other internal communication. ## Use host systems with multiple NICs -If host systems are configured with multiple NICs, you can expand a Cloudberry Database system to use each NIC as a Cloudberry host system. 
You must ensure that the host systems are configured with sufficient resources to support all the segment instances being added to the host. Also, if you enable segment mirroring, you must ensure that the expanded Cloudberry system configuration supports failover if a host system fails. +If host systems are configured with multiple NICs, you can expand a Apache Cloudberry system to use each NIC as a Cloudberry host system. You must ensure that the host systems are configured with sufficient resources to support all the segment instances being added to the host. Also, if you enable segment mirroring, you must ensure that the expanded Cloudberry system configuration supports failover if a host system fails. -For example, this is a `gpexpand` configuration file for a simple Cloudberry system. The segment host `gp7s1` and `gp7s2` are configured with two NICs, `-s1` and `-s2`, where the Cloudberry Database system uses each NIC as a host system. +For example, this is a `gpexpand` configuration file for a simple Cloudberry system. The segment host `gp7s1` and `gp7s2` are configured with two NICs, `-s1` and `-s2`, where the Apache Cloudberry system uses each NIC as a host system. ```shell gp7s1-s2|gp7s1-s2|40001|/data/data1/gpseg2|6|2|p diff --git a/docs/sys-utilities/gpfdist.md b/docs/sys-utilities/gpfdist.md index b6f938e048..fdc9715e50 100644 --- a/docs/sys-utilities/gpfdist.md +++ b/docs/sys-utilities/gpfdist.md @@ -4,7 +4,7 @@ title: gpfdist # gpfdist -Serves data files to or writes data files out from Cloudberry Database segments. +Serves data files to or writes data files out from Apache Cloudberry segments. ## Synopsis @@ -22,15 +22,15 @@ gpfdist --version ## Description -`gpfdist` is Cloudberry Database parallel file distribution program. It is used by readable external tables and `gpload` to serve external table files to all Cloudberry Database segments in parallel. 
It is used by writable external tables to accept output streams from Cloudberry Database segments in parallel and write them out to a file. +`gpfdist` is Apache Cloudberry parallel file distribution program. It is used by readable external tables and `gpload` to serve external table files to all Apache Cloudberry segments in parallel. It is used by writable external tables to accept output streams from Apache Cloudberry segments in parallel and write them out to a file. -In order for `gpfdist` to be used by an external table, the `LOCATION` clause of the external table definition must specify the external table data using the `gpfdist://` protocol (see the Cloudberry Database command `CREATE EXTERNAL TABLE`). +In order for `gpfdist` to be used by an external table, the `LOCATION` clause of the external table definition must specify the external table data using the `gpfdist://` protocol (see the Apache Cloudberry command `CREATE EXTERNAL TABLE`). > **Note** If the `--ssl` option is specified to enable SSL security, create the external table with the `gpfdists://` protocol. The benefit of using `gpfdist` is that you are guaranteed maximum parallelism while reading from or writing to external tables, thereby offering the best performance as well as easier administration of external tables. -For readable external tables, `gpfdist` parses and serves data files evenly to all the segment instances in the Cloudberry Database system when users `SELECT` from the external table. For writable external tables, `gpfdist` accepts parallel output streams from the segments when users `INSERT` into the external table, and writes to an output file. +For readable external tables, `gpfdist` parses and serves data files evenly to all the segment instances in the Apache Cloudberry system when users `SELECT` from the external table. 
For writable external tables, `gpfdist` accepts parallel output streams from the segments when users `INSERT` into the external table, and writes to an output file. > **Note** When `gpfdist` reads data and encounters a data formatting error, the error message includes a row number indicating the location of the formatting error. `gpfdist` attempts to capture the row that contains the error. However, `gpfdist` might not capture the exact row for some formatting errors. @@ -38,9 +38,9 @@ For readable external tables, if load files are compressed using `gzip` or `bzip > **Note** Compression is not supported for readable and writeable external tables when the `gpfdist` utility runs on Windows platforms. -When reading or writing data with the `gpfdist` or `gpfdists` protocol, Cloudberry Database includes `X-GP-PROTO` in the HTTP request header to indicate that the request is from Cloudberry Database. The utility rejects HTTP requests that do not include `X-GP-PROTO` in the request header. +When reading or writing data with the `gpfdist` or `gpfdists` protocol, Apache Cloudberry includes `X-GP-PROTO` in the HTTP request header to indicate that the request is from Apache Cloudberry. The utility rejects HTTP requests that do not include `X-GP-PROTO` in the request header. -Most likely, you will want to run `gpfdist` on your ETL machines rather than the hosts where Cloudberry Database is installed. To install `gpfdist` on another host, simply copy the utility over to that host and add `gpfdist` to your `$PATH`. +Most likely, you will want to run `gpfdist` on your ETL machines rather than the hosts where Apache Cloudberry is installed. To install `gpfdist` on another host, simply copy the utility over to that host and add `gpfdist` to your `$PATH`. > **Note** When using IPv6, always enclose the numeric IP address in brackets. 
@@ -64,7 +64,7 @@ The last port number in a range of HTTP port numbers (http_port to last_http_por **`-t timeout`** -Sets the time allowed for Cloudberry Database to establish a connection to a `gpfdist` process. Default is 5 seconds. Allowed values are 2 to 7200 seconds (2 hours). May need to be increased on systems with a lot of network traffic. +Sets the time allowed for Apache Cloudberry to establish a connection to a `gpfdist` process. Default is 5 seconds. Allowed values are 2 to 7200 seconds (2 hours). May need to be increased on systems with a lot of network traffic. **`-m max_length`** @@ -84,9 +84,9 @@ Opens the file for synchronous I/O with the `O_SYNC` flag. Any writes to the res **`-w time`** -Sets the number of seconds that Cloudberry Database delays before closing a target file such as a named pipe. The default value is 0, no delay. The maximum value is 7200 seconds (2 hours). +Sets the number of seconds that Apache Cloudberry delays before closing a target file such as a named pipe. The default value is 0, no delay. The maximum value is 7200 seconds (2 hours). -For a Cloudberry Database with multiple segments, there might be a delay between segments when writing data from different segments to the file. You can specify a time to wait before Cloudberry Database closes the file to ensure all the data is written to the file. +For an Apache Cloudberry system with multiple segments, there might be a delay between segments when writing data from different segments to the file. You can specify a time to wait before Apache Cloudberry closes the file to ensure all the data is written to the file. **`--ssl certificate_path`** @@ -102,7 +102,7 @@ The root directory (`/`) cannot be specified as certificate_path.
**`--sslclean wait_time`** -When the utility is run with the `--ssl` option, sets the number of seconds that the utility delays before closing an SSL session and cleaning up the SSL resources after it completes writing data to or from a Cloudberry Database segment. The default value is 0, no delay. The maximum value is 500 seconds. If the delay is increased, the transfer speed decreases. +When the utility is run with the `--ssl` option, sets the number of seconds that the utility delays before closing an SSL session and cleaning up the SSL resources after it completes writing data to or from an Apache Cloudberry segment. The default value is 0, no delay. The maximum value is 500 seconds. If the delay is increased, the transfer speed decreases. In some cases, this error might occur when copying large amounts of data: `gpfdist server closed connection`. To avoid the error, you can add a delay, for example `--sslclean 5`. @@ -144,14 +144,14 @@ Displays the version of this utility. ## Notes -The server configuration parameter `verify_gpfdists_cert` controls whether SSL certificate authentication is enabled when Cloudberry Database communicates with the `gpfdist` utility to either read data from or write data to an external data source. You can set the parameter value to `false` to deactivate authentication when testing the communication between the Cloudberry Database external table and the `gpfdist` utility that is serving the external data. If the value is `false`, these SSL exceptions are ignored: +The server configuration parameter `verify_gpfdists_cert` controls whether SSL certificate authentication is enabled when Apache Cloudberry communicates with the `gpfdist` utility to either read data from or write data to an external data source. You can set the parameter value to `false` to deactivate authentication when testing the communication between the Apache Cloudberry external table and the `gpfdist` utility that is serving the external data.
If the value is `false`, these SSL exceptions are ignored: -- The self-signed SSL certificate that is used by `gpfdist` is not trusted by Cloudberry Database. +- The self-signed SSL certificate that is used by `gpfdist` is not trusted by Apache Cloudberry. - The host name contained in the SSL certificate does not match the host name that is running `gpfdist`. > **Caution** Deactivating SSL certificate authentication exposes a security risk by not validating the `gpfdists` SSL certificate. -You can set the server configuration parameter `gpfdist_retry_timeout` to control the time that Cloudberry Database waits before returning an error when a `gpfdist` server does not respond while Cloudberry Database is attempting to write data to `gpfdist`. The default is 300 seconds (5 minutes). +You can set the server configuration parameter `gpfdist_retry_timeout` to control the time that Apache Cloudberry waits before returning an error when a `gpfdist` server does not respond while Apache Cloudberry is attempting to write data to `gpfdist`. The default is 300 seconds (5 minutes). If the `gpfdist` utility hangs with no read or write activity occurring, you can generate a core dump the next time a hang occurs to help debug the issue. Set the environment variable `GPFDIST_WATCHDOG_TIMER` to the number of seconds of no activity to wait before `gpfdist` is forced to exit. When the environment variable is set and `gpfdist` hangs, the utility is stopped after the specified number of seconds, creates a core dump, and sends relevant information to the log file. diff --git a/docs/sys-utilities/gpinitstandby.md b/docs/sys-utilities/gpinitstandby.md index f324147b3b..a7219173aa 100644 --- a/docs/sys-utilities/gpinitstandby.md +++ b/docs/sys-utilities/gpinitstandby.md @@ -4,7 +4,7 @@ title: gpinitstandby # gpinitstandby -Adds and/or initializes a standby coordinator host for a Cloudberry Database system. +Adds and/or initializes a standby coordinator host for an Apache Cloudberry system.
## Synopsis @@ -20,23 +20,23 @@ gpinitstandby -? ## Description -The `gpinitstandby` utility adds a backup, standby coordinator instance to your Cloudberry Database system. If your system has an existing standby coordinator instance configured, use the `-r` option to remove it before adding the new standby coordinator instance. +The `gpinitstandby` utility adds a backup, standby coordinator instance to your Apache Cloudberry system. If your system has an existing standby coordinator instance configured, use the `-r` option to remove it before adding the new standby coordinator instance. -Before running this utility, make sure that the Cloudberry Database software is installed on the standby coordinator host and that you have exchanged SSH keys between the hosts. It is recommended that the coordinator port is set to the same port number on the coordinator host and the standby coordinator host. +Before running this utility, make sure that the Apache Cloudberry software is installed on the standby coordinator host and that you have exchanged SSH keys between the hosts. It is recommended that the coordinator port is set to the same port number on the coordinator host and the standby coordinator host. This utility should be run on the currently active *primary* coordinator host. 
The utility performs the following steps: -- Updates the Cloudberry Database system catalog to remove the existing standby coordinator information (if the `-r` option is supplied) -- Updates the Cloudberry Database system catalog to add the new standby coordinator instance information -- Edits the `pg_hba.conf` file of the Cloudberry Database coordinator to allow access from the newly added standby coordinator +- Updates the Apache Cloudberry system catalog to remove the existing standby coordinator information (if the `-r` option is supplied) +- Updates the Apache Cloudberry system catalog to add the new standby coordinator instance information +- Edits the `pg_hba.conf` file of the Apache Cloudberry coordinator to allow access from the newly added standby coordinator - Sets up the standby coordinator instance on the alternate coordinator host - Starts the synchronization process A backup, standby coordinator instance serves as a 'warm standby' in the event of the primary coordinator becoming non-operational. The standby coordinator is kept up to date by transaction log replication processes (the `walsender` and `walreceiver`), which run on the primary coordinator and standby coordinator hosts and keep the data between the primary and standby coordinator instances synchronized. If the primary coordinator fails, the log replication process is shut down, and the standby coordinator can be activated in its place by using the `gpactivatestandby` utility. Upon activation of the standby coordinator, the replicated logs are used to reconstruct the state of the coordinator instance at the time of the last successfully committed transaction. -The activated standby coordinator effectively becomes the Cloudberry Database coordinator, accepting client connections on the coordinator port and performing normal coordinator operations such as SQL command processing and resource management. 
+The activated standby coordinator effectively becomes the Apache Cloudberry coordinator, accepting client connections on the coordinator port and performing normal coordinator operations such as SQL command processing and resource management. > **Important** If the `gpinitstandby` utility previously failed to initialize the standby coordinator, you must delete the files in the standby coordinator data directory before running `gpinitstandby` again. The standby coordinator data directory is not cleaned up after an initialization failure because it contains log files that can help in determining the reason for the failure. @@ -54,7 +54,7 @@ Sets logging level to debug. **`--hba-hostnames boolean`** -Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Cloudberry Database. When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Cloudberry Database system was initialized. +Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Apache Cloudberry. When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Apache Cloudberry system was initialized. **`-l logfile_directory`** @@ -62,13 +62,13 @@ The directory to write the log file. Defaults to `~/gpAdminLogs`. **`-n (restart standby coordinator)`** -Specify this option to start a Cloudberry Database standby coordinator that has been configured but has stopped for some reason. 
+Specify this option to start an Apache Cloudberry standby coordinator that has been configured but has stopped for some reason. **`-P port`** -This option specifies the port that is used by the Cloudberry Database standby coordinator. The default is the same port used by the active Cloudberry Database coordinator. +This option specifies the port that is used by the Apache Cloudberry standby coordinator. The default is the same port used by the active Apache Cloudberry coordinator. -If the Cloudberry Database standby coordinator is on the same host as the active coordinator, the ports must be different. If the ports are the same for the active and standby coordinator and the host is the same, the utility returns an error. +If the Apache Cloudberry standby coordinator is on the same host as the active coordinator, the ports must be different. If the ports are the same for the active and standby coordinator and the host is the same, the utility returns an error. **`-q (no screen output)`** @@ -76,7 +76,7 @@ Run in quiet mode. Command output is not displayed on the screen, but is still w **`-r (remove standby coordinator)`** -Removes the currently configured standby coordinator instance from your Cloudberry Database system. +Removes the currently configured standby coordinator instance from your Apache Cloudberry system. **`-s standby_hostname`** @@ -98,7 +98,7 @@ Displays the online help. ## Examples -Add a standby coordinator instance to your Cloudberry Database system and start the synchronization process: +Add a standby coordinator instance to your Apache Cloudberry system and start the synchronization process: ```shell gpinitstandby -s host09 @@ -112,13 +112,13 @@ gpinitstandby -n > **Note** Do not specify the -n and -s options in the same command.
-Add a standby coordinator instance to your Cloudberry Database system specifying a different port: +Add a standby coordinator instance to your Apache Cloudberry system specifying a different port: ```shell gpinitstandby -s myhost -P 2222 ``` -If you specify the same host name as the active Cloudberry Database coordinator, you must also specify a different port number with the `-P` option and a standby data directory with the `-S` option. +If you specify the same host name as the active Apache Cloudberry coordinator, you must also specify a different port number with the `-P` option and a standby data directory with the `-S` option. Remove the existing standby coordinator from your Cloudberry system configuration: diff --git a/docs/sys-utilities/gpinitsystem.md b/docs/sys-utilities/gpinitsystem.md index bcaeb689d2..273224a0f0 100644 --- a/docs/sys-utilities/gpinitsystem.md +++ b/docs/sys-utilities/gpinitsystem.md @@ -4,7 +4,7 @@ title: gpinitsystem # gpinitsystem -Initializes a Cloudberry Database system using configuration parameters specified in the `gpinitsystem_config` file. +Initializes an Apache Cloudberry system using configuration parameters specified in the `gpinitsystem_config` file. ## Synopsis @@ -33,13 +33,13 @@ gpinitsystem -? | --help ## Description -The `gpinitsystem` utility creates a Cloudberry Database instance or writes an input configuration file using the values defined in a cluster configuration file and any command-line options that you provide. See [Initialization Configuration File Format](#initialization-configuration-file-format) for more information about the configuration file. Before running this utility, make sure that you have installed the Cloudberry Database software on all the hosts in the array. +The `gpinitsystem` utility creates an Apache Cloudberry instance or writes an input configuration file using the values defined in a cluster configuration file and any command-line options that you provide.
See [Initialization Configuration File Format](#initialization-configuration-file-format) for more information about the configuration file. Before running this utility, make sure that you have installed the Apache Cloudberry software on all the hosts in the array. With the `<-O output_configuration_file>` option, `gpinitsystem` writes all provided configuration information to the specified output file. This file can be used with the `-I` option to create a new cluster or re-create a cluster from a backed up configuration. See [Initialization Configuration File Format](#initialization-configuration-file-format) for more information. -In a Cloudberry Database DBMS, each database instance (the coordinator instance and all segment instances) must be initialized across all of the hosts in the system in such a way that they can all work together as a unified DBMS. The `gpinitsystem` utility takes care of initializing the Cloudberry coordinator and each segment instance, and configuring the system as a whole. +In an Apache Cloudberry DBMS, each database instance (the coordinator instance and all segment instances) must be initialized across all of the hosts in the system in such a way that they can all work together as a unified DBMS. The `gpinitsystem` utility takes care of initializing the Cloudberry coordinator and each segment instance, and configuring the system as a whole. -Before running `gpinitsystem`, you must set the `$GPHOME` environment variable to point to the location of your Cloudberry Database installation on the coordinator host and exchange SSH keys between all host addresses in the array using `gpssh-exkeys`. +Before running `gpinitsystem`, you must set the `$GPHOME` environment variable to point to the location of your Apache Cloudberry installation on the coordinator host and exchange SSH keys between all host addresses in the array using `gpssh-exkeys`.
This utility performs the following tasks: @@ -51,10 +51,10 @@ This utility performs the following tasks: - Initializes the standby coordinator instance (if specified). - Initializes the primary segment instances. - Initializes the mirror segment instances (if mirroring is configured). -- Configures the Cloudberry Database system and checks for errors. -- Starts the Cloudberry Database system. +- Configures the Apache Cloudberry system and checks for errors. +- Starts the Apache Cloudberry system. -> **Note** This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Cloudberry Database deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. +> **Note** This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Apache Cloudberry deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. ## Options @@ -68,7 +68,7 @@ The number of segments to create in parallel. If not specified, the utility will **`-c cluster_configuration_file`** -Required. The full path and filename of the configuration file, which contains all of the defined parameters to configure and initialize a new Cloudberry Database system. See [Initialization Configuration File Format](#initialization-configuration-file-format) for a description of this file. 
You must provide either the `-c ` option or the `-I ` option to `gpinitsystem`. +Required. The full path and filename of the configuration file, which contains all of the defined parameters to configure and initialize a new Apache Cloudberry system. See [Initialization Configuration File Format](#initialization-configuration-file-format) for a description of this file. You must provide either the `-c ` option or the `-I ` option to `gpinitsystem`. **`-D`** @@ -80,23 +80,23 @@ Optional. The full path and filename of a file that contains the host addresses **`-I input_configuration_file`** -The full path and filename of an input configuration file, which defines the Cloudberry Database host systems, the coordinator instance and segment instances on the hosts, using the `QD_PRIMARY_ARRAY`, `PRIMARY_ARRAY`, and `MIRROR_ARRAY` parameters. The input configuration file is typically created by using `gpinitsystem` with the `-O output_configuration_file` option. Edit those parameters in order to initialize a new cluster or re-create a cluster from a backed up configuration. You must provide either the `-c ` option or the `-I ` option to `gpinitsystem`. +The full path and filename of an input configuration file, which defines the Apache Cloudberry host systems, the coordinator instance and segment instances on the hosts, using the `QD_PRIMARY_ARRAY`, `PRIMARY_ARRAY`, and `MIRROR_ARRAY` parameters. The input configuration file is typically created by using `gpinitsystem` with the `-O output_configuration_file` option. Edit those parameters in order to initialize a new cluster or re-create a cluster from a backed up configuration. You must provide either the `-c ` option or the `-I ` option to `gpinitsystem`. **`-n locale | --locale=locale`** -Sets the default locale used by Cloudberry Database. If not specified, the default locale is `en_US.utf8`. A locale identifier consists of a language identifier and a region identifier, and optionally a character set encoding. 
For example, `sv_SE` is Swedish as spoken in Sweden, `en_US` is U.S. English, and `fr_CA` is French Canadian. If more than one character set can be useful for a locale, then the specifications look like this: `en_US.UTF-8` (locale specification and character set encoding). On most systems, the command `locale` will show the locale environment settings and `locale -a` will show a list of all available locales. +Sets the default locale used by Apache Cloudberry. If not specified, the default locale is `en_US.utf8`. A locale identifier consists of a language identifier and a region identifier, and optionally a character set encoding. For example, `sv_SE` is Swedish as spoken in Sweden, `en_US` is U.S. English, and `fr_CA` is French Canadian. If more than one character set can be useful for a locale, then the specifications look like this: `en_US.UTF-8` (locale specification and character set encoding). On most systems, the command `locale` will show the locale environment settings and `locale -a` will show a list of all available locales. **`--lc-collate=locale`** -Similar to `--locale`, but sets the locale used for collation (sorting data). The sort order cannot be changed after Cloudberry Database is initialized, so it is important to choose a collation locale that is compatible with the character set encodings that you plan to use for your data. There is a special collation name of `C` or `POSIX` (byte-order sorting as opposed to dictionary-order sorting). The `C` collation can be used with any character encoding. +Similar to `--locale`, but sets the locale used for collation (sorting data). The sort order cannot be changed after Apache Cloudberry is initialized, so it is important to choose a collation locale that is compatible with the character set encodings that you plan to use for your data. There is a special collation name of `C` or `POSIX` (byte-order sorting as opposed to dictionary-order sorting). The `C` collation can be used with any character encoding. 
**`--lc-ctype=locale`** -Similar to `--locale`, but sets the locale used for character classification (what character sequences are valid and how they are interpreted). This cannot be changed after Cloudberry Database is initialized, so it is important to choose a character classification locale that is compatible with the data you plan to store in Cloudberry Database. +Similar to `--locale`, but sets the locale used for character classification (what character sequences are valid and how they are interpreted). This cannot be changed after Apache Cloudberry is initialized, so it is important to choose a character classification locale that is compatible with the data you plan to store in Apache Cloudberry. **`--lc-messages=locale`** -Similar to `--locale`, but sets the locale used for messages output by Cloudberry Database. The current version of Cloudberry Database does not support multiple locales for output messages (all messages are in English), so changing this setting will not have any effect. +Similar to `--locale`, but sets the locale used for messages output by Apache Cloudberry. The current version of Apache Cloudberry does not support multiple locales for output messages (all messages are in English), so changing this setting will not have any effect. **`--lc-monetary=locale`** @@ -120,11 +120,11 @@ Sets the maximum number of client connections allowed to the coordinator. The de **`-O output_configuration_file`** -Optional, used during new cluster initialization. This option writes the `cluster_configuration_file` information (used with -c) to the specified `output_configuration_file`. This file defines the Cloudberry Database members using the `QD_PRIMARY_ARRAY`, `PRIMARY_ARRAY`, and `MIRROR_ARRAY` parameters. Use this file as a template for the `-I` `input_configuration_file` option. See [Examples](#examples) for more information. +Optional, used during new cluster initialization. 
This option writes the `cluster_configuration_file` information (used with -c) to the specified `output_configuration_file`. This file defines the Apache Cloudberry members using the `QD_PRIMARY_ARRAY`, `PRIMARY_ARRAY`, and `MIRROR_ARRAY` parameters. Use this file as a template for the `-I` `input_configuration_file` option. See [Examples](#examples) for more information. **`-p postgresql_conf_param_file`** -Optional. The name of a file that contains `postgresql.conf` parameter settings that you want to set for Cloudberry Database. These settings will be used when the individual coordinator and segment instances are initialized. You can also set parameters after initialization using the `gpconfig` utility. +Optional. The name of a file that contains `postgresql.conf` parameter settings that you want to set for Apache Cloudberry. These settings will be used when the individual coordinator and segment instances are initialized. You can also set parameters after initialization using the `gpconfig` utility. **`-q`** @@ -136,11 +136,11 @@ Sets the amount of memory a Cloudberry server instance uses for shared memory bu **`-s standby_coordinator_host`** -Optional. If you wish to configure a backup coordinator instance, specify the host name using this option. The Cloudberry Database software must already be installed and configured on this host. +Optional. If you wish to configure a backup coordinator instance, specify the host name using this option. The Apache Cloudberry software must already be installed and configured on this host. **`-P standby_coordinator_port`** -If you configure a standby coordinator instance with `-s`, specify its port number using this option. The default port is the same as the coordinator port. To run the standby and coordinator on the same host, you must use this option to specify a different port for the standby. The Cloudberry Database software must already be installed and configured on the standby host. 
+If you configure a standby coordinator instance with `-s`, specify its port number using this option. The default port is the same as the coordinator port. To run the standby and coordinator on the same host, you must use this option to specify a different port for the standby. The Apache Cloudberry software must already be installed and configured on the standby host. **`-S standby_coordinator_datadir | --standby_dir=standby_coordinator_datadir`** @@ -148,7 +148,7 @@ If you configure a standby coordinator host with `-s`, use this option to specif **`-e superuser_password | --su_password=superuser_password`** -Use this option to specify the password to set for the Cloudberry Database superuser account (such as `gpadmin`). If this option is not specified, the default password `gparray` is assigned to the superuser account. You can use the `ALTER ROLE` command to change the password at a later time. +Use this option to specify the password to set for the Apache Cloudberry superuser account (such as `gpadmin`). If this option is not specified, the default password `gparray` is assigned to the superuser account. You can use the `ALTER ROLE` command to change the password at a later time. Recommended security best practices: @@ -157,7 +157,7 @@ Recommended security best practices: **`--mirror-mode={group|spread}`** -Use this option to specify the placement of mirror segment instances on the segment hosts. The default, `group`, groups the mirror segments for all of a host's primary segments on a single alternate host. `spread` spreads mirror segments for the primary segments on a host across different hosts in the Cloudberry Database array. Spreading is only allowed if the number of hosts is greater than the number of segment instances per host. +Use this option to specify the placement of mirror segment instances on the segment hosts. The default, `group`, groups the mirror segments for all of a host's primary segments on a single alternate host. 
`spread` spreads mirror segments for the primary segments on a host across different hosts in the Apache Cloudberry array. Spreading is only allowed if the number of hosts is greater than the number of segment instances per host. **`-v | --version`** @@ -171,7 +171,7 @@ Show help about `gpinitsystem` command line arguments, and exit. `gpinitsystem` requires a cluster configuration file with the following parameters defined. An example initialization configuration file can be found in `$GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config`. -To avoid port conflicts between Cloudberry Database and other applications, the Cloudberry Database port numbers should not be in the range specified by the operating system parameter `net.ipv4.ip_local_port_range`. For example, if `net.ipv4.ip_local_port_range = 10000 65535`, you could set Cloudberry Database base port numbers to these values. +To avoid port conflicts between Apache Cloudberry and other applications, the Apache Cloudberry port numbers should not be in the range specified by the operating system parameter `net.ipv4.ip_local_port_range`. For example, if `net.ipv4.ip_local_port_range = 10000 65535`, you could set Apache Cloudberry base port numbers to these values. ```shell PORT_BASE = 6000 @@ -180,11 +180,11 @@ MIRROR_PORT_BASE = 7000 **`MACHINE_LIST_FILE`** -**Optional.** Can be used in place of the `-h` option. This specifies the file that contains the list of the segment host address names that comprise the Cloudberry Database system. The coordinator host is assumed to be the host from which you are running the utility and should not be included in this file. If your segment hosts have multiple network interfaces, then this file would include all addresses for the host. Give the absolute path to the file. +**Optional.** Can be used in place of the `-h` option. This specifies the file that contains the list of the segment host address names that comprise the Apache Cloudberry system. 
The coordinator host is assumed to be the host from which you are running the utility and should not be included in this file. If your segment hosts have multiple network interfaces, then this file would include all addresses for the host. Give the absolute path to the file. **`SEG_PREFIX`** -**Required.** This specifies a prefix that will be used to name the data directories on the coordinator and segment instances. The naming convention for data directories in a Cloudberry Database system is SEG_PREFIXnumber where number starts with 0 for segment instances (the coordinator is always -1). So for example, if you choose the prefix `gpseg`, your coordinator instance data directory would be named `gpseg-1`, and the segment instances would be named `gpseg0`, `gpseg1`, `gpseg2`, `gpseg3`, and so on. +**Required.** This specifies a prefix that will be used to name the data directories on the coordinator and segment instances. The naming convention for data directories in an Apache Cloudberry system is SEG_PREFIXnumber where number starts with 0 for segment instances (the coordinator is always -1). So for example, if you choose the prefix `gpseg`, your coordinator instance data directory would be named `gpseg-1`, and the segment instances would be named `gpseg0`, `gpseg1`, `gpseg2`, `gpseg3`, and so on. **`PORT_BASE`** @@ -209,7 +209,7 @@ declare -a DATA_DIRECTORY=(/data1/primary /data1/primary **`COORDINATOR_PORT`** -**Required.** The port number for the coordinator instance. This is the port number that users and client connections will use when accessing the Cloudberry Database system. +**Required.** The port number for the coordinator instance. This is the port number that users and client connections will use when accessing the Apache Cloudberry system. **`TRUSTED_SHELL`** @@ -217,11 +217,11 @@ declare -a DATA_DIRECTORY=(/data1/primary /data1/primary **`ENCODING`** -**Required.** The character set encoding to use.
This character set must be compatible with the `--locale` settings used, especially `--lc-collate` and `--lc-ctype`. Cloudberry Database supports the same character sets as PostgreSQL. +**Required.** The character set encoding to use. This character set must be compatible with the `--locale` settings used, especially `--lc-collate` and `--lc-ctype`. Apache Cloudberry supports the same character sets as PostgreSQL. **`DATABASE_NAME`** -**Optional.** The name of a Cloudberry Database database to create after the system is initialized. You can always create a database later using the `CREATE DATABASE` command or the `createdb` utility. +**Optional.** The name of an Apache Cloudberry database to create after the system is initialized. You can always create a database later using the `CREATE DATABASE` command or the `createdb` utility. **`MIRROR_PORT_BASE`** @@ -239,7 +239,7 @@ declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror **`QD_PRIMARY_ARRAY, PRIMARY_ARRAY, MIRROR_ARRAY`** -**Required** when using `input_configuration file` with `-I` option. These parameters specify the Cloudberry Database coordinator host, the primary segment, and the mirror segment hosts respectively. During new cluster initialization, use the `gpinitsystem` `-O output_configuration_file` to populate `QD_PRIMARY_ARRAY`, `PRIMARY_ARRAY`, `MIRROR_ARRAY`. +**Required** when using `input_configuration file` with `-I` option. These parameters specify the Apache Cloudberry coordinator host, the primary segment, and the mirror segment hosts respectively. During new cluster initialization, use the `gpinitsystem` `-O output_configuration_file` to populate `QD_PRIMARY_ARRAY`, `PRIMARY_ARRAY`, `MIRROR_ARRAY`. To initialize a new cluster or re-create a cluster from a backed up configuration, edit these values in the input configuration file used with the `gpinitsystem` `-I input_configuration_file` option.
Use one of the following formats to specify the host information: @@ -255,7 +255,7 @@ or The first format populates the `hostname` and `address` fields in the `gp_segment_configuration` catalog table with the hostname and address values provided in the input configuration file. The second format populates `hostname` and `address` fields with the same value, derived from host. -The Cloudberry Database coordinator always uses the value -1 for the segment ID and content ID. For example, seg_prefix `` and dbid values for `QD_PRIMARY_ARRAY` use `-1` to indicate the coordinator instance: +The Apache Cloudberry coordinator always uses the value -1 for the segment ID and content ID. For example, seg_prefix `` and dbid values for `QD_PRIMARY_ARRAY` use `-1` to indicate the coordinator instance: ```shell QD_PRIMARY_ARRAY=cdw~cdw~5432~/gpdata/coordinator/gpseg-1~1~-1 @@ -273,15 +273,15 @@ sdw1~sdw1~50001~/gpdata/mirror2/gpseg3~9~3 ) ``` -To re-create a cluster using a known Cloudberry Database system configuration, you can edit the segment and content IDs to match the values of the system. +To re-create a cluster using a known Apache Cloudberry system configuration, you can edit the segment and content IDs to match the values of the system. **`HEAP_CHECKSUM`** -**Optional.** This parameter specifies if checksums are enabled for heap data. When enabled, checksums are calculated for heap storage in all databases, enabling Cloudberry Database to detect corruption in the I/O system. This option is set when the system is initialized and cannot be changed later. +**Optional.** This parameter specifies if checksums are enabled for heap data. When enabled, checksums are calculated for heap storage in all databases, enabling Apache Cloudberry to detect corruption in the I/O system. This option is set when the system is initialized and cannot be changed later. The `HEAP_CHECKSUM` option is on by default and turning it off is strongly discouraged. 
If you set this option to off, data corruption in storage can go undetected and make recovery much more difficult. -To determine if heap checksums are enabled in a Cloudberry Database system, you can query the `data_checksums` server configuration parameter with the `gpconfig` management utility: +To determine if heap checksums are enabled in an Apache Cloudberry system, you can query the `data_checksums` server configuration parameter with the `gpconfig` management utility: ```shell $ gpconfig -s data_checksums @@ -289,42 +289,42 @@ $ gpconfig -s data_checksums **`HBA_HOSTNAMES`** -**Optional.** This parameter controls whether `gpinitsystem` uses IP addresses or host names in the `pg_hba.conf` file when updating the file with addresses that can connect to Cloudberry Database. The default value is `0`, the utility uses IP addresses when updating the file. When initializing a Cloudberry Database system, specify `HBA_HOSTNAMES=1` to have the utility use host names in the `pg_hba.conf` file. +**Optional.** This parameter controls whether `gpinitsystem` uses IP addresses or host names in the `pg_hba.conf` file when updating the file with addresses that can connect to Apache Cloudberry. The default value is `0`, the utility uses IP addresses when updating the file. When initializing an Apache Cloudberry system, specify `HBA_HOSTNAMES=1` to have the utility use host names in the `pg_hba.conf` file. - + ## Specify hosts using hostnames or IP addresses -When initializing a Cloudberry Database system with `gpinitsystem`, you can specify segment hosts using either hostnames or IP addresses. For example, you can use hostnames or IP addresses in the file specified with the `-h` option. +When initializing an Apache Cloudberry system with `gpinitsystem`, you can specify segment hosts using either hostnames or IP addresses. For example, you can use hostnames or IP addresses in the file specified with the `-h` option.
- If you specify a hostname, the resolution of the hostname to an IP address should be done locally for security. For example, you should use entries in a local `/etc/hosts` file to map a hostname to an IP address. The resolution of a hostname to an IP address should not be performed by an external service such as a public DNS server. You must stop the Cloudberry system before you change the mapping of a hostname to a different IP address. -- If you specify an IP address, the address should not be changed after the initial configuration. When segment mirroring is enabled, replication from the primary to the mirror segment will fail if the IP address changes from the configured value. For this reason, you should use a hostname when initializing a Cloudberry Database system unless you have a specific requirement to use IP addresses. +- If you specify an IP address, the address should not be changed after the initial configuration. When segment mirroring is enabled, replication from the primary to the mirror segment will fail if the IP address changes from the configured value. For this reason, you should use a hostname when initializing an Apache Cloudberry system unless you have a specific requirement to use IP addresses. -When initializing the Cloudberry Database system, `gpinitsystem` uses the initialization information to populate the `gp_segment_configuration` catalog table and adds hosts to the `pg_hba.conf` file. By default, the host IP address is added to the file. Specify the `gpinitsystem` configuration file parameter `HBA_HOSTNAMES=1` to add hostnames to the file. +When initializing the Apache Cloudberry system, `gpinitsystem` uses the initialization information to populate the `gp_segment_configuration` catalog table and adds hosts to the `pg_hba.conf` file. By default, the host IP address is added to the file. Specify the `gpinitsystem` configuration file parameter `HBA_HOSTNAMES=1` to add hostnames to the file.
-Cloudberry Database uses the `address` value of the `gp_segment_configuration` catalog table when looking up host systems for Cloudberry interconnect (internal) communication between the coordinator and segment instances and between segment instances, and for other internal communication. +Apache Cloudberry uses the `address` value of the `gp_segment_configuration` catalog table when looking up host systems for Cloudberry interconnect (internal) communication between the coordinator and segment instances and between segment instances, and for other internal communication. ## Examples -Initialize a Cloudberry Database system by supplying a cluster configuration file and a segment host address file, and set up a spread mirroring (`--mirror-mode=spread`) configuration: +Initialize an Apache Cloudberry system by supplying a cluster configuration file and a segment host address file, and set up a spread mirroring (`--mirror-mode=spread`) configuration: ```shell $ gpinitsystem -c gpinitsystem_config -h hostfile_gpinitsystem --mirror-mode=spread ``` -Initialize a Cloudberry Database system and set the superuser remote password: +Initialize an Apache Cloudberry system and set the superuser remote password: ```shell $ gpinitsystem -c gpinitsystem_config -h hostfile_gpinitsystem --su-password=mypassword ``` -Initialize a Cloudberry Database system with an optional standby coordinator host: +Initialize an Apache Cloudberry system with an optional standby coordinator host: ```shell $ gpinitsystem -c gpinitsystem_config -h hostfile_gpinitsystem -s host09 ``` -Initialize a Cloudberry Database system and write the provided configuration to an output file, for example `cluster_init.config`: +Initialize an Apache Cloudberry system and write the provided configuration to an output file, for example `cluster_init.config`: ```shell $ gpinitsystem -c gpinitsystem_config -h hostfile_gpinitsystem -O cluster_init.config @@ -348,13 +348,13 @@ cdw~cdw.local~7001~/data/mirror1/gpseg0~3~0 ) ```
-Initialize a Cloudberry Database using an input configuration file (a file that defines the Cloudberry Database cluster) using `QD_PRIMARY_ARRAY` and `PRIMARY_ARRAY` parameters: +Initialize an Apache Cloudberry cluster using an input configuration file (a file that defines the Apache Cloudberry cluster) using `QD_PRIMARY_ARRAY` and `PRIMARY_ARRAY` parameters: ```shell $ gpinitsystem -I cluster_init.config ``` -The following example uses a host system configured with multiple NICs. If host systems are configured with multiple NICs, you can initialize a Cloudberry Database system to use each NIC as a Cloudberry host system. You must ensure that the host systems are configured with sufficient resources to support all the segment instances being added to the host. Also, if high availability is enabled, you must ensure that the Cloudberry system configuration supports failover if a host system fails. +The following example uses a host system configured with multiple NICs. If host systems are configured with multiple NICs, you can initialize an Apache Cloudberry system to use each NIC as a Cloudberry host system. You must ensure that the host systems are configured with sufficient resources to support all the segment instances being added to the host. Also, if high availability is enabled, you must ensure that the Cloudberry system configuration supports failover if a host system fails. For this simple coordinator and segment instance configuration, the host system `gp7c` is configured with two NICs `gp7c-1` and `gp7c-2`. In the configuration, the `QD_PRIMARY_ARRAY` parameter defines the coordinator segment using `gp7c-1`. The `PRIMARY_ARRAY` and `MIRROR_ARRAY` parameters use `gp7c-2` to define a primary and mirror segment instance.
@@ -372,4 +372,4 @@ gp7c~gp7c-2~50000~/data/mirror1/gpseg1~5~1 ## See also -[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Cloudberry Database](/docs/start-and-stop-cbdb-database.md) +[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Apache Cloudberry](/docs/start-and-stop-cbdb-database.md) diff --git a/docs/sys-utilities/gpload.md b/docs/sys-utilities/gpload.md index 930377a1bd..4bc7780f5f 100644 --- a/docs/sys-utilities/gpload.md +++ b/docs/sys-utilities/gpload.md @@ -22,13 +22,13 @@ gpload --version The client machine where `gpload` is run must have the following: -- The [gpfdist](/docs/sys-utilities/gpfdist.md) parallel file distribution program installed and in your `$PATH`. This program is located in `$GPHOME/bin` of your Cloudberry Database server installation. -- Network access to and from all hosts in your Cloudberry Database array (coordinator and segments). +- The [gpfdist](/docs/sys-utilities/gpfdist.md) parallel file distribution program installed and in your `$PATH`. This program is located in `$GPHOME/bin` of your Apache Cloudberry server installation. +- Network access to and from all hosts in your Apache Cloudberry array (coordinator and segments). - Network access to and from the hosts where the data to be loaded resides (ETL servers). ## Description -`gpload` is a data loading utility that acts as an interface to the Cloudberry Database external table parallel loading feature. Using a load specification defined in a YAML formatted control file, `gpload` runs a load by invoking the Cloudberry Database parallel file server ([gpfdist](/docs/sys-utilities/gpfdist.md)), creating an external table definition based on the source data defined, and running an `INSERT`, `UPDATE` or `MERGE` operation to load the source data into the target table in the database. 
+`gpload` is a data loading utility that acts as an interface to the Apache Cloudberry external table parallel loading feature. Using a load specification defined in a YAML formatted control file, `gpload` runs a load by invoking the Apache Cloudberry parallel file server ([gpfdist](/docs/sys-utilities/gpfdist.md)), creating an external table definition based on the source data defined, and running an `INSERT`, `UPDATE` or `MERGE` operation to load the source data into the target table in the database. > **Note** `MERGE` and `UPDATE` operations are not supported if the target table column name is a reserved keyword, has capital letters, or includes any character that requires quotes (" ") to identify the column. @@ -86,15 +86,15 @@ The database to load into. If not specified, reads from the load control file, t **`-h hostname`** -Specifies the host name of the machine on which the Cloudberry Database coordinator database server is running. If not specified, reads from the load control file, the environment variable `$PGHOST` or defaults to `localhost`. +Specifies the host name of the machine on which the Apache Cloudberry coordinator database server is running. If not specified, reads from the load control file, the environment variable `$PGHOST` or defaults to `localhost`. **`-p port`** -Specifies the TCP port on which the Cloudberry Database coordinator database server is listening for connections. If not specified, reads from the load control file, the environment variable `$PGPORT` or defaults to 5432. +Specifies the TCP port on which the Apache Cloudberry coordinator database server is listening for connections. If not specified, reads from the load control file, the environment variable `$PGPORT` or defaults to 5432. **`--max_retries retry_times`** -Specifies the maximum number of times `gpload` attempts to connect to Cloudberry Database after a connection timeout. The default value is `0`, do not attempt to connect after a connection timeout. 
A negative integer, such as `-1`, specifies an unlimited number of attempts. +Specifies the maximum number of times `gpload` attempts to connect to Apache Cloudberry after a connection timeout. The default value is `0`, do not attempt to connect after a connection timeout. A negative integer, such as `-1`, specifies an unlimited number of attempts. **`-U username`** @@ -106,7 +106,7 @@ Force a password prompt. If not specified, reads the password from the environme ## Control File Format -The `gpload` control file uses the [YAML 1.1](http://yaml.org/spec/1.1/) document format and then implements its own schema for defining the various steps of a Cloudberry Database load operation. The control file must be a valid YAML document. +The `gpload` control file uses the [YAML 1.1](http://yaml.org/spec/1.1/) document format and then implements its own schema for defining the various steps of an Apache Cloudberry load operation. The control file must be a valid YAML document. The `gpload` program processes the control file document in order and uses indentation (spaces) to determine the document hierarchy and the relationships of the sections to one another. The use of white space is significant. White space should not be used simply for formatting purposes, and tabs should not be used at all. @@ -176,21 +176,21 @@ Optional. The version of the `gpload` control file schema. The current version i **`DATABASE`** -Optional. Specifies which database in the Cloudberry Database system to connect to. If not specified, defaults to `$PGDATABASE` if set or the current system user name. You can also specify the database on the command line using the `-d` option. +Optional. Specifies which database in the Apache Cloudberry system to connect to. If not specified, defaults to `$PGDATABASE` if set or the current system user name. You can also specify the database on the command line using the `-d` option. **`USER`** Optional. Specifies which database role to use to connect.
If not specified, defaults to the current user or `$PGUSER` if set. You can also specify the database role on the command line using the `-U` option. -If the user running `gpload` is not a Cloudberry Database superuser, then the appropriate rights must be granted to the user for the load to be processed. +If the user running `gpload` is not an Apache Cloudberry superuser, then the appropriate rights must be granted to the user for the load to be processed. **`HOST`** -Optional. Specifies Cloudberry Database coordinator host name. If not specified, defaults to localhost or `$PGHOST` if set. You can also specify the coordinator host name on the command line using the `-h` option. +Optional. Specifies Apache Cloudberry coordinator host name. If not specified, defaults to localhost or `$PGHOST` if set. You can also specify the coordinator host name on the command line using the `-h` option. **`PORT`** -Optional. Specifies Cloudberry Database coordinator port. If not specified, defaults to 5432 or `$PGPORT` if set. You can also specify the coordinator port on the command line using the `-p` option. +Optional. Specifies Apache Cloudberry coordinator port. If not specified, defaults to 5432 or `$PGPORT` if set. You can also specify the coordinator port on the command line using the `-p` option. **`GPLOAD`** @@ -198,7 +198,7 @@ Required. Begins the load specification section. A `GPLOAD` specification must h **`INPUT`** -Required. Defines the location and the format of the input data to be loaded. `gpload` will start one or more instances of the [gpfdist](/docs/sys-utilities/gpfdist.md) file distribution program on the current host and create the required external table definition(s) in Cloudberry Database that point to the source data. Note that the host from which you run `gpload` must be accessible over the network by all Cloudberry Database hosts (coordinator and segments). +Required. Defines the location and the format of the input data to be loaded.
`gpload` will start one or more instances of the [gpfdist](/docs/sys-utilities/gpfdist.md) file distribution program on the current host and create the required external table definition(s) in Apache Cloudberry that point to the source data. Note that the host from which you run `gpload` must be accessible over the network by all Apache Cloudberry hosts (coordinator and segments). SOURCE** @@ -246,7 +246,7 @@ The root directory (`/`) cannot be specified as `CERTIFICATES_PATH`. Optional. Specifies whether `gpload` resolve hostnames to the fully qualified domain name (FQDN) or the local hostname. If the value is set to `true`, names are resolved to the FQDN. If the value is set to `false`, resolution is to the local hostname. The default is `false`. -A fully qualified domain name might be required in some situations. For example, if the Cloudberry Database system is in a different domain than an ETL application that is being accessed by `gpload`. +A fully qualified domain name might be required in some situations. For example, if the Apache Cloudberry system is in a different domain than an ETL application that is being accessed by `gpload`. **`COLUMNS`** @@ -288,7 +288,7 @@ Specifies the type of newline used in your data files, one of: - CR (Carriage return, 0x0D) - CRLF (Carriage return plus line feed, 0x0D 0x0A). -If not specified, Cloudberry Database detects the newline type by examining the first row of data that it receives, and uses the first newline type that it encounters. +If not specified, Apache Cloudberry detects the newline type by examining the first row of data that it receives, and uses the first newline type that it encounters. **`NULL_AS`** @@ -296,7 +296,7 @@ Optional. Specifies the string that represents a null value. The default is `\N` **`FILL_MISSING_FIELDS`** -Optional. The default value is `false`. 
When reading a row of data that has missing trailing field values (the row of data has missing data fields at the end of a line or row), Cloudberry Database returns an error. +Optional. The default value is `false`. When reading a row of data that has missing trailing field values (the row of data has missing data fields at the end of a line or row), Apache Cloudberry returns an error. If the value is `true`, when reading a row of data that has missing trailing field values, the values are set to `NULL`. Blank rows, fields with a `NOT NULL` constraint, and trailing delimiters on a line will still report an error. @@ -322,18 +322,18 @@ Optional. Character set encoding of the source data. Specify a string constant ( **`ERROR_LIMIT`** -Optional. Enables single row error isolation mode for this load operation. When enabled, input rows that have format errors will be discarded provided that the error limit count is not reached on any Cloudberry Database segment instance during input processing. If the error limit is not reached, all good rows will be loaded and any error rows will either be discarded or captured as part of error log information. The default is to cancel the load operation on the first error encountered. Note that single row error isolation only applies to data rows with format errors; for example, extra or missing attributes, attributes of a wrong data type, or invalid client encoding sequences. Constraint errors, such as primary key violations, will still cause the load operation to be cancelled if encountered. For information about handling load errors, see [Loading data](../data-loading/). +Optional. Enables single row error isolation mode for this load operation. When enabled, input rows that have format errors will be discarded provided that the error limit count is not reached on any Apache Cloudberry segment instance during input processing. 
If the error limit is not reached, all good rows will be loaded and any error rows will either be discarded or captured as part of error log information. The default is to cancel the load operation on the first error encountered. Note that single row error isolation only applies to data rows with format errors; for example, extra or missing attributes, attributes of a wrong data type, or invalid client encoding sequences. Constraint errors, such as primary key violations, will still cause the load operation to be cancelled if encountered. For information about handling load errors, see [Loading data](../data-loading/). **`LOG_ERRORS`** -Optional when `ERROR_LIMIT` is declared. Value is either `true` or `false`. The default value is `false`. If the value is `true`, rows with formatting errors are logged internally when running in single row error isolation mode. You can examine formatting errors with the Cloudberry Database built-in SQL function `gp_read_error_log('')`. If formatting errors are detected when loading data, `gpload` generates a warning message with the name of the table that contains the error information similar to this message. +Optional when `ERROR_LIMIT` is declared. Value is either `true` or `false`. The default value is `false`. If the value is `true`, rows with formatting errors are logged internally when running in single row error isolation mode. You can examine formatting errors with the Apache Cloudberry built-in SQL function `gp_read_error_log('')`. If formatting errors are detected when loading data, `gpload` generates a warning message with the name of the table that contains the error information similar to this message. ```shell |WARN|1 bad row, please use GPDB built-in function gp_read_error_log('table-name') to access the detailed error row ``` -If `LOG_ERRORS: true` is specified, `REUSE_TABLES: true` must be specified to retain the formatting errors in Cloudberry Database error logs. 
If `REUSE_TABLES: true` is not specified, the error information is deleted after the `gpload` operation. Only summary information about formatting errors is returned. You can delete the formatting errors from the error logs with the Cloudberry Database function `gp_truncate_error_log()`. +If `LOG_ERRORS: true` is specified, `REUSE_TABLES: true` must be specified to retain the formatting errors in Apache Cloudberry error logs. If `REUSE_TABLES: true` is not specified, the error information is deleted after the `gpload` operation. Only summary information about formatting errors is returned. You can delete the formatting errors from the error logs with the Apache Cloudberry function `gp_truncate_error_log()`. > **Note** When `gpfdist` reads data and encounters a data formatting error, the error message includes a row number indicating the location of the formatting error. `gpfdist` attempts to capture the row that contains the error. However, `gpfdist` might not capture the exact row for some formatting errors. @@ -341,7 +341,7 @@ If `LOG_ERRORS: true` is specified, `REUSE_TABLES: true` must be specified to re Optional. Defines the schema of the external table database objects created by `gpload`. -The default is to use the Cloudberry Database `search_path`. +The default is to use the Apache Cloudberry `search_path`. :SCHEMA @@ -407,7 +407,7 @@ Optional. If set to true, `gpload` will remove all rows in the target table prio Optional. If set to true, `gpload` will not drop the external table objects and staging table objects it creates. These objects will be reused for future load operations that use the same load specifications. This improves performance of trickle loads (ongoing small loads to the same target table). -If `LOG_ERRORS: true` is specified, `REUSE_TABLES: true` must be specified to retain the formatting errors in Cloudberry Database error logs. 
If `REUSE_TABLES: true` is not specified, formatting error information is deleted after the `gpload` operation. +If `LOG_ERRORS: true` is specified, `REUSE_TABLES: true` must be specified to retain the formatting errors in Apache Cloudberry error logs. If `REUSE_TABLES: true` is not specified, formatting error information is deleted after the `gpload` operation. If the `` exists, the utility uses the existing table. The utility returns an error if the table schema does not match the `OUTPUT` table schema. diff --git a/docs/sys-utilities/gplogfilter.md b/docs/sys-utilities/gplogfilter.md index faf95fc6cb..11574d4d66 100644 --- a/docs/sys-utilities/gplogfilter.md +++ b/docs/sys-utilities/gplogfilter.md @@ -4,7 +4,7 @@ title: gplogfilter # gplogfilter -Searches through Cloudberry Database log files for specified entries. +Searches through Apache Cloudberry log files for specified entries. ## Synopsis @@ -19,13 +19,13 @@ gplogfilter --version ## Description -The `gplogfilter` utility can be used to search through a Cloudberry Database log file for entries matching the specified criteria. If an input file is not supplied, then `gplogfilter` will use the `$COORDINATOR_DATA_DIRECTORY` environment variable to locate the Cloudberry coordinator log file in the standard logging location. To read from standard input, use a dash (`-`) as the input file name. Input files may be compressed using `gzip`. In an input file, a log entry is identified by its timestamp in `YYYY-MM-DD [hh:mm[:ss]]` format. +The `gplogfilter` utility can be used to search through an Apache Cloudberry log file for entries matching the specified criteria. If an input file is not supplied, then `gplogfilter` will use the `$COORDINATOR_DATA_DIRECTORY` environment variable to locate the Cloudberry coordinator log file in the standard logging location. To read from standard input, use a dash (`-`) as the input file name. Input files may be compressed using `gzip`.
In an input file, a log entry is identified by its timestamp in `YYYY-MM-DD [hh:mm[:ss]]` format. You can also use `gplogfilter` to search through all segment log files at once by running it through the [gpssh](/docs/sys-utilities/gpssh.md) utility. For example, to display the last three lines of each segment log file: ```shell gpssh -f seg_host_file -=> source /usr/local/cloudberry-db/greenplum_path.sh +=> source /usr/local/cloudberry/greenplum_path.sh => gplogfilter -n 3 /gpdata/*/log/gpdb*.csv ``` @@ -115,7 +115,7 @@ If the output file already exists, appends to the file instead of overwriting it **`input_file`** -The name of the input log file(s) to search through. If an input file is not supplied, `gplogfilter` will use the `$COORDINATOR_DATA_DIRECTORY` environment variable to locate the Cloudberry Database coordinator log file. To read from standard input, use a dash (`-`) as the input file name. +The name of the input log file(s) to search through. If an input file is not supplied, `gplogfilter` will use the `$COORDINATOR_DATA_DIRECTORY` environment variable to locate the Apache Cloudberry coordinator log file. To read from standard input, use a dash (`-`) as the input file name. **`-u | --unzip`** @@ -153,7 +153,7 @@ Using [gpssh](/docs/sys-utilities/gpssh.md), run `gplogfilter` on the segment ho ```shell gpssh -f seg_hosts_file -e 'source -/usr/local/cloudberry-db/greenplum_path.sh ; gplogfilter -f +/usr/local/cloudberry/greenplum_path.sh ; gplogfilter -f con6 /gpdata/*/log/gpdb*.csv' > seglog.out ``` diff --git a/docs/sys-utilities/gpmemwatcher.md b/docs/sys-utilities/gpmemwatcher.md index 50eebb27c4..774ca040da 100644 --- a/docs/sys-utilities/gpmemwatcher.md +++ b/docs/sys-utilities/gpmemwatcher.md @@ -4,7 +4,7 @@ title: gpmemwatcher # gpmemwatcher -Tracks the memory usage of each process in a Cloudberry Database cluster. +Tracks the memory usage of each process in an Apache Cloudberry cluster.
## Synopsis @@ -20,9 +20,9 @@ gpmemwatcher -h | --help ## Description -The `gpmemwatcher` utility is a daemon that runs on all servers of a Cloudberry Database cluster. It tracks the memory usage of each process by collecting the output of the `ps` command every 60 seconds. It is a low impact process that only consumes 4 MB of memory. It will generate approximately 30 MB of data over a 24-hour period. +The `gpmemwatcher` utility is a daemon that runs on all servers of an Apache Cloudberry cluster. It tracks the memory usage of each process by collecting the output of the `ps` command every 60 seconds. It is a low impact process that only consumes 4 MB of memory. It will generate approximately 30 MB of data over a 24-hour period. -You may use this utility if Cloudberry Database is reporting `Out of memory` errors and causing segments to go down or queries to fail. You collect the memory usage information of one or multiple servers within the Cloudberry Database cluster with `gpmemwatcher` and then use [gpmemreport](/docs/sys-utilities/gpmemreport.md) to analyze the files collected. +You may use this utility if Apache Cloudberry is reporting `Out of memory` errors and causing segments to go down or queries to fail. You collect the memory usage information of one or multiple servers within the Apache Cloudberry cluster with `gpmemwatcher` and then use [gpmemreport](/docs/sys-utilities/gpmemreport.md) to analyze the files collected. ## Options diff --git a/docs/sys-utilities/gpmovemirrors.md b/docs/sys-utilities/gpmovemirrors.md index 45cbca3c89..73601ca264 100644 --- a/docs/sys-utilities/gpmovemirrors.md +++ b/docs/sys-utilities/gpmovemirrors.md @@ -42,8 +42,8 @@ The coordinator data directory. If not specified, the value set for `$COORDINATO **`--hba-hostnames boolean`** -Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Cloudberry Database.
When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Cloudberry Database system was initialized. - +Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Apache Cloudberry. When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Apache Cloudberry system was initialized. + **`-i move_config_file`** @@ -75,7 +75,7 @@ Displays the online help. ## Examples -Moves mirrors from an existing Cloudberry Database system to a different set of hosts: +Moves mirrors from an existing Apache Cloudberry system to a different set of hosts: ```shell $ gpmovemirrors -i move_config_file diff --git a/docs/sys-utilities/gppkg.md b/docs/sys-utilities/gppkg.md index 088a3ce25d..518ece38e9 100644 --- a/docs/sys-utilities/gppkg.md +++ b/docs/sys-utilities/gppkg.md @@ -4,7 +4,7 @@ title: gppkg # gppkg -Cloudberry Package Manager installs, upgrades, migrates, and removes Cloudberry Database extensions in `.gppkg` format, such as PL/Java, PL/R, PostGIS, and MADlib, along with their dependencies, across an entire cluster. +Cloudberry Package Manager installs, upgrades, migrates, and removes Apache Cloudberry extensions in `.gppkg` format, such as PL/Java, PL/R, PostGIS, and MADlib, along with their dependencies, across an entire cluster. ## Synopsis @@ -20,9 +20,9 @@ gppkg -v | --verbose ## Description -The Cloudberry Package Manager -- `gppkg` -- utility installs Cloudberry Database extensions, along with any dependencies, on all hosts across a cluster. 
It will also automatically install extensions on new hosts in the case of system expansion and segment recovery. +The Cloudberry Package Manager -- `gppkg` -- utility installs Apache Cloudberry extensions, along with any dependencies, on all hosts across a cluster. It will also automatically install extensions on new hosts in the case of system expansion and segment recovery. -The `gppkg` utility does not require that Cloudberry Database is running in order to install packages. +The `gppkg` utility does not require that Apache Cloudberry is running in order to install packages. Examples of database extensions and packages software that are delivered using the Cloudberry Package Manager: @@ -43,7 +43,7 @@ Install or upgrade the specified package in the cluster. This includes any pre/p **`migrate --source --destination [--pkglibs ] []`** -Migrate all packages from one minor version of Cloudberry Database to another. The option `--source ` specifies the path of the source `$GPHOME`, and the option `--destination ` specifies the path of the destination `$GPHOME`. Additionally, the option `--pkglibs ` allows you to point to a location where you may place newer version packages for the destination Cloudberry version; `gppkg` will upgrade these packages automatically. +Migrate all packages from one minor version of Apache Cloudberry to another. The option `--source ` specifies the path of the source `$GPHOME`, and the option `--destination ` specifies the path of the destination `$GPHOME`. Additionally, the option `--pkglibs ` allows you to point to a location where you may place newer version packages for the destination Cloudberry version; `gppkg` will upgrade these packages automatically. **`query [] [] []`** @@ -68,7 +68,7 @@ Reconcile the package state of the cluster to match the state of the master host **`--cluster_info `** -Use this option when Cloudberry Database is not running. The input file `` contains information about the database cluster. 
You may generate the file by running the following command: +Use this option when Apache Cloudberry is not running. The input file `` contains information about the database cluster. You may generate the file by running the following command: ```shell psql postgres -Xc 'select dbid, content, role, preferred_role, mode, status, hostname, address, port, datadir from gp_segment_configuration order by content, preferred_role desc;' | head -n-2 | tail -n+3 | tr -d " " > cluster_info @@ -100,7 +100,7 @@ Set the logging level to verbose. ## Examples -Install the Cloudberry Database PL/Java extension: +Install the Apache Cloudberry PL/Java extension: ```shell gppkg install ./pljava-2.0.7-gp7-rhel8_x86_64.gppkg diff --git a/docs/sys-utilities/gprecoverseg.md b/docs/sys-utilities/gprecoverseg.md index 04c2bfb3ea..2c39a97fa4 100644 --- a/docs/sys-utilities/gprecoverseg.md +++ b/docs/sys-utilities/gprecoverseg.md @@ -32,7 +32,7 @@ In a system with mirrors enabled, the `gprecoverseg` utility reactivates a faile During an incremental recovery (the `-F` option is not specified), if `gprecoverseg` detects a segment instance with mirroring deactivated in a system with mirrors activated, the utility reports that mirroring is deactivated for the segment, does not attempt to recover that segment instance, and continues the recovery process. -A segment instance can fail for several reasons, such as a host failure, network failure, or disk failure. When a segment instance fails, its status is marked as `d` (down) in the Cloudberry Database system catalog, and its mirror is activated in `Not in Sync` mode. In order to bring the failed segment instance back into operation again, you must first correct the problem that made it fail in the first place, and then recover the segment instance in Cloudberry Database using `gprecoverseg`. +A segment instance can fail for several reasons, such as a host failure, network failure, or disk failure. 
When a segment instance fails, its status is marked as `d` (down) in the Apache Cloudberry system catalog, and its mirror is activated in `Not in Sync` mode. In order to bring the failed segment instance back into operation again, you must first correct the problem that made it fail in the first place, and then recover the segment instance in Apache Cloudberry using `gprecoverseg`. Segment recovery using `gprecoverseg` requires that you have an active mirror to recover from. For systems that do not have mirroring enabled, or in the event of a double fault (a primary and mirror pair both down at the same time) — you must take manual steps to recover the failed segment instances and then perform a system restart to bring the segments back online. For example, this command restarts a system. @@ -62,9 +62,9 @@ The `gp_segment_configuration` system catalog table can help you determine your    ORDER BY dbid; ``` -The new recovery segment host must be pre-installed with the Cloudberry Database software and configured exactly the same as the existing segment hosts. A spare data directory location must exist on all currently configured segment hosts and have enough disk space to accommodate the failed segments. +The new recovery segment host must be pre-installed with the Apache Cloudberry software and configured exactly the same as the existing segment hosts. A spare data directory location must exist on all currently configured segment hosts and have enough disk space to accommodate the failed segments. -The recovery process marks the segment as up again in the Cloudberry Database system catalog, and then initiates the resynchronization process to bring the transactional state of the segment up-to-date with the latest changes. The system is online and available during `Not in Sync` mode. 
+The recovery process marks the segment as up again in the Apache Cloudberry system catalog, and then initiates the resynchronization process to bring the transactional state of the segment up-to-date with the latest changes. The system is online and available during `Not in Sync` mode. ## Options @@ -103,9 +103,9 @@ Optional. Perform a differential copy of the active segment instance in order to **`--hba-hostnames boolean`** -Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Cloudberry Database. When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Cloudberry Database system was initialized. +Optional. Controls whether this utility uses IP addresses or host names in the `pg_hba.conf` file when updating this file with addresses that can connect to Apache Cloudberry. When set to 0 -- the default value -- this utility uses IP addresses when updating this file. When set to 1, this utility uses host names when updating this file. For consistency, use the same value that was specified for `HBA_HOSTNAMES` when the Apache Cloudberry system was initialized. - + **`-i recover_config_file`** @@ -169,9 +169,9 @@ $ gprecoverseg -o /home/gpadmin/recover_config_file **`-p new_recover_host[,...]`** -Specifies a new host outside of the currently configured Cloudberry Database array on which to recover invalid segments. +Specifies a new host outside of the currently configured Apache Cloudberry array on which to recover invalid segments. 
-The new host must have the Cloudberry Database software installed and configured, and have the same hardware and OS configuration as the current segment hosts (same OS version, locales, `gpadmin` user account, data directory locations created, ssh keys exchanged, number of network interfaces, network interface naming convention, and so on). Specifically, the Cloudberry Database binaries must be installed, the new host must be able to connect password-less with all segments including the Cloudberry coordinator, and any other Cloudberry Database specific OS configuration parameters must be applied. +The new host must have the Apache Cloudberry software installed and configured, and have the same hardware and OS configuration as the current segment hosts (same OS version, locales, `gpadmin` user account, data directory locations created, ssh keys exchanged, number of network interfaces, network interface naming convention, and so on). Specifically, the Apache Cloudberry binaries must be installed, the new host must be able to connect password-less with all segments including the Cloudberry coordinator, and any other Apache Cloudberry specific OS configuration parameters must be applied. > **Note** In the case of multiple failed segment hosts, you can specify the hosts to recover with a comma-separated list. However, it is strongly recommended to recover one host at a time. If you must recover more than one host at a time, then it is critical to ensure that a double fault scenario does not occur, in which both the segment primary and corresponding mirror are offline. diff --git a/docs/sys-utilities/gpreload.md b/docs/sys-utilities/gpreload.md index 840fc8798f..2bc8d861e8 100644 --- a/docs/sys-utilities/gpreload.md +++ b/docs/sys-utilities/gpreload.md @@ -4,7 +4,7 @@ title: gpreload # gpreload -Reloads Cloudberry Database table data sorting the data based on specified columns. +Reloads Apache Cloudberry table data sorting the data based on specified columns. 
## Synopsis

@@ -53,11 +53,11 @@ The database that contains the tables to be reloaded. The `gpreload` utility con

**`-p port`**

-The Cloudberry Database coordinator port. If not specified, the value of the `PGPORT` environment variable is used. If the value is not available, an error is returned.
+The Apache Cloudberry coordinator port. If not specified, the value of the `PGPORT` environment variable is used. If the value is not available, an error is returned.

**`{-t | --table-file } path_to_file`**

-The location and name of file containing list of schema qualified table names to reload and the column names to reorder from the Cloudberry Database. Only user defined tables are supported. Views or system catalog tables are not supported.
+The location and name of file containing list of schema qualified table names to reload and the column names to reorder from Apache Cloudberry. Only user defined tables are supported. Views or system catalog tables are not supported.

If indexes are defined on table listed in the file, `gpreload` prompts to continue.

diff --git a/docs/sys-utilities/gprestore.md b/docs/sys-utilities/gprestore.md
index c2b56bd04c..7389e87250 100644
--- a/docs/sys-utilities/gprestore.md
+++ b/docs/sys-utilities/gprestore.md
@@ -4,7 +4,7 @@ title: gprestore

# gprestore

-Restore a Cloudberry Database backup that was created using the `gpbackup` utility. By default `gprestore` uses backed up metadata files and DDL files located in the Cloudberry Database master host data directory, with table data stored locally on segment hosts in CSV data files.
+Restore an Apache Cloudberry backup that was created using the `gpbackup` utility. By default `gprestore` uses backed up metadata files and DDL files located in the Apache Cloudberry master host data directory, with table data stored locally on segment hosts in CSV data files.
## Synopsis @@ -53,7 +53,7 @@ When restoring from a backup set, `gprestore` restores to a database with the sa When restoring a backup set that contains data from some leaf partitions of a partitioned tables, the partitioned table is restored along with the data for the leaf partitions. For example, you create a backup with the `gpbackup` option `--include-table-file` and the text file lists some leaf partitions of a partitioned table. Restoring the backup creates the partitioned table and restores the data only for the leaf partitions listed in the file. -By default, only database objects in the backup set are restored. Cloudberry Database system objects are automatically included in a `gpbackup` backup set, but these objects are only restored if you include the `--with-globals` option to `gprestore`. +By default, only database objects in the backup set are restored. Apache Cloudberry system objects are automatically included in a `gpbackup` backup set, but these objects are only restored if you include the `--with-globals` option to `gprestore`. During a restore operation, automatic updating of table statistics is deactivated for the tables being restored. If you backed up query plan statistics using the `--with-stats` option, you can restore those statistics by providing `--with-stats` to `gprestore`. If you did not use `--with-stats` during a backup, or you want to collect current statistics during the restore operation, you can use the `--run-analyze` option to run `ANALYZE` on the restored tables. @@ -65,17 +65,17 @@ When a restore operation completes, `gprestore` returns a status code. `gprestore` can send status email notifications after a back up operation completes. You specify when the utility sends the mail and the email recipients in a configuration file. -Note: This utility uses secure shell (SSH) connections between systems to perform its tasks. 
In large Cloudberry Database deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. +Note: This utility uses secure shell (SSH) connections between systems to perform its tasks. In large Apache Cloudberry deployments, cloud deployments, or deployments with a large number of segments per host, this utility may exceed the host's maximum threshold for unauthenticated connections. Consider updating the SSH `MaxStartups` and `MaxSessions` configuration parameters to increase this threshold. For more information about SSH configuration options, refer to the SSH documentation for your Linux distribution. ## Options **--timestamp YYYYMMDDHHMMSS** -Required. Specifies the timestamp of the `gpbackup` backup set to restore. By default `gprestore` tries to locate metadata files for the timestamp on the Cloudberry Database master host in the $MASTER_DATA_DIRECTORY/backups/YYYYMMDD/YYYYMMDDhhmmss/ directory, and CSV data files in the `/backups/YYYYMMDD/YYYYMMDDhhmmss/` directory of each segment host. +Required. Specifies the timestamp of the `gpbackup` backup set to restore. By default `gprestore` tries to locate metadata files for the timestamp on the Apache Cloudberry master host in the $MASTER_DATA_DIRECTORY/backups/YYYYMMDD/YYYYMMDDhhmmss/ directory, and CSV data files in the `/backups/YYYYMMDD/YYYYMMDDhhmmss/` directory of each segment host. **--backup-dir directory** -Optional. Sources all backup files (metadata files and data files) from the specified directory. You must specify directory as an absolute path (not relative). 
If you do not supply this option, `gprestore` tries to locate metadata files for the timestamp on the Cloudberry Database master host in the $MASTER_DATA_DIRECTORY/backups/YYYYMMDD/YYYYMMDDhhmmss/ directory. CSV data files must be available on each segment in the `/backups/YYYYMMDD/YYYYMMDDhhmmss/` directory. Include this option when you specify a custom backup directory with `gpbackup`. +Optional. Sources all backup files (metadata files and data files) from the specified directory. You must specify directory as an absolute path (not relative). If you do not supply this option, `gprestore` tries to locate metadata files for the timestamp on the Apache Cloudberry master host in the $MASTER_DATA_DIRECTORY/backups/YYYYMMDD/YYYYMMDDhhmmss/ directory. CSV data files must be available on each segment in the `/backups/YYYYMMDD/YYYYMMDDhhmmss/` directory. Include this option when you specify a custom backup directory with `gpbackup`. You cannot combine this option with the option `--plugin-config`. @@ -155,7 +155,7 @@ If you specify this option, the utility does not automatically restore dependent For a materialized view, the data is not restored. To populate the materialized view with data, you must use `REFRESH MATERIALIZED VIEW` and the tables that are referenced by the materialized view definition must be available. -If you use the `--include-table-file` option, `gprestore` does not create roles or set the owner of the tables. The utility restores table indexes and rules. Triggers are also restored but are not supported in Cloudberry Database. +If you use the `--include-table-file` option, `gprestore` does not create roles or set the owner of the tables. The utility restores table indexes and rules. Triggers are also restored but are not supported in Apache Cloudberry. **--incremental (Beta)** @@ -203,7 +203,7 @@ Optional. Specifies the number of parallel connections to use when restoring tab **--metadata-only** -Optional. 
Creates database tables from a backup created with the `gpbackup` utility, but does not restore the table data. This option assumes the tables do not exist in the target database. To create a specific set of tables from a backup set, you can specify an option to include tables or schemas or exclude tables or schemas. Specify the option `--with-globals` to restore the Cloudberry Database system objects. +Optional. Creates database tables from a backup created with the `gpbackup` utility, but does not restore the table data. This option assumes the tables do not exist in the target database. To create a specific set of tables from a backup set, you can specify an option to include tables or schemas or exclude tables or schemas. Specify the option `--with-globals` to restore the Apache Cloudberry system objects. The backup set must contain the DDL for tables to be restored. For example, a backup created with the `gpbackup` option `--data-only` does not contain the DDL for tables. @@ -238,7 +238,7 @@ Optional. Displays verbose log messages during a restore operation.--versionOpti **--with-globals** -Optional. Restores Cloudberry Database system objects in the backup set, in addition to database objects. +Optional. Restores Apache Cloudberry system objects in the backup set, in addition to database objects. 
**--with-stats** @@ -282,7 +282,7 @@ $ createdb demo2 $ gprestore --timestamp 20171103152558 --redirect-db demo2 ``` -Restore global Cloudberry Database metadata and query plan statistics in addition to the database objects: +Restore global Apache Cloudberry metadata and query plan statistics in addition to the database objects: ``` $ gprestore --timestamp 20171103152558 --create-db --with-globals --with-stats diff --git a/docs/sys-utilities/gpshrink.md b/docs/sys-utilities/gpshrink.md index 62480294e7..17c31dff2e 100644 --- a/docs/sys-utilities/gpshrink.md +++ b/docs/sys-utilities/gpshrink.md @@ -4,7 +4,7 @@ title: gpshrink # gpshrink -Cloudberry Database scales in clusters using the `gpshrink` system tool. When cluster resources are idle, such as disk space usage consistently below 20% or low CPU and memory usage, `gpshrink` can be used to reduce the size of the cluster, saving server resources. Users can remove segments from redundant servers with the `gpshrink` tool to scale in the cluster. +Apache Cloudberry scales in clusters using the `gpshrink` system tool. When cluster resources are idle, such as disk space usage consistently below 20% or low CPU and memory usage, `gpshrink` can be used to reduce the size of the cluster, saving server resources. Users can remove segments from redundant servers with the `gpshrink` tool to scale in the cluster. The gpshrink tool operates in two phases: @@ -44,8 +44,8 @@ The gpshrink tool operates in two phases: Example format for deleting one segment: ```bash - i-thd001y0|i-thd001y0|7004|/home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2|4|2|p - i-thd001y0|i-thd001y0|7007|/home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2|7|2|m + i-thd001y0|i-thd001y0|7004|/home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2|4|2|p + i-thd001y0|i-thd001y0|7007|/home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2|7|2|m ``` 4. 
Run the `gpshrink` Command Twice:

diff --git a/docs/sys-utilities/gpssh-exkeys.md b/docs/sys-utilities/gpssh-exkeys.md
index 6eebfd8f3f..4acc031247 100644
--- a/docs/sys-utilities/gpssh-exkeys.md
+++ b/docs/sys-utilities/gpssh-exkeys.md
@@ -20,15 +20,15 @@ gpssh-exkeys --version

## Description

-The `gpssh-exkeys` utility exchanges SSH keys between the specified host names (or host addresses). This allows SSH connections between Cloudberry hosts and network interfaces without a password prompt. The utility is used to initially prepare a Cloudberry Database system for passwordless SSH access, and also to prepare additional hosts for passwordless SSH access when expanding a Cloudberry Database system.
+The `gpssh-exkeys` utility exchanges SSH keys between the specified host names (or host addresses). This allows SSH connections between Cloudberry hosts and network interfaces without a password prompt. The utility is used to initially prepare an Apache Cloudberry system for passwordless SSH access, and also to prepare additional hosts for passwordless SSH access when expanding an Apache Cloudberry system.

-Keys are exchanged as the currently logged in user. You run the utility on the coordinator host as the `gpadmin` user (the user designated to own your Cloudberry Database installation). Cloudberry Database management utilities require that the `gpadmin` user be created on all hosts in the Cloudberry Database system, and the utilities must be able to connect as that user to all hosts without a password prompt.
+Keys are exchanged as the currently logged in user. You run the utility on the coordinator host as the `gpadmin` user (the user designated to own your Apache Cloudberry installation). Apache Cloudberry management utilities require that the `gpadmin` user be created on all hosts in the Apache Cloudberry system, and the utilities must be able to connect as that user to all hosts without a password prompt.
You can also use `gpssh-exkeys` to enable passwordless SSH for additional users, `root`, for example. The `gpssh-exkeys` utility has the following prerequisites: -- The user must have an account on the coordinator, standby, and every segment host in the Cloudberry Database cluster. +- The user must have an account on the coordinator, standby, and every segment host in the Apache Cloudberry cluster. - The user must have an `id_rsa` SSH key pair installed on the coordinator host. - The user must be able to connect with SSH from the coordinator host to every other host machine without entering a password. (This is called "1-*n* passwordless SSH.") @@ -36,7 +36,7 @@ You can enable 1-*n* passwordless SSH using the `ssh-copy-id` command to add the To specify the hosts involved in an SSH key exchange, use the `-f` option to specify a file containing a list of host names (recommended), or use the `-h` option to name single host names on the command-line. At least one host name (`-h`) or a host file (`-f`) is required. Note that the local host is included in the key exchange by default. -To specify new expansion hosts to be added to an existing Cloudberry Database system, use the `-e` and `-x` options. The `-e` option specifies a file containing a list of existing hosts in the system that have already exchanged SSH keys. The `-x` option specifies a file containing a list of new hosts that need to participate in the SSH key exchange. +To specify new expansion hosts to be added to an existing Apache Cloudberry system, use the `-e` and `-x` options. The `-e` option specifies a file containing a list of existing hosts in the system that have already exchanged SSH keys. The `-x` option specifies a file containing a list of new hosts that need to participate in the SSH key exchange. 
The `gpssh-exkeys` utility performs key exchange using the following steps:

diff --git a/docs/sys-utilities/gpssh.md b/docs/sys-utilities/gpssh.md
index 8c37ee6a6e..43a01d620c 100644
--- a/docs/sys-utilities/gpssh.md
+++ b/docs/sys-utilities/gpssh.md
@@ -105,7 +105,7 @@ A decimal number greater than 0 (zero) that is the multiplier for the timeout th

**`sync_retries = attempts`**

-A non-negative integer that specifies the maximum number of times that `gpssh` attempts to connect to a remote Cloudberry Database host. The default is 3. If the value is 0, `gpssh` returns an error if the initial connection attempt fails. Increasing the number of attempts also increases the time between retry attempts. This parameter cannot be configured with a command-line option.
+A non-negative integer that specifies the maximum number of times that `gpssh` attempts to connect to a remote Apache Cloudberry host. The default is 3. If the value is 0, `gpssh` returns an error if the initial connection attempt fails. Increasing the number of attempts also increases the time between retry attempts. This parameter cannot be configured with a command-line option.

The `-t` option also affects the time between retry attempts.

diff --git a/docs/sys-utilities/gpstart.md b/docs/sys-utilities/gpstart.md
index 37c01d878d..7e8c58ac7a 100644
--- a/docs/sys-utilities/gpstart.md
+++ b/docs/sys-utilities/gpstart.md
@@ -4,7 +4,7 @@ title: gpstart

# gpstart

-Starts a Cloudberry Database system.
+Starts an Apache Cloudberry system.

## Synopsis

@@ -21,15 +21,15 @@ gpstart --version

## Description

-The `gpstart` utility is used to start the Cloudberry Database server processes. When you start a Cloudberry Database system, you are actually starting several `postgres` database server listener processes at once (the coordinator and all of the segment instances). The `gpstart` utility handles the startup of the individual instances. Each instance is started in parallel.
+The `gpstart` utility is used to start the Apache Cloudberry server processes. When you start an Apache Cloudberry system, you are actually starting several `postgres` database server listener processes at once (the coordinator and all of the segment instances). The `gpstart` utility handles the startup of the individual instances. Each instance is started in parallel.

-As part of the startup process, the utility checks the consistency of heap checksum setting among the Cloudberry Database coordinator and segment instances, either enabled or deactivated on all instances. If the heap checksum setting is different among the instances, an error is returned and Cloudberry Database does not start. The validation can be deactivated by specifying the option `--skip-heap-checksum-validation`.
+As part of the startup process, the utility checks the consistency of heap checksum setting among the Apache Cloudberry coordinator and segment instances, either enabled or deactivated on all instances. If the heap checksum setting is different among the instances, an error is returned and Apache Cloudberry does not start. The validation can be deactivated by specifying the option `--skip-heap-checksum-validation`.

-> **Note** Before you can start a Cloudberry Database system, you must have initialized the system using `gpinitsystem`. Enabling or deactivating heap checksums is set when you initialize the system and cannot be changed after initialization.
+> **Note** Before you can start an Apache Cloudberry system, you must have initialized the system using `gpinitsystem`. Enabling or deactivating heap checksums is set when you initialize the system and cannot be changed after initialization.

-If the Cloudberry Database system is configured with a standby coordinator, and `gpstart` does not detect it during startup, `gpstart` displays a warning and lets you cancel the startup operation.
+If the Apache Cloudberry system is configured with a standby coordinator, and `gpstart` does not detect it during startup, `gpstart` displays a warning and lets you cancel the startup operation. - If the `-a` option (deactivate interactive mode prompts) is not specified, `gpstart` displays and logs these messages: @@ -85,13 +85,13 @@ Run in quiet mode. Command output is not displayed on the screen, but is still w **`-R`** -Starts Cloudberry Database in restricted mode (only database superusers are allowed to connect). +Starts Apache Cloudberry in restricted mode (only database superusers are allowed to connect). **`--skip-heap-checksum-validation`** -During startup, the utility does not validate the consistency of the heap checksum setting among the Cloudberry Database coordinator and segment instances. The default is to ensure that the heap checksum setting is the same on all instances, either enabled or deactivated. +During startup, the utility does not validate the consistency of the heap checksum setting among the Apache Cloudberry coordinator and segment instances. The default is to ensure that the heap checksum setting is the same on all instances, either enabled or deactivated. -> **Caution** Starting Cloudberry Database without this validation could lead to data loss. Use this option to start Cloudberry Database only when it is necessary to ignore the heap checksum verification errors to recover data or to troubleshoot the errors. +> **Caution** Starting Apache Cloudberry without this validation could lead to data loss. Use this option to start Apache Cloudberry only when it is necessary to ignore the heap checksum verification errors to recover data or to troubleshoot the errors. **`-t timeout_seconds`** @@ -115,13 +115,13 @@ Displays the version of this utility. 
## Examples

-Start a Cloudberry Database system:
+Start an Apache Cloudberry system:

```shell
gpstart
```

-Start a Cloudberry Database system in restricted mode (only allow superuser connections):
+Start an Apache Cloudberry system in restricted mode (only allow superuser connections):

```shell
gpstart -R

diff --git a/docs/sys-utilities/gpstate.md b/docs/sys-utilities/gpstate.md
index b35b9ec8bb..7aa6c537e3 100644
--- a/docs/sys-utilities/gpstate.md
+++ b/docs/sys-utilities/gpstate.md
@@ -4,7 +4,7 @@ title: gpstate

# gpstate

-Shows the status of a running Cloudberry Database system.
+Shows the status of a running Apache Cloudberry system.

## Synopsis

@@ -18,7 +18,7 @@ gpstate -? | -h | --help

## Description

-The `gpstate` utility displays information about a running Cloudberry Database instance. There is additional information you may want to know about a Cloudberry Database system, since it is comprised of multiple PostgreSQL database instances (segments) spanning multiple machines. The `gpstate` utility provides additional status information for a Cloudberry Database system, such as:
+The `gpstate` utility displays information about a running Apache Cloudberry instance. There is additional information you may want to know about an Apache Cloudberry system, since it is comprised of multiple PostgreSQL database instances (segments) spanning multiple machines. The `gpstate` utility provides additional status information for an Apache Cloudberry system, such as:

- Which segments are down.
- Coordinator and segment configuration information (hosts, data directories, etc.).

@@ -29,7 +29,7 @@ The `gpstate` utility displays information about a running Cloudberry Database i

**`-b (brief status)`**

-Optional. Display a brief summary of the state of the Cloudberry Database system. This is the default option.
+Optional. Display a brief summary of the state of the Apache Cloudberry system. This is the default option.
**`-B parallel_processes`** @@ -59,9 +59,9 @@ Show details on primary/mirror segment pairs that have potential issues. These i **`-f (show standby coordinator details)`** Display details of the standby coordinator host if configured. -**`-i (show Cloudberry Database version)`** +**`-i (show Apache Cloudberry version)`** -Display the Cloudberry Database software version information for each instance. +Display the Apache Cloudberry software version information for each instance. **`-l logfile_directory`** @@ -73,7 +73,7 @@ Optional. List the mirror segment instances in the system and their current role **`-p (show ports)`** -List the port numbers used throughout the Cloudberry Database system. +List the port numbers used throughout the Apache Cloudberry system. **`-q (no screen output)`** @@ -85,7 +85,7 @@ Optional. Checks segment status in the system catalog on the coordinator host. D **`-s (detailed status)`** -Optional. Displays detailed status information about the Cloudberry Database system. +Optional. Displays detailed status information about the Apache Cloudberry system. **`-v (verbose output)`** @@ -93,7 +93,7 @@ Optional. Displays error messages and outputs detailed status and progress infor **`-x (expand)`** -Optional. Displays detailed information about the progress and state of a Cloudberry Database system expansion. +Optional. Displays detailed information about the progress and state of a Apache Cloudberry system expansion. **`-? | -h | --help (help)`** @@ -110,11 +110,11 @@ The following output fields are reported by `gpstate -s` for the coordinator: |Coordinator data directory|file system location of the coordinator data directory| |Coordinator port|port of the coordinator `postgres` database listener process| |Coordinator current role|dispatch = regular operating mode

utility = maintenance mode| -|CloudberryDB array configuration type|Standard = one NIC per host

Multi-Home = multiple NICs per host| -|CloudberryDB initsystem version|version of Cloudberry Database when system was first initialized| -|CloudberryDB current version|current version of Cloudberry Database| -|Postgres version|version of PostgreSQL that Cloudberry Database is based on| -|CloudberryDB mirroring status|physical mirroring or none| +|Cloudberry array configuration type|Standard = one NIC per host

Multi-Home = multiple NICs per host| +|Cloudberry initsystem version|version of Apache Cloudberry when system was first initialized| +|Cloudberry current version|current version of Apache Cloudberry| +|Postgres version|version of PostgreSQL that Apache Cloudberry is based on| +|Cloudberry mirroring status|physical mirroring or none| |Coordinator standby|host name of the standby coordinator| |Standby coordinator state|status of the standby coordinator: active or passive| @@ -133,7 +133,7 @@ The following output fields are reported by `gpstate -s` for each primary segmen |Bytes remaining to send to mirror|Bytes remaining to be sent from primary to mirror| |Active PID|active process ID of a segment| |Configuration reports status as|segment status as reported in the system catalog: Up or Down| -|Database status|status of Cloudberry Database to incoming requests: Up, Down, or Suspended. A Suspended state means database activity is temporarily paused while a segment transitions from one state to another.| +|Database status|status of Apache Cloudberry to incoming requests: Up, Down, or Suspended. A Suspended state means database activity is temporarily paused while a segment transitions from one state to another.| The following output fields are reported by `gpstate -s` for each mirror segment: @@ -153,7 +153,7 @@ The following output fields are reported by `gpstate -s` for each mirror segment |Bytes received but remain to replay|Difference between replay log location and sent log location| |Active PID|active process ID of a segment| |Configuration reports status as|segment status as reported in the system catalog: Up or Down| -|Database status|status of Cloudberry Database to incoming requests: Up, Down, or Suspended. A Suspended state means database activity is temporarily paused while a segment transitions from one state to another.| +|Database status|status of Apache Cloudberry to incoming requests: Up, Down, or Suspended. 
A Suspended state means database activity is temporarily paused while a segment transitions from one state to another.| :::info When there is no connection between a primary segment and its mirror, `gpstate -s` displays `Unknown` in the following fields: @@ -183,7 +183,7 @@ The following output fields are reported by `gpstate -f` for standby coordinator ## Examples -Show detailed status information of a Cloudberry Database system: +Show detailed status information of a Apache Cloudberry system: ```shell gpstate -s @@ -207,7 +207,7 @@ Show information about the standby coordinator configuration: gpstate -f ``` -Display the Cloudberry Database software version information: +Display the Apache Cloudberry software version information: ```shell gpstate -i diff --git a/docs/sys-utilities/gpstop.md b/docs/sys-utilities/gpstop.md index 772507d63e..e1cb32b00e 100644 --- a/docs/sys-utilities/gpstop.md +++ b/docs/sys-utilities/gpstop.md @@ -4,7 +4,7 @@ title: gpstop # gpstop -Stops or restarts a Cloudberry Database system. +Stops or restarts a Apache Cloudberry system. ## Synopsis @@ -27,7 +27,7 @@ gpstop -? | -h | --help ## Description -The `gpstop` utility is used to stop the database servers that comprise a Cloudberry Database system. When you stop a Cloudberry Database system, you are actually stopping several `postgres` database server processes at once (the coordinator and all of the segment instances). The `gpstop` utility handles the shutdown of the individual instances. Each instance is shutdown in parallel. +The `gpstop` utility is used to stop the database servers that comprise a Apache Cloudberry system. When you stop a Apache Cloudberry system, you are actually stopping several `postgres` database server processes at once (the coordinator and all of the segment instances). The `gpstop` utility handles the shutdown of the individual instances. Each instance is shutdown in parallel. 
The default shutdown mode (`-M smart`) waits for current client connections to finish before completing the shutdown. If any connections remain open after the timeout period, or if you interrupt with CTRL-C, `gpstop` lists the open connections and prompts whether to continue waiting for connections to finish, or to perform a fast or immediate shutdown. The default timeout period is 120 seconds and can be changed with the `-t timeout_seconds` option. @@ -51,7 +51,7 @@ Optional. The coordinator host data directory. You are expected to specify the d **`--host host_name`** -The utility shuts down the Cloudberry Database segment instances on the specified host to allow maintenance on the host. Each primary segment instance on the host is shut down and the associated mirror segment instance is promoted to a primary segment if the mirror segment is on another host. Mirror segment instances on the host are shut down. +The utility shuts down the Apache Cloudberry segment instances on the specified host to allow maintenance on the host. Each primary segment instance on the host is shut down and the associated mirror segment instance is promoted to a primary segment if the mirror segment is on another host. Mirror segment instances on the host are shut down. The segment instances are not shut down and the utility returns an error in these cases: @@ -71,7 +71,7 @@ The directory to write the log file. Defaults to `~/gpAdminLogs`. **`-m`** -Optional. Shuts down a Cloudberry Database coordinator instance that was started in maintenance mode. +Optional. Shuts down an Apache Cloudberry coordinator instance that was started in maintenance mode. **`-M fast`** @@ -97,11 +97,11 @@ Restart after shutdown is complete. **`-t timeout_seconds`** -Specifies a timeout threshold (in seconds) to wait for a segment instance to shutdown. 
If a segment instance does not shutdown in the specified number of seconds, `gpstop` displays a message indicating that one or more segments are still in the process of shutting down and that you cannot restart Cloudberry Database until the segment instance(s) are stopped. This option is useful in situations where `gpstop` is run and there are very large transactions that need to rollback. These large transactions can take over a minute to rollback and surpass the default timeout period of 120 seconds. +Specifies a timeout threshold (in seconds) to wait for a segment instance to shutdown. If a segment instance does not shutdown in the specified number of seconds, `gpstop` displays a message indicating that one or more segments are still in the process of shutting down and that you cannot restart Apache Cloudberry until the segment instance(s) are stopped. This option is useful in situations where `gpstop` is run and there are very large transactions that need to rollback. These large transactions can take over a minute to rollback and surpass the default timeout period of 120 seconds. **`-u`** -This option reloads the `pg_hba.conf` files of the coordinator and segments and the runtime parameters of the `postgresql.conf` files but does not shutdown the Cloudberry Database array. Use this option to make new configuration settings active after editing `postgresql.conf` or `pg_hba.conf`. Note that this only applies to configuration parameters that are designated as *runtime* parameters. +This option reloads the `pg_hba.conf` files of the coordinator and segments and the runtime parameters of the `postgresql.conf` files but does not shutdown the Apache Cloudberry array. Use this option to make new configuration settings active after editing `postgresql.conf` or `pg_hba.conf`. Note that this only applies to configuration parameters that are designated as *runtime* parameters. **`-v`** @@ -121,13 +121,13 @@ Displays the version of this utility. 
## Examples -Stop a Cloudberry Database system in smart mode: +Stop an Apache Cloudberry system in smart mode: ```shell gpstop ``` -Stop a Cloudberry Database system in fast mode: +Stop an Apache Cloudberry system in fast mode: ```shell gpstop -M fast @@ -145,7 +145,7 @@ Stop a coordinator instance that was started in maintenance mode: gpstop -m ``` -Reload the `postgresql.conf` and `pg_hba.conf` files after making configuration changes but do not shutdown the Cloudberry Database array: +Reload the `postgresql.conf` and `pg_hba.conf` files after making configuration changes but do not shutdown the Apache Cloudberry array: ```shell gpstop -u diff --git a/docs/sys-utilities/gpsync.md b/docs/sys-utilities/gpsync.md index 70caca5d54..7852263561 100644 --- a/docs/sys-utilities/gpsync.md +++ b/docs/sys-utilities/gpsync.md @@ -20,7 +20,7 @@ gpsync --version ## Description -The `gpsync` utility allows you to copy one or more files from the specified hosts to other specified hosts in one command using remote sync. For example, you can copy a file from the Cloudberry Database coordinator host to all of the segment hosts at the same time. +The `gpsync` utility allows you to copy one or more files from the specified hosts to other specified hosts in one command using remote sync. For example, you can copy a file from the Apache Cloudberry coordinator host to all of the segment hosts at the same time. 
For example, the following command will copy `.bashrc` from the local host to `/home/gpadmin` on all hosts named in `hostfile_gpssh`: @@ -85,7 +85,7 @@ gpsync -f hostfile_gpssh installer.tar =:/ Copy the file named myfuncs.so to the specified location on the hosts named `sdw1` and `sdw2`: ```shell -gpsync -h sdw1 -h sdw2 myfuncs.so =:/usr/local/cloudberry-db/lib +gpsync -h sdw1 -h sdw2 myfuncs.so =:/usr/local/cloudberry/lib ``` ## See also diff --git a/docs/sys-utilities/index.md b/docs/sys-utilities/index.md index 4712206260..9e03d3b40b 100644 --- a/docs/sys-utilities/index.md +++ b/docs/sys-utilities/index.md @@ -4,13 +4,13 @@ title: Utility Overview # Utility Overview -The command-line utilities provided with Cloudberry Database. +The command-line utilities provided with Apache Cloudberry. -Cloudberry Database uses the standard PostgreSQL client and server programs and provides additional management utilities for administering a distributed Cloudberry Database DBMS. +Apache Cloudberry uses the standard PostgreSQL client and server programs and provides additional management utilities for administering a distributed Apache Cloudberry DBMS. -Several utilities are installed when you install the Cloudberry Database server. These utilities reside in `$GPHOME/bin`. +Several utilities are installed when you install the Apache Cloudberry server. These utilities reside in `$GPHOME/bin`. -Superscripts identify those utilities that require separate downloads, as well as those utilities that are also installed with the Client and Loader Tools Packages. All utilities are installed when you install the Cloudberry Database server, unless specifically identified by a superscript. +Superscripts identify those utilities that require separate downloads, as well as those utilities that are also installed with the Client and Loader Tools Packages. All utilities are installed when you install the Apache Cloudberry server, unless specifically identified by a superscript. 
For more information about a utility, see the corresponding topic listed in the System Utilities section. @@ -36,19 +36,19 @@ For more information about a utility, see the corresponding topic listed in the ## Reference for Administrator - [analyzedb](./analyzedb.md) - performs `ANALYZE` operations on tables incrementally and concurrently -- [gpaddmirrors](./gpaddmirrors.md) - add mirror segments to a Cloudberry Database system that was initially configured without mirroring -- [gpbackup](./gpbackup.md) - create a Cloudberry Database backup for use with the gprestore utility -- [gpcheckcat](./gpcheckcat.md) - test Cloudberry Database catalog tables for inconsistencies +- [gpaddmirrors](./gpaddmirrors.md) - add mirror segments to an Apache Cloudberry system that was initially configured without mirroring +- [gpbackup](./gpbackup.md) - create an Apache Cloudberry backup for use with the gprestore utility +- [gpcheckcat](./gpcheckcat.md) - test Apache Cloudberry catalog tables for inconsistencies - [gpcheckperf](./gpcheckperf.md) - verifiy the baseline hardware performance of the specified hosts -- [gpconfig](./gpconfig.md) - set server configuration parameters on all segments within a Cloudberry Database system -- [gpdeletesystem](./gpdeletesystem.md) - delete a Cloudberry Database system that was initialized using gpinitsystem -- [gpdemo](./gpdemo.md) - deploy a small Cloudberry Database cluster with segments on a single node to make a demo -- [gpexpand](./gpexpand.md) - expand an existing Cloudberry Database across new hosts in the system -- [gpfdist](./gpfdist.md) - serve data files to or writes data files out from Cloudberry Database segments -- [gpinitstandby](./gpinitstandby.md) - add and/or initialize a standby coordinator host for a Cloudberry Database system -- [gpinitsystem](./gpinitsystem.md) - initialize a Cloudberry Database system using configuration parameters specified in the gpinitsystem_config file +- [gpconfig](./gpconfig.md) - set server
configuration parameters on all segments within an Apache Cloudberry system +- [gpdeletesystem](./gpdeletesystem.md) - delete an Apache Cloudberry system that was initialized using gpinitsystem +- [gpdemo](./gpdemo.md) - deploy a small Apache Cloudberry cluster with segments on a single node to make a demo +- [gpexpand](./gpexpand.md) - expand an existing Apache Cloudberry across new hosts in the system +- [gpfdist](./gpfdist.md) - serve data files to or writes data files out from Apache Cloudberry segments +- [gpinitstandby](./gpinitstandby.md) - add and/or initialize a standby coordinator host for an Apache Cloudberry system +- [gpinitsystem](./gpinitsystem.md) - initialize an Apache Cloudberry system using configuration parameters specified in the gpinitsystem_config file - [gpload](./gpload.md) - run a load job as defined in a YAML formatted control file -- [gplogfilter](./gplogfilter.md) - search through Cloudberry Database log files for specified entries +- [gplogfilter](./gplogfilter.md) - search through Apache Cloudberry log files for specified entries - [gpmemreport](./gpmemreport.md) - interpret the output created by the gpmemwatcher utility and generates output files in a readable format - [gpmemwatcher](./gpmemwatcher.md) - track the memory usage of each process in a database cluster. 
- [gpmovemirrors](./gpmovemirrors.md) - move mirror segment instances to new locations @@ -59,8 +59,8 @@ For more information about a utility, see the corresponding topic listed in the - [gpshrink](./gpshrink.md) - use `gpshrink` to reduce the size of the cluster, saving server resources - [gpssh-exkeys](./gpssh-exkeys.md) - exchange SSH public keys between hosts - [gpssh](./gpssh.md) - provide SSH access to multiple hosts at once -- [gpstart](./gpstart.md) - start a Cloudberry Database system -- [gpstate](./gpstate.md) - show the status of a running Cloudberry Database system -- [gpstop](./gpstop.md) - stop or restart a Cloudberry Database system +- [gpstart](./gpstart.md) - start an Apache Cloudberry system +- [gpstate](./gpstate.md) - show the status of a running Apache Cloudberry system +- [gpstop](./gpstop.md) - stop or restart an Apache Cloudberry system - [gpsync](./gpsync.md) - copy files between multiple hosts at once - [gpactivatestandby](./gpactivatestandby.md) - activate a standby coordinator host and makes it the active coordinator \ No newline at end of file diff --git a/docs/sys-utilities/pg-checksums.md b/docs/sys-utilities/pg-checksums.md index 1c87871dc6..7859b4341d 100644 --- a/docs/sys-utilities/pg-checksums.md +++ b/docs/sys-utilities/pg-checksums.md @@ -4,7 +4,7 @@ title: pg_checksums # pg_checksums -Enables, disables, or checks data checksums in a Cloudberry Database cluster. +Enables, disables, or checks data checksums in an Apache Cloudberry cluster. ## Synopsis @@ -18,9 +18,9 @@ pg_checksums -V | --version ## Description -`pg_checksums` is a standard PostgreSQL utility that you can use to check, enable, or disable data checksums in a Cloudberry Database cluster. The server must be shut down cleanly before running `pg_checksums`. When verifying checksums, Cloudberry Database returns the exit status zero if there are no checksum errors, and returns nonzero if it detects at least one checksum failure. 
When enabling or disabling checksums, Cloudberry returns a nonzero exit status if the operation failed. +`pg_checksums` is a standard PostgreSQL utility that you can use to check, enable, or disable data checksums in an Apache Cloudberry cluster. The server must be shut down cleanly before running `pg_checksums`. When verifying checksums, Apache Cloudberry returns the exit status zero if there are no checksum errors, and returns nonzero if it detects at least one checksum failure. When enabling or disabling checksums, Apache Cloudberry returns a nonzero exit status if the operation failed. -When verifying checksums, Cloudberry Database scans every file in the cluster. When enabling checksums, every file in the cluster is rewritten in-place. Disabling checksums only updates the `pg_control` file. +When verifying checksums, Apache Cloudberry scans every file in the cluster. When enabling checksums, every file in the cluster is rewritten in-place. Disabling checksums only updates the `pg_control` file. ## Options diff --git a/docs/sys-utilities/pg-config.md b/docs/sys-utilities/pg-config.md index 257af5b8b9..1d019a3a15 100644 --- a/docs/sys-utilities/pg-config.md +++ b/docs/sys-utilities/pg-config.md @@ -4,7 +4,7 @@ title: pg_config # pg_config -Retrieves information about the installed version of Cloudberry Database. +Retrieves information about the installed version of Apache Cloudberry. ## Synopsis @@ -18,7 +18,7 @@ pg_config --version ## Description -The `pg_config` utility prints configuration parameters of the currently installed version of Cloudberry Database. It is intended, for example, to be used by software packages that want to interface to Cloudberry Database to facilitate finding the required header files and libraries. Note that information printed out by `pg_config` is for the Cloudberry Database coordinator only. +The `pg_config` utility prints configuration parameters of the currently installed version of Apache Cloudberry. 
It is intended, for example, to be used by software packages that want to interface to Apache Cloudberry to facilitate finding the required header files and libraries. Note that information printed out by `pg_config` is for the Apache Cloudberry coordinator only. If more than one option is given, the information is printed in that order, one item per line. If no options are given, all available information is printed, with labels. @@ -74,47 +74,47 @@ Print the location of extension makefiles. **`--configure`** -Print the options that were given to the configure script when Cloudberry Database was configured for building. +Print the options that were given to the configure script when Apache Cloudberry was configured for building. **`--cc`** -Print the value of the CC variable that was used for building Cloudberry Database. This shows the C compiler used. +Print the value of the CC variable that was used for building Apache Cloudberry. This shows the C compiler used. **`--cppflags`** -Print the value of the `CPPFLAGS` variable that was used for building Cloudberry Database. This shows C compiler switches needed at preprocessing time. +Print the value of the `CPPFLAGS` variable that was used for building Apache Cloudberry. This shows C compiler switches needed at preprocessing time. **`--cflags`** -Print the value of the `CFLAGS` variable that was used for building Cloudberry Database. This shows C compiler switches. +Print the value of the `CFLAGS` variable that was used for building Apache Cloudberry. This shows C compiler switches. **`--cflags_sl`** -Print the value of the `CFLAGS_SL` variable that was used for building Cloudberry Database. This shows extra C compiler switches used for building shared libraries. +Print the value of the `CFLAGS_SL` variable that was used for building Apache Cloudberry. This shows extra C compiler switches used for building shared libraries. 
**`--ldflags`** -Print the value of the `LDFLAGS` variable that was used for building Cloudberry Database. This shows linker switches. +Print the value of the `LDFLAGS` variable that was used for building Apache Cloudberry. This shows linker switches. **`--ldflags_ex`** -Print the value of the `LDFLAGS_EX` variable that was used for building Cloudberry Database. This shows linker switches that were used for building executables only. +Print the value of the `LDFLAGS_EX` variable that was used for building Apache Cloudberry. This shows linker switches that were used for building executables only. **`--ldflags_sl`** -Print the value of the `LDFLAGS_SL` variable that was used for building Cloudberry Database. This shows linker switches used for building shared libraries only. +Print the value of the `LDFLAGS_SL` variable that was used for building Apache Cloudberry. This shows linker switches used for building shared libraries only. **`--libs`** -Print the value of the `LIBS` variable that was used for building Cloudberry Database. This normally contains `-l` switches for external libraries linked into Cloudberry Database. +Print the value of the `LIBS` variable that was used for building Apache Cloudberry. This normally contains `-l` switches for external libraries linked into Apache Cloudberry. **`--version`** -Print the version of Cloudberry Database. +Print the version of Apache Cloudberry. 
## Examples -To reproduce the build configuration of the current Cloudberry Database installation, run the following command: +To reproduce the build configuration of the current Apache Cloudberry installation, run the following command: ```shell eval ./configure 'pg_config --configure' diff --git a/docs/sys-utilities/pg-dump.md b/docs/sys-utilities/pg-dump.md index 2040a2555f..b2012c4054 100644 --- a/docs/sys-utilities/pg-dump.md +++ b/docs/sys-utilities/pg-dump.md @@ -18,14 +18,14 @@ pg_dump -V | --version ## Description -`pg_dump` is a standard PostgreSQL utility for backing up a database, and is also supported in Cloudberry Database. It creates a single (non-parallel) dump file. For routine backups of Cloudberry Database, it is better to use the Cloudberry Database backup utility, gpbackup, for the best performance. +`pg_dump` is a standard PostgreSQL utility for backing up a database, and is also supported in Apache Cloudberry. It creates a single (non-parallel) dump file. For routine backups of Apache Cloudberry, it is better to use the Apache Cloudberry backup utility, gpbackup, for the best performance. -Use `pg_dump` if you are migrating your data to another database vendor's system, or to another Cloudberry Database system with a different segment configuration (for example, if the system you are migrating to has greater or fewer segment instances). To restore, you must use the corresponding [pg_restore](/docs/sys-utilities/pg-restore.md) utility (if the dump file is in archive format), or you can use a client program such as [psql](/docs/sys-utilities/psql.md) (if the dump file is in plain text format). +Use `pg_dump` if you are migrating your data to another database vendor's system, or to another Apache Cloudberry system with a different segment configuration (for example, if the system you are migrating to has greater or fewer segment instances). 
To restore, you must use the corresponding [pg_restore](/docs/sys-utilities/pg-restore.md) utility (if the dump file is in archive format), or you can use a client program such as [psql](/docs/sys-utilities/psql.md) (if the dump file is in plain text format). -Since `pg_dump` is compatible with regular PostgreSQL, it can be used to migrate data into Cloudberry Database. The `pg_dump` utility in Cloudberry Database is very similar to the PostgreSQL `pg_dump` utility, with the following exceptions and limitations: +Since `pg_dump` is compatible with regular PostgreSQL, it can be used to migrate data into Apache Cloudberry. The `pg_dump` utility in Apache Cloudberry is very similar to the PostgreSQL `pg_dump` utility, with the following exceptions and limitations: -- If using `pg_dump` to backup a Cloudberry Database database, keep in mind that the dump operation can take a long time (several hours) for very large databases. Also, you must make sure you have sufficient disk space to create the dump file. -- If you are migrating data from one Cloudberry Database system to another, use the `--gp-syntax` command-line option to include the `DISTRIBUTED BY` clause in `CREATE TABLE` statements. This ensures that Cloudberry Database table data is distributed with the correct distribution key columns upon restore. +- If using `pg_dump` to backup an Apache Cloudberry database, keep in mind that the dump operation can take a long time (several hours) for very large databases. Also, you must make sure you have sufficient disk space to create the dump file. +- If you are migrating data from one Apache Cloudberry system to another, use the `--gp-syntax` command-line option to include the `DISTRIBUTED BY` clause in `CREATE TABLE` statements. This ensures that Apache Cloudberry table data is distributed with the correct distribution key columns upon restore. `pg_dump` makes consistent backups even if the database is being used concurrently. 
`pg_dump` does not block other users accessing the database (readers or writers). @@ -48,7 +48,7 @@ This option is similar to, but for historical reasons not identical to, specifyi Include large objects in the dump. This is the default behavior except when `--schema`, `--table`, or `--schema-only` is specified. The `-b` switch is only useful add large objects to dumps where a specific schema or table has been requested. Note that blobs are considered data and therefore will be included when `--data-only` is used, but not when `--schema-only` is. -> **Note** Cloudberry Database does not support the PostgreSQL [large object facility](https://www.postgresql.org/docs/12/largeobjects.html) for streaming user data that is stored in large-object structures. +> **Note** Apache Cloudberry does not support the PostgreSQL [large object facility](https://www.postgresql.org/docs/12/largeobjects.html) for streaming user data that is stored in large-object structures. **`-c | --clean`** @@ -103,7 +103,7 @@ Do not dump any schemas matching the schema pattern. The pattern is interpreted **`-o | --oids`** -Dump object identifiers (OIDs) as part of the data for every table. Use of this option is not recommended for files that are intended to be restored into Cloudberry Database. +Dump object identifiers (OIDs) as part of the data for every table. Use of this option is not recommended for files that are intended to be restored into Apache Cloudberry. **`-O | --no-owner`** @@ -123,7 +123,7 @@ To exclude table data for only a subset of tables in the database, see `--exclud Specify the superuser user name to use when deactivating triggers. This is relevant only if `--disable-triggers` is used. It is better to leave this out, and instead start the resulting script as a superuser. -> **Note** Cloudberry Database does not support user-defined triggers. +> **Note** Apache Cloudberry does not support user-defined triggers. **`-t
- Test the state of coordinator mirroring on Cloudberry Database. If the + Test the state of coordinator mirroring on Apache Cloudberry. If the value is not "STREAMING", an alert or warning will be raised.

Recommended frequency: run every 5 to 10 minutes

Severity: IMPORTANT

@@ -197,7 +197,7 @@ psql <dbname> -c
- Check disk space usage on volumes used for Cloudberry Database data + Check disk space usage on volumes used for Apache Cloudberry data storage and the OS.

Recommended frequency: every 5 to 30 minutes

Severity: CRITICAL

@@ -258,7 +258,7 @@ psql <dbname> -c

- Run the Cloudberry Database + Run the Apache Cloudberry gpcheckperf utility. @@ -302,7 +302,7 @@ psql <dbname> -c

Severity: IMPORTANT

- Run the Cloudberry Database gpcheckcat utility in each database: + Run the Apache Cloudberry gpcheckcat utility in each database:
gpcheckcat -O -p [target_port]
Note: With the @@ -318,7 +318,7 @@ psql <dbname> -c

Severity: IMPORTANT

- With no users on the system, run the Cloudberry Database gpcheckcat utility in each database: + With no users on the system, run the Apache Cloudberry gpcheckcat utility in each database:
gpcheckcat -R pgclass -p [target_port]
Run the repair scripts for any issues identified. - During a downtime, with no users on the system, run the Cloudberry Database gpcheckcat utility in each database: + During a downtime, with no users on the system, run the Apache Cloudberry gpcheckcat utility in each database:
gpcheckcat -R namespace -p [target_port]
Run the repair scripts for any issues identified. - With no users on the system, run the Cloudberry Database gpcheckcat utility in each database: + With no users on the system, run the Apache Cloudberry gpcheckcat utility in each database:
gpcheckcat -R distribution_policy -p [target_port]
Run the repair scripts for any issues identified. - During a downtime, with no users on the system, run the Cloudberry Database gpcheckcat utility in each database: + During a downtime, with no users on the system, run the Apache Cloudberry gpcheckcat utility in each database:
gpcheckcat -R dependency -p [target_port]
Run the repair scripts for any issues identified.
- Install Cloudberry Database minor releases, for example v1.5.0.x. + Install Apache Cloudberry minor releases, for example v1.5.0.x.

Recommended frequency: quarterly

Severity: IMPORTANT

- Follow upgrade instructions in the Cloudberry Database + Follow upgrade instructions in the Apache Cloudberry Release Notes. Always upgrade to the latest in the series. - Keep the Cloudberry Database software current to incorporate bug fixes, - performance enhancements, and feature enhancements into your Cloudberry Database + Keep the Apache Cloudberry software current to incorporate bug fixes, + performance enhancements, and feature enhancements into your Apache Cloudberry cluster.
| --table=
`** @@ -173,7 +173,7 @@ This option deactivates the use of dollar quoting for function bodies, and force This option is relevant only when creating a data-only dump. It instructs `pg_dump` to include commands to temporarily deactivate triggers on the target tables while the data is reloaded. Use this if you have triggers on the tables that you do not want to invoke during data reload. The commands emitted for `--disable-triggers` must be done as superuser. So, you should also specify a superuser name with `-S`, or preferably be careful to start the resulting script as a superuser. This option is only meaningful for the plain-text format. For the archive formats, you may specify the option when you call [pg_restore](/docs/sys-utilities/pg-restore.md). -> **Note** Cloudberry Database does not support user-defined triggers. +> **Note** Apache Cloudberry does not support user-defined triggers. **`--exclude-table-and-children=
`** @@ -217,7 +217,7 @@ Do not dump the contents of unlogged tables. This option has no effect on whethe **`--quote-all-identifiers`** -Force quoting of all identifiers. This option is recommended when dumping a database from a server whose Cloudberry Database major version is different from `pg_dump`'s, or when the output is intended to be loaded into a server of a different major version. By default, `pg_dump` quotes only identifiers that are reserved words in its own major version. This sometimes results in compatibility issues when dealing with servers of other versions that may have slightly different sets of reserved words. Using `--quote-all-identifiers` prevents such issues, at the price of a harder-to-read dump script. +Force quoting of all identifiers. This option is recommended when dumping a database from a server whose Apache Cloudberry major version is different from `pg_dump`'s, or when the output is intended to be loaded into a server of a different major version. By default, `pg_dump` quotes only identifiers that are reserved words in its own major version. This sometimes results in compatibility issues when dealing with servers of other versions that may have slightly different sets of reserved words. Using `--quote-all-identifiers` prevents such issues, at the price of a harder-to-read dump script. **`--section=`** @@ -233,7 +233,7 @@ This option is not beneficial for a dump which is intended only for disaster rec This option will make no difference if there are no read-write transactions active when `pg_dump` is started. If read-write transactions are active, the start of the dump may be delayed for an indeterminate length of time. Once running, performance with or without the switch is the same. -> **Note** Because Cloudberry Database does not support serializable transactions, the `--serializable-deferrable` option has no effect in Cloudberry Database. 
+> **Note** Because Apache Cloudberry does not support serializable transactions, the `--serializable-deferrable` option has no effect in Apache Cloudberry. **`--table-and-children=
`** @@ -245,7 +245,7 @@ Output SQL-standard `SET SESSION AUTHORIZATION` commands instead of `ALTER OWNER **`--gp-syntax | --no-gp-syntax`** -Use `--gp-syntax` to dump Cloudberry Database syntax in the `CREATE TABLE` statements. This allows the distribution policy (`DISTRIBUTED BY` or `DISTRIBUTED RANDOMLY` clauses) of a Cloudberry Database table to be dumped, which is useful for restoring into other Cloudberry Database systems. The default is to include Cloudberry Database syntax when connected to a Cloudberry Database system, and to exclude it when connected to a regular PostgreSQL system. +Use `--gp-syntax` to dump Apache Cloudberry syntax in the `CREATE TABLE` statements. This allows the distribution policy (`DISTRIBUTED BY` or `DISTRIBUTED RANDOMLY` clauses) of an Apache Cloudberry table to be dumped, which is useful for restoring into other Apache Cloudberry systems. The default is to include Apache Cloudberry syntax when connected to an Apache Cloudberry system, and to exclude it when connected to a regular PostgreSQL system. **`--function-oids `** @@ -273,11 +273,11 @@ If this parameter contains an `=` sign or starts with a valid URI prefix (`postg **`-h | --host=`** -The host name of the machine on which the Cloudberry Database coordinator database server is running. If not specified, reads from the environment variable `PGHOST` or defaults to localhost. +The host name of the machine on which the Apache Cloudberry coordinator database server is running. If not specified, reads from the environment variable `PGHOST` or defaults to localhost. **`-p | --port=`** -The TCP port on which the Cloudberry Database coordinator database server is listening for connections. If not specified, reads from the environment variable `PGPORT` or defaults to 5432. +The TCP port on which the Apache Cloudberry coordinator database server is listening for connections. If not specified, reads from the environment variable `PGPORT` or defaults to 5432. 
**`-U | --username=`** @@ -303,7 +303,7 @@ The dump file produced by `pg_dump` does not contain the statistics used by the The database activity of `pg_dump` is normally collected by the statistics collector. If this is undesirable, you can set parameter `track_counts` to false via `PGOPTIONS` or the `ALTER USER` command. -Because `pg_dump` may be used to transfer data to newer versions of Cloudberry Database, the output of `pg_dump` can be expected to load into Cloudberry Database versions newer than `pg_dump`'s version. `pg_dump` can also dump from Cloudberry Database versions older than its own version. However, `pg_dump` cannot dump from Cloudberry Database versions newer than its own major version; it will refuse to even try, rather than risk making an invalid dump. Also, it is not guaranteed that `pg_dump`'s output can be loaded into a server of an older major version — not even if the dump was taken from a server of that version. Loading a dump file into an older server may require manual editing of the dump file to remove syntax not understood by the older server. Use of the `--quote-all-identifiers` option is recommended in cross-version cases, as it can prevent problems arising from varying reserved-word lists in different Cloudberry Database versions. +Because `pg_dump` may be used to transfer data to newer versions of Apache Cloudberry, the output of `pg_dump` can be expected to load into Apache Cloudberry versions newer than `pg_dump`'s version. `pg_dump` can also dump from Apache Cloudberry versions older than its own version. However, `pg_dump` cannot dump from Apache Cloudberry versions newer than its own major version; it will refuse to even try, rather than risk making an invalid dump. Also, it is not guaranteed that `pg_dump`'s output can be loaded into a server of an older major version — not even if the dump was taken from a server of that version. 
Loading a dump file into an older server may require manual editing of the dump file to remove syntax not understood by the older server. Use of the `--quote-all-identifiers` option is recommended in cross-version cases, as it can prevent problems arising from varying reserved-word lists in different Apache Cloudberry versions. ## Examples @@ -319,7 +319,7 @@ To reload such a script into a (freshly created) database named `newdb`: psql -d newdb -f db.sql ``` -Dump a Cloudberry Database in tar file format and include distribution policy information: +Dump an Apache Cloudberry database in tar file format and include distribution policy information: ```shell pg_dump -Ft --gp-syntax mydb > db.tar diff --git a/docs/sys-utilities/pg-dumpall.md b/docs/sys-utilities/pg-dumpall.md index 734173cb8b..955e821c12 100644 --- a/docs/sys-utilities/pg-dumpall.md +++ b/docs/sys-utilities/pg-dumpall.md @@ -4,7 +4,7 @@ title: pg_dumpall # pg_dumpall -Extracts all databases in a Cloudberry Database system to a single script file or other archive file. +Extracts all databases in an Apache Cloudberry system to a single script file or other archive file. ## Synopsis @@ -18,7 +18,7 @@ pg_dumpall -V | --version ## Description -`pg_dumpall` is a standard PostgreSQL utility for backing up all databases in a Cloudberry Database (or PostgreSQL) instance, and is also supported in Cloudberry Database. It creates a single (non-parallel) dump file. For routine backups of Cloudberry Database it is better to use the Cloudberry Database backup utility, gpbackup, for the best performance. +`pg_dumpall` is a standard PostgreSQL utility for backing up all databases in an Apache Cloudberry (or PostgreSQL) instance, and is also supported in Apache Cloudberry. It creates a single (non-parallel) dump file. For routine backups of Apache Cloudberry it is better to use the Apache Cloudberry backup utility, gpbackup, for the best performance. 
`pg_dumpall` creates a single script file that contains SQL commands that can be used as input to [psql](/docs/sys-utilities/psql.md) to restore the databases. It does this by calling [pg_dump](/docs/sys-utilities/pg-dump.md) for each database. `pg_dumpall` also dumps global objects that are common to all databases. (`pg_dump` does not save these objects.) This currently includes information about database users and groups, and access permissions that apply to databases as a whole. @@ -26,7 +26,7 @@ Since `pg_dumpall` reads tables from all databases you will most likely have to The SQL script will be written to the standard output. Use the `[-f | --file]` option or shell operators to redirect it into a file. -`pg_dumpall` needs to connect several times to the Cloudberry Database coordinator server (once per database). If you use password authentication it is likely to ask for a password each time. It is convenient to have a `~/.pgpass` file in such cases. +`pg_dumpall` needs to connect several times to the Apache Cloudberry coordinator server (once per database). If you use password authentication it is likely to ask for a password each time. It is convenient to have a `~/.pgpass` file in such cases. ## Options @@ -49,7 +49,7 @@ Dump only global objects (roles and tablespaces), no databases. **`-o | --oids`** -Dump object identifiers (OIDs) as part of the data for every table. Use of this option is not recommended for files that are intended to be restored into Cloudberry Database. +Dump object identifiers (OIDs) as part of the data for every table. Use of this option is not recommended for files that are intended to be restored into Apache Cloudberry. **`-O | --no-owner`** @@ -67,7 +67,7 @@ Dump only the object definitions (schema), not data. Specify the superuser user name to use when deactivating triggers. This is relevant only if `--disable-triggers` is used. It is better to leave this out, and instead start the resulting script as a superuser. 
-> **Note** Cloudberry Database does not support user-defined triggers. +> **Note** Apache Cloudberry does not support user-defined triggers. **`-t | --tablespaces-only`** @@ -101,7 +101,7 @@ This option deactivates the use of dollar quoting for function bodies, and force This option is relevant only when creating a data-only dump. It instructs `pg_dumpall` to include commands to temporarily deactivate triggers on the target tables while the data is reloaded. Use this if you have triggers on the tables that you do not want to invoke during data reload. The commands emitted for `--disable-triggers` must be done as superuser. So, you should also specify a superuser name with `-S`, or preferably be careful to start the resulting script as a superuser. -> **Note** Cloudberry Database does not support user-defined triggers. +> **Note** Apache Cloudberry does not support user-defined triggers. **`--inserts`** @@ -109,7 +109,7 @@ Dump data as `INSERT` commands (rather than `COPY`). This will make restoration **`--lock-wait-timeout=timeout`** -Do not wait forever to acquire shared table locks at the beginning of the dump. Instead, fail if unable to lock a table within the specified timeout. The timeout may be specified in any of the formats accepted by `SET statement_timeout`. Allowed values vary depending on the server version you are dumping from, but an integer number of milliseconds is accepted by all Cloudberry Database versions. +Do not wait forever to acquire shared table locks at the beginning of the dump. Instead, fail if unable to lock a table within the specified timeout. The timeout may be specified in any of the formats accepted by `SET statement_timeout`. Allowed values vary depending on the server version you are dumping from, but an integer number of milliseconds is accepted by all Apache Cloudberry versions. 
**`--no-security-labels`** @@ -141,7 +141,7 @@ Output SQL-standard `SET SESSION AUTHORIZATION` commands instead of `ALTER OWNER **`--gp-syntax`** -Output Cloudberry Database syntax in the `CREATE TABLE` statements. This allows the distribution policy (`DISTRIBUTED BY` or `DISTRIBUTED RANDOMLY` clauses) of a Cloudberry Database table to be dumped, which is useful for restoring into other Cloudberry Database systems. +Output Apache Cloudberry syntax in the `CREATE TABLE` statements. This allows the distribution policy (`DISTRIBUTED BY` or `DISTRIBUTED RANDOMLY` clauses) of an Apache Cloudberry table to be dumped, which is useful for restoring into other Apache Cloudberry systems. **`--no-gp-syntax`** diff --git a/docs/sys-utilities/pg-filedump.md b/docs/sys-utilities/pg-filedump.md index b864ebf32c..e1dc2f0228 100644 --- a/docs/sys-utilities/pg-filedump.md +++ b/docs/sys-utilities/pg-filedump.md @@ -4,9 +4,9 @@ title: pg_filedump # pg_filedump -Filedump, also known as the command-line tool `pg_filedump`, is a system utility that formats Cloudberry Database heap files, index files, and control files into human-readable forms. Using filedump, you can format and dump files in multiple ways, as described in the [Command-line options](#command-line-options) section, and even dump binary data directly. +Filedump, also known as the command-line tool `pg_filedump`, is a system utility that formats Apache Cloudberry heap files, index files, and control files into human-readable forms. Using filedump, you can format and dump files in multiple ways, as described in the [Command-line options](#command-line-options) section, and even dump binary data directly. 
-The repository for Cloudberry Database's filedump can be found at: `https://github.com/cloudberrydb/filedump` +The repository for Apache Cloudberry's filedump can be found at: `https://github.com/cloudberry-contrib/filedump` ## User scenarios @@ -19,19 +19,19 @@ The filedump tool is primarily used in the following scenarios: ## Compile and install filedump -Before compiling filedump, you need to have a Cloudberry Database cluster installed with version 1.0.0 or later. The following are the typical steps for compilation: +Before compiling filedump, you need to have an Apache Cloudberry cluster installed with version 1.0.0 or later. The following are the typical steps for compilation: -1. Ensure the CloudberryDB package can be found: +1. Ensure the Cloudberry package can be found: ```bash su - gpadmin - source /usr/local/cloudberry-db/greenplum_path.sh + source /usr/local/cloudberry/greenplum_path.sh ``` -2. Clone the GitHub repository `cloudberrydb/filedump` to your local environment: +2. Clone the GitHub repository `cloudberry-contrib/filedump` to your local environment: ```bash - git clone https://github.com/cloudberrydb/filedump.git + git clone https://github.com/cloudberry-contrib/filedump.git ``` 3. Enter the repository directory and run the compilation command: diff --git a/docs/sys-utilities/pg-restore.md b/docs/sys-utilities/pg-restore.md index 2a1d3593a4..6502cbb5a1 100644 --- a/docs/sys-utilities/pg-restore.md +++ b/docs/sys-utilities/pg-restore.md @@ -50,7 +50,7 @@ When this option is used, the database named with `-d` is used only to issue the **`-d dbname | --dbname=dbname`** -Connect to this database and restore directly into this database. This utility, like most other Cloudberry Database utilities, also uses the environment variables supported by `libpq`. However it does not read `PGDATABASE` when a database name is not supplied. +Connect to this database and restore directly into this database. 
This utility, like most other Apache Cloudberry utilities, also uses the environment variables supported by `libpq`. However it does not read `PGDATABASE` when a database name is not supplied. **`-e | --exit-on-error`** @@ -112,7 +112,7 @@ This option is the inverse of `--data-only`. It is similar to, but for historica Specify the superuser user name to use when deactivating triggers. This is only relevant if `--disable-triggers` is used. -> **Note** Cloudberry Database does not support user-defined triggers. +> **Note** Apache Cloudberry does not support user-defined triggers. **`-t table | --table=table`** @@ -122,7 +122,7 @@ Restore definition and/or data of named table only. Multiple tables may be speci Restore named trigger only. -> **Note** Cloudberry Database does not support user-defined triggers. +> **Note** Apache Cloudberry does not support user-defined triggers. **`-v | --verbose`** @@ -144,7 +144,7 @@ Run the restore as a single transaction. This ensures that either all the comman This option is relevant only when performing a data-only restore. It instructs `pg_restore` to run commands to temporarily deactivate triggers on the target tables while the data is reloaded. Use this if you have triggers on the tables that you do not want to invoke during data reload. The commands emitted for `--disable-triggers` must be done as superuser. So you should also specify a superuser name with `-S` or, preferably, run `pg_restore` as a superuser. -> **Note** Cloudberry Database does not support user-defined triggers. +> **Note** Apache Cloudberry does not support user-defined triggers. **`--no-data-for-failed-tables`** @@ -180,7 +180,7 @@ The host name of the machine on which the Cloudberry coordinator database server **`-p port | --port port`** -The TCP port on which the Cloudberry Database coordinator database server is listening for connections. If not specified, reads from the environment variable `PGPORT` or defaults to 5432. 
+The TCP port on which the Apache Cloudberry coordinator database server is listening for connections. If not specified, reads from the environment variable `PGPORT` or defaults to 5432. **`-U username | --username username`** diff --git a/docs/sys-utilities/psql.md b/docs/sys-utilities/psql.md index 1ac3b4d636..99f9b85775 100644 --- a/docs/sys-utilities/psql.md +++ b/docs/sys-utilities/psql.md @@ -4,7 +4,7 @@ title: psql # psql -Interactive command-line interface for Cloudberry Database +Interactive command-line interface for Apache Cloudberry ## Synopsis @@ -14,7 +14,7 @@ psql [
@@ -330,7 +330,7 @@ grubby --info=ALL ##### 禁用透明大页面 (THP) -你需要禁用透明大页面 (THP),因为它会降低 Cloudberry Database 的性能。禁用的命令如下所示: +你需要禁用透明大页面 (THP),因为它会降低 Apache Cloudberry 的性能。禁用的命令如下所示: ```bash grubby --update-kernel=ALL --args="transparent_hugepage=never" @@ -344,7 +344,7 @@ cat /sys/kernel/mm/*transparent_hugepage/enabled ##### 禁用 IPC 对象删除 -禁用 IPC 对象删除,即把 `RemoveIPC` 的值设为 `no`。你可以在 Cloudberry Database 的 `/etc/systemd/logind.conf` 文件中设置该参数。 +禁用 IPC 对象删除,即把 `RemoveIPC` 的值设为 `no`。你可以在 Apache Cloudberry 的 `/etc/systemd/logind.conf` 文件中设置该参数。 ``` RemoveIPC=no @@ -378,7 +378,7 @@ service sshd restart ##### 时钟同步设置 -Cloudberry Database 要求为所有主机配置时钟需要同步,时钟同步服务应当随主机启动而启动。有两种同步方式: +Apache Cloudberry 要求为所有主机配置时钟需要同步,时钟同步服务应当随主机启动而启动。有两种同步方式: - 使用 Coordinator 节点的时间作为来源,其他主机同步 Coordinator 节点主机的时钟。 - 使用外部时钟来源同步。 @@ -397,9 +397,9 @@ server 0.centos.pool.ntp.org iburst systemctl status chronyd ``` -## 第 2 步:通过 RPM 包安装 Cloudberry Database +## 第 2 步:通过 RPM 包安装 Apache Cloudberry -1. 下载 Cloudberry Database 的 RPM 安装包至 `gpadmin` 主目录 `/home/gpadmin/`: +1. 下载 Apache Cloudberry 的 RPM 安装包至 `gpadmin` 主目录 `/home/gpadmin/`: ```bash wget -P /home/gpadmin <下载地址> @@ -407,7 +407,7 @@ systemctl status chronyd 2. 
在 `/home/gpadmin` 目录下安装 RPM 包。 - 执行以下命令时,你需要将 `` 替换为实际的安装包路径,并使用 `root` 用户执行。安装时,会自动创建默认安装目录 `/usr/local/cloudberry-db/`。 + 执行以下命令时,你需要将 `` 替换为实际的安装包路径,并使用 `root` 用户执行。安装时,会自动创建默认安装目录 `/usr/local/cloudberry/`。 ```bash cd /home/gpadmin @@ -429,9 +429,9 @@ systemctl status chronyd ssh `hostname` # 确认本地 SSH 登录能正常工作 ``` -## 第 3 步:部署单计算节点的 Cloudberry Database +## 第 3 步:部署单计算节点的 Apache Cloudberry -使用脚本工具 [`gpdemo`](/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpdemo.md) 快速部署 Cloudberry Database。`gpdemo` 包含在 RPM 包中,将随配置脚本(gpinitsystem、gpstart、gpstop 等)一并安装到 `GPHOME/bin` 目录下,支持快捷部署无 Segment 节点的 Cloudberry Database。 +使用脚本工具 [`gpdemo`](/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpdemo.md) 快速部署 Apache Cloudberry。`gpdemo` 包含在 RPM 包中,将随配置脚本(gpinitsystem、gpstart、gpstop 等)一并安装到 `GPHOME/bin` 目录下,支持快捷部署无 Segment 节点的 Apache Cloudberry。 在上面[为 XFS 文件系统设置挂载选项](#为-xfs-文件系统设置挂载选项)中,XFS 文件系统的数据目录挂载在了 `/data0` 上。以下指令在该数据目录下部署一个单计算节点集群: @@ -440,13 +440,13 @@ cd /data0 NUM_PRIMARY_MIRROR_PAIRS=0 gpdemo # 使用 gpdemo 工具 ``` -在 `gpdemo` 的执行过程中,会输出一条新的警告 `[WARNING]:-SinglenodeMode has been enabled, no segment will be created.`,这表示当前正以单计算节点模式部署 Cloudberry Database。 +在 `gpdemo` 的执行过程中,会输出一条新的警告 `[WARNING]:-SinglenodeMode has been enabled, no segment will be created.`,这表示当前正以单计算节点模式部署 Apache Cloudberry。 ## 常见问题 ### 如何确认集群的部署模式 -执行以下步骤确认当前 Cloudberry Database 的部署模式: +执行以下步骤确认当前 Apache Cloudberry 的部署模式: 1. 连接到 Coordinator 节点。 2. 
执行 `SHOW ``gp_role``;` 查看当前集群的运行模式。 @@ -473,7 +473,7 @@ NUM_PRIMARY_MIRROR_PAIRS=0 gpdemo # 使用 gpdemo 工具 ## 用户行为变更 -在单计算节点模式下, Cloudberry Database 的产品行为有如下变更,用户在执行相关操作前需注意: +在单计算节点模式下, Apache Cloudberry 的产品行为有如下变更,用户在执行相关操作前需注意: - 在使用 `CREATE TABLE` 语句建表时,`DISTRIBUTED BY` 子句不再生效。执行此类语句时,该子句的效果将被忽略,并输出一条新的警告 `WARNING: DISTRIBUTED BY clause has no effect in singlenode mode`。 - `SELECT` 语句的 `SCATTER BY` 子句不再有效。执行此类语句时,该子句的效果将被忽略,并输出一条新的警告 `WARNING: SCATTER BY clause has no effect in singlenode mode`。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/insert-update-delete-rows.md b/i18n/zh/docusaurus-plugin-content-docs/current/insert-update-delete-rows.md index 9818e94ed9..05a852d33a 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/insert-update-delete-rows.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/insert-update-delete-rows.md @@ -4,7 +4,7 @@ title: 插入、更新和删除行数据 # 插入、更新和删除行数据 -本文介绍如何在 Cloudberry Database 中操作行数据,包括: +本文介绍如何在 Apache Cloudberry 中操作行数据,包括: - [插入行](#插入行) - [更新现有行](#更新现有行) @@ -46,11 +46,11 @@ INSERT INTO products (product_no, name, price) VALUES ### 往分区表中插入行 -要将数据插入分区表,你需要指定根分区表,该根分区表是通过 `CREATE TABLE` 命令创建的。Cloudberry Database 不支持在 `INSERT` 命令中直接指定叶分区,这样会导致报错。这是因为数据插入是由数据库系统自动管理的,所以叶分区对用户不可见。 +要将数据插入分区表,你需要指定根分区表,该根分区表是通过 `CREATE TABLE` 命令创建的。Apache Cloudberry 不支持在 `INSERT` 命令中直接指定叶分区,这样会导致报错。这是因为数据插入是由数据库系统自动管理的,所以叶分区对用户不可见。 如果插入的数据不符合任何现有分区的范围(例如,指定的键值与任何分区规则都不匹配),将返回错误。 -要确保数据正确地插入分区表,只需要在 `INSERT` 语句中指定根分区表。Cloudberry Database 根据分区键自动将数据行插入正确的叶分区。如果数据行不符合任何叶分区的范围,Cloudberry Database 将返回错误。 +要确保数据正确地插入分区表,只需要在 `INSERT` 语句中指定根分区表。Apache Cloudberry 根据分区键自动将数据行插入正确的叶分区。如果数据行不符合任何叶分区的范围,Apache Cloudberry 将返回错误。 示例: @@ -59,13 +59,13 @@ _-- 将数据插入根分区表_ INSERT INTO sales (sale_id, product_no, year, amount) VALUES (1, 'Cheese', 2021, 9.99); ``` -对于上述语句,Cloudberry Database 根据年份列的值自动将数据行插入正确的分区,因此不需要在语句中指定叶分区。 +对于上述语句,Apache Cloudberry 根据年份列的值自动将数据行插入正确的分区,因此不需要在语句中指定叶分区。 ### 在 Append-Optimized 表中插入行 要插入大量数据到 Append-Optimized (AO) 
表,请使用外部表或 `COPY` 命令,它们比 `INSERT` 更高效。 -Cloudberry Database 中 AO 表的存储设计,是为了高效地加载批量数据,不适合单条 `INSERT` 插入行数据。如果要将大量数据插入 AO 表,建议使用批处理加载方法,例如 `COPY` 命令。Cloudberry Database 支持在 AO 表上执行多个并发的 `INSERT` 事务,但此功能通常用于批量插入而不是单行插入。 +Apache Cloudberry 中 AO 表的存储设计,是为了高效地加载批量数据,不适合单条 `INSERT` 插入行数据。如果要将大量数据插入 AO 表,建议使用批处理加载方法,例如 `COPY` 命令。Apache Cloudberry 支持在 AO 表上执行多个并发的 `INSERT` 事务,但此功能通常用于批量插入而不是单行插入。 ## 更新现有行 @@ -105,7 +105,7 @@ DELETE FROM products; TRUNCATE mytable; ``` -该命令将一次性清空表中的所有行。注意,在 Cloudberry Database 中,即使不用 `CASCADE` 选项,`TRUNCATE` 命令也会默认影响继承的子表。此外,由于 Cloudberry Database 不支持外键约束,`TRUNCATE` 命令不会触发任何 `ON DELETE` 操作或重写规则。该命令仅清空指定表中的行。 +该命令将一次性清空表中的所有行。注意,在 Apache Cloudberry 中,即使不用 `CASCADE` 选项,`TRUNCATE` 命令也会默认影响继承的子表。此外,由于 Apache Cloudberry 不支持外键约束,`TRUNCATE` 命令不会触发任何 `ON DELETE` 操作或重写规则。该命令仅清空指定表中的行。 ## 清空数据库 @@ -117,7 +117,7 @@ VACUUM mytable; `VACUUM` 命令可以用于收集表级别的统计信息,例如行数和页数。在加载数据后,对所有表(包括 AO 表)进行 `VACUUM` 操作。 -在 Cloudberry Database 中维护数据时,特别是频繁执行更新和删除操作时,需要使用 `VACUUM`、`VACUUM FULL` 和 `VACUUM ANALYZE` 命令。 +在 Apache Cloudberry 中维护数据时,特别是频繁执行更新和删除操作时,需要使用 `VACUUM`、`VACUUM FULL` 和 `VACUUM ANALYZE` 命令。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/manage-roles-and-privileges.md b/i18n/zh/docusaurus-plugin-content-docs/current/manage-roles-and-privileges.md index b17e827815..3b47581af6 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/manage-roles-and-privileges.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/manage-roles-and-privileges.md @@ -2,4 +2,4 @@ title: 管理角色和权限 --- -# 管理 Cloudberry Database 中的角色和权限 +# 管理 Apache Cloudberry 中的角色和权限 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-create-ao-refresh-mv.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-create-ao-refresh-mv.md index 990eb62250..5d2ed7b74d 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-create-ao-refresh-mv.md +++ 
b/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-create-ao-refresh-mv.md @@ -4,7 +4,7 @@ title: 并行创建 AO/AOCO 表与刷新物化视图 # 并行创建 AO/AOCO 表与刷新物化视图(引入自 v1.5.0 版本) -自 v1.5.0 起,Cloudberry Database 支持使用 `CREATE TABLE AS` 语句并行创建 Append-Optimized (AO) 表和 Append-Optimized Column Oriented (AOCO) 表,同时支持并行刷新基于该表的物化视图,从而加速建表和物化视图刷新。 +自 v1.5.0 起,Apache Cloudberry 支持使用 `CREATE TABLE AS` 语句并行创建 Append-Optimized (AO) 表和 Append-Optimized Column Oriented (AOCO) 表,同时支持并行刷新基于该表的物化视图,从而加速建表和物化视图刷新。 要使用该并发功能,你需要先将系统参数 `enable_parallel` 的值设为 `ON`。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-query-execution.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-query-execution.md index 51c9cdd78e..9db3936b27 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-query-execution.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/parallel-query-execution.md @@ -4,7 +4,7 @@ title: 并行执行查询 # 并行执行查询 -本文档介绍 Cloudberry Database 并行查询的使用场景、使用方法、使用限制和使用该功能的常见问题。Cloudberry Database 支持并行查询功能,即利用多个 CPU 核心来处理单个查询,以此来提高查询性能。在执行查询过程中,计算节点(包括 `SeqScan` 算子)的数量会随着数据量变化动态调整。 +本文档介绍 Apache Cloudberry 并行查询的使用场景、使用方法、使用限制和使用该功能的常见问题。Apache Cloudberry 支持并行查询功能,即利用多个 CPU 核心来处理单个查询,以此来提高查询性能。在执行查询过程中,计算节点(包括 `SeqScan` 算子)的数量会随着数据量变化动态调整。 ## 使用场景 @@ -14,7 +14,7 @@ title: 并行执行查询 ## 使用方法 -Cloudberry Database 支持在 AO/AOCO 表和 Heap 表上进行并行查询。 +Apache Cloudberry 支持在 AO/AOCO 表和 Heap 表上进行并行查询。 ### 并行查询 Heap 表 @@ -83,7 +83,7 @@ Cloudberry Database 支持在 AO/AOCO 表和 Heap 表上进行并行查询。 ## 常见问题 -- 目前支持并行执行包含下列算子的查询语句。Cloudberry Database 暂不支持包含其他算子的查询。 +- 目前支持并行执行包含下列算子的查询语句。Apache Cloudberry 暂不支持包含其他算子的查询。 ```sql sequence scan diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/update-stats-using-analyze.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/update-stats-using-analyze.md index 6130161d75..0ec475686a 100644 --- 
a/i18n/zh/docusaurus-plugin-content-docs/current/performance/update-stats-using-analyze.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/update-stats-using-analyze.md @@ -4,7 +4,7 @@ title: 更新统计信息 # 更新统计信息 -要获得良好的查询性能,准确的统计信息准确十分重要。通过使用 `ANALYZE` 语句更新统计信息,可以使查询优化器生成最优的查询计划。Cloudberry Database 对表进行分析时,相关的数据信息被存储在系统目录表中。如果存储的信息过时了,查询优化器可能会生成低效的查询计划。 +要获得良好的查询性能,准确的统计信息准确十分重要。通过使用 `ANALYZE` 语句更新统计信息,可以使查询优化器生成最优的查询计划。Apache Cloudberry 对表进行分析时,相关的数据信息被存储在系统目录表中。如果存储的信息过时了,查询优化器可能会生成低效的查询计划。 ## 查看统计信息是否已更新 @@ -24,7 +24,7 @@ WHERE relname = 'test_analyze'; 对大表执行 `ANALYZE` 可能需要很长时间。如果无法对大表的所有列执行 `ANALYZE`,可以仅针对特定列使用 `ANALYZE table(column, ...)` 来生成统计信息。确保在这些条件中使用的列被包含在内:连接、`WHERE` 子句、`SORT` 子句、`GROUP BY` 子句或 `HAVING` 子句。 -对于分区表,可以仅对发生变化的分区执行 `ANALYZE`,例如,添加新分区时。请注意,对于分区表,可以在根分区表或叶分区(实际存储数据和统计信息的文件)上执行 `ANALYZE`。在 Cloudberry Database 中,对分区表的单个分区执行 `ANALYZE` 还会更新根表的统计信息,这表明对一个分区进行统计信息收集可能会影响整个分区表的优化器统计信息。使用 `pg_partition_tree()` 函数可以找到叶分区的名称。 +对于分区表,可以仅对发生变化的分区执行 `ANALYZE`,例如,添加新分区时。请注意,对于分区表,可以在根分区表或叶分区(实际存储数据和统计信息的文件)上执行 `ANALYZE`。在 Apache Cloudberry 中,对分区表的单个分区执行 `ANALYZE` 还会更新根表的统计信息,这表明对一个分区进行统计信息收集可能会影响整个分区表的优化器统计信息。使用 `pg_partition_tree()` 函数可以找到叶分区的名称。 ```sql SELECT * FROM pg_partition_tree( 'parent_table' ); @@ -58,7 +58,7 @@ SELECT * FROM pg_partition_tree( 'parent_table' ); 而将 `gp_autostats_mode` 设置为 `on_change` 时,仅当受影响的行数超过 `gp_autostats_on_change_threshold` 所设定的阈值时,才会进行统计信息收集。这个阈值的默认值是 `2147483647`。在表所有者执行 `CREATE TABLE AS SELECT`、`UPDATE`、`DELETE`、`INSERT` 和 `COPY` 操作时,如果受影响的行数超过了这个阈值,就会触发自动统计信息收集。 -另外,若将服务器配置参数 `gp_autostats_allow_nonowner` 设置为 `true`,Cloudberry Database 将会在以下情况对表进行自动统计信息收集: +另外,若将服务器配置参数 `gp_autostats_allow_nonowner` 设置为 `true`,Apache Cloudberry 将会在以下情况对表进行自动统计信息收集: - 当 `gp_autostats_mode` 被设置为 `on_no_stats`,并且第一个对表执行 `INSERT` 或 `COPY` 操作的非所有者用户。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-aggre-pushdown-to-speed-up-queries.md 
b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-aggre-pushdown-to-speed-up-queries.md index e907060c93..22641ebb18 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-aggre-pushdown-to-speed-up-queries.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-aggre-pushdown-to-speed-up-queries.md @@ -4,12 +4,12 @@ title: 使用聚集下推优化查询 # 使用聚集下推优化查询 -聚集下推 (Aggregation Pushdown) 是使聚集 (Aggregation) 操作的运算更接近数据源的一种优化技术。Cloudberry Database 支持将聚集运算下推,即将聚集算子提前到连接 (Join) 算子之前进行计算。 +聚集下推 (Aggregation Pushdown) 是使聚集 (Aggregation) 操作的运算更接近数据源的一种优化技术。Apache Cloudberry 支持将聚集运算下推,即将聚集算子提前到连接 (Join) 算子之前进行计算。 在[适用的场景](#适用场景)下,聚集下推能够明显地减少连接算子或者聚集算子的输入集大小,进而提升算子的执行性能。 :::tip 提示 -- 在原生 PostgreSQL 内核的优化器逻辑中,每个查询中的聚集操作始终都在全部连接操作结束后才会进行(不包含子查询的情况下)。因此 Cloudberry Database 引入聚集下推特性,使得 Cloudberry Database 能够在合适的场景下选择提前执行聚集操作。 +- 在原生 PostgreSQL 内核的优化器逻辑中,每个查询中的聚集操作始终都在全部连接操作结束后才会进行(不包含子查询的情况下)。因此 Apache Cloudberry 引入聚集下推特性,使得 Apache Cloudberry 能够在合适的场景下选择提前执行聚集操作。 - 要判断优化器选择的执行计划是否应用了聚集下推优化,可以观察 Aggregation 和 Join 在执行计划树中的位置关系。若某个执行计划先进行了 Partial Aggregation 然后再进行 Join 操作,最终才进行 Final Aggregation,则表示优化器应用了聚集下推。 ::: @@ -49,7 +49,7 @@ Optimizer: Postgres query optimizer (13 rows) ``` -从以上示例的执行计划结果中,可以看到在进行 HashJoin 运算前,Cloudberry Database 先对 `t1` 表根据 `id` 列提前执行了聚集运算。该聚集运算并不会破坏语句执行结果的正确性,并大概率能减少进入 HashJoin 的数据量,进而提升语句执行的效率。 +从以上示例的执行计划结果中,可以看到在进行 HashJoin 运算前,Apache Cloudberry 先对 `t1` 表根据 `id` 列提前执行了聚集运算。该聚集运算并不会破坏语句执行结果的正确性,并大概率能减少进入 HashJoin 的数据量,进而提升语句执行的效率。 ## 适用场景 @@ -69,7 +69,7 @@ SELECT o.order_id, SUM(price) ``` - 在 PostgreSQL 原生优化器中的执行方式:PostgreSQL 原生优化器只能先将两表连接再进行聚集运算。由于 `order_line_tbl` 表中的每个订单项一定存在对应的 `order_tbl` 表中的订单信息,因此该 Join 算子并不会筛选掉任何数据。 -- 在 Cloudberry Database 中的执行方式:假设每个订单平均包含 10 个订单项,那么经过 Aggregation 算子进行聚集后,数据量预计会减少 10 倍。开启聚集下推后,数据库将先对 `order_line_tbl` 中的数据根据 `order_id` 进行提前聚集,由此传入 Join 算子的数据量将减少 10 倍,进而显著地降低 Join 算子的开销。对应的执行计划如下: +- 在 Apache Cloudberry 中的执行方式:假设每个订单平均包含 10 个订单项,那么经过 Aggregation 算子进行聚集后,数据量预计会减少 10 
倍。开启聚集下推后,数据库将先对 `order_line_tbl` 中的数据根据 `order_id` 进行提前聚集,由此传入 Join 算子的数据量将减少 10 倍,进而显著地降低 Join 算子的开销。对应的执行计划如下: ```sql EXPLAIN SELECT o.order_id, SUM(price) @@ -100,7 +100,7 @@ SELECT proj_name, sum(cost) GROUP BY proj_name; ``` -对于该查询,开启聚集下推后,Cloudberry Database 会提前对 `experiment` 表根据 `e_pid` 列进行预聚集,将同一个 `project` 的信息先聚集在一起。 +对于该查询,开启聚集下推后,Apache Cloudberry 会提前对 `experiment` 表根据 `e_pid` 列进行预聚集,将同一个 `project` 的信息先聚集在一起。 但如果该查询在 `project` 表上也做了许多筛选的话,可能使得 Join 的筛选率过高,导致执行效率不高,因此聚集下推暂时不适用于这种情况。 @@ -201,8 +201,8 @@ SELECT id, sum1 * cnt2 FROM WHERE AT1.id = AT2.id GROUP BY id; ``` -该例子中将聚集操作同时下推到了 Join 的两侧。对于 `t1` 表中 `id` 为 `100` 的全部元组,Cloudberry Database 对其 `val` 提前进行了求和,得到了对应的 `sum1`。 +该例子中将聚集操作同时下推到了 Join 的两侧。对于 `t1` 表中 `id` 为 `100` 的全部元组,Apache Cloudberry 对其 `val` 提前进行了求和,得到了对应的 `sum1`。 -在实际进行连接的过程中,对于 `t2` 表中每个 `id` 为 `100` 的元组,都会与 `sum1` 所属的元组进行连接并得到对应的元组。也就是说 `t2` 中每有一个 `id` 为 `100`,`sum1` 就会在最终的求和结果中出现一次,因此 Cloudberry Database 可以提前对 `t2` 进行聚集,计算出总共有 `cnt2` 个 `id` 为 `100` 的 元组,最后可以通过 `sum1 * cnt2` 来计算最终的结果。 +在实际进行连接的过程中,对于 `t2` 表中每个 `id` 为 `100` 的元组,都会与 `sum1` 所属的元组进行连接并得到对应的元组。也就是说 `t2` 中每有一个 `id` 为 `100`,`sum1` 就会在最终的求和结果中出现一次,因此 Apache Cloudberry 可以提前对 `t2` 进行聚集,计算出总共有 `cnt2` 个 `id` 为 `100` 的 元组,最后可以通过 `sum1 * cnt2` 来计算最终的结果。 由于该场景涉及到相对复杂的语句改写以及表达式改写,在目前产品中暂不支持。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-auto-materialized-view-to-answer-queries.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-auto-materialized-view-to-answer-queries.md index d5f64d0d02..0cd4b12f97 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-auto-materialized-view-to-answer-queries.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-auto-materialized-view-to-answer-queries.md @@ -4,7 +4,7 @@ title: 自动使用物化视图进行查询优化 # 自动使用物化视图进行查询优化(引入自 v1.5.0 版本) -自 v1.5.0 版本起,Cloudberry Database 支持在查询规划阶段自动使用物化视图来计算部分或全部查询(即 AQUMV)。这一功能特别适用于在大表上进行的查询,能显著提高查询处理时间。AQUMV 使用增量物化视图 (IMV),因为当相关表有写入操作时,IMV 
通常能保持最新的数据。 +自 v1.5.0 版本起,Apache Cloudberry 支持在查询规划阶段自动使用物化视图来计算部分或全部查询(即 AQUMV)。这一功能特别适用于在大表上进行的查询,能显著提高查询处理时间。AQUMV 使用增量物化视图 (IMV),因为当相关表有写入操作时,IMV 通常能保持最新的数据。 ## 场景说明 @@ -117,7 +117,7 @@ title: 自动使用物化视图进行查询优化 AQUMV 是通过对查询树进行等效转换来实现查询优化的。 -表查询只有满足以下条件后,Cloudberry Database 才会自动使用物化视图: +表查询只有满足以下条件后,Apache Cloudberry 才会自动使用物化视图: - 物化视图必须包含查询表达式所需的所有行。 - 如果物化视图包含比查询更多的行,可能需要添加额外的过滤条件。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-incremental-materialized-view.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-incremental-materialized-view.md index 0d62906a42..a68446cb16 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-incremental-materialized-view.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-incremental-materialized-view.md @@ -4,7 +4,7 @@ title: 使用增量物化视图 # 使用增量物化视图(引入自 v1.5.0 版本) -本文档介绍 Cloudberry Database 增量物化视图的使用场景、使用方法、使用限制和注意事项。 +本文档介绍 Apache Cloudberry 增量物化视图的使用场景、使用方法、使用限制和注意事项。 增量物化视图是物化视图的一种特殊形式。当数据在基础表中发生变化时(例如插入、更新、删除操作),增量物化视图不需要重新计算整个视图中的所有数据。相反,它只更新那些自上次刷新以来发生变化的部分。这样可以节省大量的计算资源和时间,显著提高性能,尤其是在处理大型数据集时。 @@ -23,7 +23,7 @@ title: 使用增量物化视图 ## 使用方法示例 :::info 注意 -使用增量物化视图前,确保你所使用的 Cloudberry Database 版本大于等于 1.5.0。v1.4.0 及以下版本的 Cloudberry Database 不支持增量物化视图。 +使用增量物化视图前,确保你所使用的 Apache Cloudberry 版本大于等于 1.5.0。v1.4.0 及以下版本的 Apache Cloudberry 不支持增量物化视图。 ::: 你可以使用 SQL 命令 `CREATE INCREMENTAL MATERIALIZED VIEW` 来创建增量物化视图。完整的语法支持如下: @@ -38,7 +38,7 @@ CREATE [INCREMENTAL] MATERIALIZED VIEW [ IF NOT EXISTS ] table_name [ WITH [ NO ] DATA ] ``` -以下示例说明如何在 Cloudberry Database 中为表格创建增量物化视图。 +以下示例说明如何在 Apache Cloudberry 中为表格创建增量物化视图。 1. 
创建表格 `t0` 和 `t1`。 @@ -102,7 +102,7 @@ CREATE [INCREMENTAL] MATERIALIZED VIEW [ IF NOT EXISTS ] table_name ## 与普通视图的查询性能对比 -以下示例展示了在 Cloudberry Database 中,在处理大型数据集时普通视图与增量物化视图的查询性能对比。以下示例使用了 TPC-H 的 Query 15 测试数据集。 +以下示例展示了在 Apache Cloudberry 中,在处理大型数据集时普通视图与增量物化视图的查询性能对比。以下示例使用了 TPC-H 的 Query 15 测试数据集。 ### 使用普通视图 @@ -221,7 +221,7 @@ CREATE [INCREMENTAL] MATERIALIZED VIEW [ IF NOT EXISTS ] table_name ## 使用限制和注意事项 -目前,Cloudberry Database 上的增量物化视图有以下使用限制: +目前,Apache Cloudberry 上的增量物化视图有以下使用限制: - 不支持为 Append-Optimized (AO) 表创建增量物化视图。 - 不支持为分区表创建增量物化视图。 @@ -237,7 +237,7 @@ CREATE [INCREMENTAL] MATERIALIZED VIEW [ IF NOT EXISTS ] table_name - 不支持在物化视图上创建增量物化视图。 -除此之外,你在 Cloudberry Database 上使用增量物化视图,还需要注意以下问题: +除此之外,你在 Apache Cloudberry 上使用增量物化视图,还需要注意以下问题: - 引入增量物化视图会导致数据插入、删除、更新变慢。另外,一张基表可能有多个增量物化视图,性能退化倍数和增量物化视图个数成正比。 - 使用增量物化视图会产生临时文件用于存储计算 delta 视图,这可能会占用一些存储空间。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-index-scan-on-ao-tables.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-index-scan-on-ao-tables.md index ed8f4c5414..179671c30b 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-index-scan-on-ao-tables.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-index-scan-on-ao-tables.md @@ -4,7 +4,7 @@ title: 在 AO 表上使用 IndexScan # 在 AO 表上使用 IndexScan -Cloudberry Database 支持对 Append-Optimized 表(简称 AO 表)进行 IndexScan,以此来提升某些场景下的查询效率,例如以下查询: +Apache Cloudberry 支持对 Append-Optimized 表(简称 AO 表)进行 IndexScan,以此来提升某些场景下的查询效率,例如以下查询: ```sql SELECT * FROM tbl WHERE val > 100 ORDER BY val LIMIT 10; @@ -13,15 +13,15 @@ SELECT * FROM tbl WHERE val > 100 ORDER BY val LIMIT 10; :::tip 提示 Append-Optimized (AO) 表是一种优化存储方式,面向以批量插入为主的场景,例如大数据分析和数据仓库场景。 -当向 AO 表中插入新数据时,Cloudberry Database 会将新数据插入到表的末尾,而不是像普通的表那样寻找空闲空间插入。这意味着在向 AO 表插入数据时,只需要对文件进行追加写入,因此可以获得更高的插入效率。 +当向 AO 表中插入新数据时,Apache Cloudberry 会将新数据插入到表的末尾,而不是像普通的表那样寻找空闲空间插入。这意味着在向 AO 表插入数据时,只需要对文件进行追加写入,因此可以获得更高的插入效率。 ::: 对于以上查询语句: -- 如果使用 
heap 表存储方式,Cloudberry Database 执行该查询可以通过 IndexScan 找到 10 个 `val` 大于 `100` 的元祖,仅需通过索引和数据表读取大约 10 个元祖即可。 +- 如果使用 heap 表存储方式,Apache Cloudberry 执行该查询可以通过 IndexScan 找到 10 个 `val` 大于 `100` 的元组,仅需通过索引和数据表读取大约 10 个元组即可。 - 如果使用 AO 表的存储方式,且假设 `tbl` 表有 10 亿行元组,而我们通过 `LIMIT` 子句指定只需要返回 10 条元组: - - Cloudberry Database 支持使用 IndexScan 运算来扫描 AO 表,可大幅降低扫描的数据量,大大提升扫描的效率,是比 SeqScan 以及 BitmapScan 更好的扫描方式。SeqScan 或者 BitmapScan 比 IndexScan 多扫描 1 亿倍的数据量。 + - Apache Cloudberry 支持使用 IndexScan 运算来扫描 AO 表,可大幅降低扫描的数据量,大大提升扫描的效率,是比 SeqScan 以及 BitmapScan 更好的扫描方式。SeqScan 或者 BitmapScan 比 IndexScan 多扫描 1 亿倍的数据量。 ## 适用场景 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-runtimefilter-to-optimize-queries.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-runtimefilter-to-optimize-queries.md index 7fdbd3b104..f7f35a52c9 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-runtimefilter-to-optimize-queries.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-runtimefilter-to-optimize-queries.md @@ -6,16 +6,16 @@ title: 使用 RuntimeFilter 优化 Join 查询 在执行大表连接查询的时候,SQL 优化器常选用 HashJoin 算子来进行运算。HashJoin 基于连接键构建哈希表来进行连接键的匹配,这可能存在内存访问和磁盘瓶颈。RuntimeFilter 是在执行 HashJoin 运算时,实时产生过滤器 (Filter) 的优化技术,可以在执行 HashJoin 前预先对数据进行筛选,更快地执行 HashJoin。在某些场景下,通过 RuntimeFilter 优化能够使执行效率翻倍。 -HashJoin 常用于小表和大表的连接。Cloudberry Database 在执行 HashJoin 运算时,通常基于待连接的两表中较小的表来构建哈希表,然后循环地根据较大表中的元祖,在哈希表中查找连接键匹配的元祖来实现连接。用来构建哈希表的小表被称为内表 (Inner Table),另一个用来循环匹配的表被称为外表 (Outer Table)。 +HashJoin 常用于小表和大表的连接。Apache Cloudberry 在执行 HashJoin 运算时,通常基于待连接的两表中较小的表来构建哈希表,然后循环地根据较大表中的元组,在哈希表中查找连接键匹配的元组来实现连接。用来构建哈希表的小表被称为内表 (Inner Table),另一个用来循环匹配的表被称为外表 (Outer Table)。 HashJoin 算子在执行上主要存在以下瓶颈: - 内存访问:对于外表的每个元组,需要在哈希表中查找匹配的内表元组,涉及一次或多次内存访问。 - 磁盘 I/O:若内表不适合全部放到内存中,需要借助磁盘来分批次进行处理,因此产生大量磁盘 I/O。 -针对以上瓶颈开启 RuntimeFilter 优化后,Cloudberry Database 在构建哈希表的同时,也会构建对应的 RuntimeFilter,即在执行 HashJoin 之前对大表的元组进行过滤。在执行过程中,通过布隆过滤器 (Bloom Filter) 来实现 RuntimeFilter,该数据结构所占内存空间远小于哈希表。在能够完全存放在 L3 
缓存中的情况下,布隆过滤器的过滤效率为 HashJoin 的两倍,使得内存访问开销大大降低。 +针对以上瓶颈开启 RuntimeFilter 优化后,Apache Cloudberry 在构建哈希表的同时,也会构建对应的 RuntimeFilter,即在执行 HashJoin 之前对大表的元组进行过滤。在执行过程中,通过布隆过滤器 (Bloom Filter) 来实现 RuntimeFilter,该数据结构所占内存空间远小于哈希表。在能够完全存放在 L3 缓存中的情况下,布隆过滤器的过滤效率为 HashJoin 的两倍,使得内存访问开销大大降低。 -该优化会根据 HashJoin 的连接条件的筛选率以及内表的大小来决定是否生成 RuntimeFilter 算子,在实际执行的过程中如果发现数据量与估算结果偏离太大,Cloudberry Database 也会及时停止使用 RuntimeFilter。 +该优化会根据 HashJoin 的连接条件的筛选率以及内表的大小来决定是否生成 RuntimeFilter 算子,在实际执行的过程中如果发现数据量与估算结果偏离太大,Apache Cloudberry 也会及时停止使用 RuntimeFilter。 ## 适用场景 @@ -24,7 +24,7 @@ HashJoin 算子在执行上主要存在以下瓶颈: - 单个 Segment 中 HashJoin 的内表大小在 1600 万行以内。 - 原本的 HashJoin 连接键对数据的选择率低于 60%,即满足 Hash 连接条件的结果集大小不到外表的 60%,也可以理解为筛选率大于 40%。 -上述场景下,Cloudberry Database 通过 RuntimeFilter 构建的布隆过滤器大小在 2 MB 以内,能够完全存放在 L3 缓存中,进而能以较小的开销筛选掉 40% 的外表元组,因此产生正向收益。在某些场景下,如果该 HashJoin 连接键的选择率低于 10%,通过 RuntimeFilter 优化能够使执行效率翻倍。 +上述场景下,Apache Cloudberry 通过 RuntimeFilter 构建的布隆过滤器大小在 2 MB 以内,能够完全存放在 L3 缓存中,进而能以较小的开销筛选掉 40% 的外表元组,因此产生正向收益。在某些场景下,如果该 HashJoin 连接键的选择率低于 10%,通过 RuntimeFilter 优化能够使执行效率翻倍。 ## 使用限制 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-unique-index-on-ao-tables.md b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-unique-index-on-ao-tables.md index 79906551c1..1a8b8abc9f 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-unique-index-on-ao-tables.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/performance/use-unique-index-on-ao-tables.md @@ -4,11 +4,11 @@ title: 在 AO 表上使用唯一索引 # 在 AO 表上使用唯一索引(引入自 v1.5.0 版本) -自 v1.5.0 版本起,你可以在 Cloudberry Database 的 Append-Optimized (AO) 或 AOCS 表上添加唯一索引。有了唯一索引,Cloudberry Database 会在将数据插入到 AO 表时,强制检查唯一性约束,从而保证数据的唯一性,同时能够与优化器一起优化特定的查询,从而提高数据库的查询性能。但这也带来的一定的开销用于维护唯一索引,尤其是在插入数据时。 +自 v1.5.0 版本起,你可以在 Apache Cloudberry 的 Append-Optimized (AO) 或 AOCS 表上添加唯一索引。有了唯一索引,Apache Cloudberry 会在将数据插入到 AO 表时,强制检查唯一性约束,从而保证数据的唯一性,同时能够与优化器一起优化特定的查询,从而提高数据库的查询性能。但这也带来了一定的开销用于维护唯一索引,尤其是在插入数据时。 ## 实现原理 -针对存在唯一索引的场景,在向 AO 
表插入数据时,Cloudberry Database 会在 AO 表的辅助索引结构 BlockDirectory 中插入 placeholder,以阻塞相同 key 的插入,从而实现唯一索引。 +针对存在唯一索引的场景,在向 AO 表插入数据时,Apache Cloudberry 会在 AO 表的辅助索引结构 BlockDirectory 中插入 placeholder,以阻塞相同 key 的插入,从而实现唯一索引。 ## 如何在 AO 表上添加唯一索引 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/query-performance-overview.md b/i18n/zh/docusaurus-plugin-content-docs/current/query-performance-overview.md index 9219d6032b..a0de42a186 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/query-performance-overview.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/query-performance-overview.md @@ -4,16 +4,16 @@ title: 查询性能概述 # 查询性能概述 -Cloudberry Database 能动态地清除表中的无关分区,并对查询中的不同运算符进行最优内存分配。有了这些优化,数据库执行查询时会减少扫描的数据量,加速查询处理,并支持更多并发。 +Apache Cloudberry 能动态地清除表中的无关分区,并对查询中的不同运算符进行最优内存分配。有了这些优化,数据库执行查询时会减少扫描的数据量,加速查询处理,并支持更多并发。 - 动态分区消除 - 在 Cloudberry Database 中,系统会利用仅在执行查询时才能确定的值来动态地对分区进行裁剪,从而提高查询处理速度。通过设置服务器配置参数 `gp_dynamic_partition_pruning` 为 `ON` 或 `OFF` 来启用或禁用动态分区裁剪,默认情况下为 `ON`。 + 在 Apache Cloudberry 中,系统会利用仅在执行查询时才能确定的值来动态地对分区进行裁剪,从而提高查询处理速度。通过设置服务器配置参数 `gp_dynamic_partition_pruning` 为 `ON` 或 `OFF` 来启用或禁用动态分区裁剪,默认情况下为 `ON`。 - 内存优化 - Cloudberry Database 为查询中的不同运算符进行最优分配内存,在处理查询的不同阶段释放或重新分配内存。 + Apache Cloudberry 为查询中的不同运算符进行最优分配内存,在处理查询的不同阶段释放或重新分配内存。 :::tip 提示 -Cloudberry Database 默认使用 GPORCA 优化器。 GPORCA 扩展了 Postgres 优化器的规划和优化能力。 +Apache Cloudberry 默认使用 GPORCA 优化器。 GPORCA 扩展了 Postgres 优化器的规划和优化能力。 ::: diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/security/transparent-data-encryption.md b/i18n/zh/docusaurus-plugin-content-docs/current/security/transparent-data-encryption.md index d2533c9d30..58eb3c3333 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/security/transparent-data-encryption.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/security/transparent-data-encryption.md @@ -4,7 +4,7 @@ title: 透明数据加密 # 透明数据加密 -为了满足保护用户数据安全的需求,Cloudberry Database 支持透明数据加密(Transparent Data Encryption,简称 TDE)功能。 +为了满足保护用户数据安全的需求,Apache Cloudberry 
支持透明数据加密(Transparent Data Encryption,简称 TDE)功能。 透明数据加密(TDE)是一种用于加密数据库数据文件的技术: @@ -32,7 +32,7 @@ title: 透明数据加密 - 对称加密:加密和解密使用相同密钥。 - 非对称加密:公钥加密,私钥解密,适合一对多、多对一的加密需求。 -对称加密中的分组加密算法是主流选择,性能优于流加密和非对称加密。Cloudberry Database 支持 AES 和 SM4 两种分组加密算法。 +对称加密中的分组加密算法是主流选择,性能优于流加密和非对称加密。Apache Cloudberry 支持 AES 和 SM4 两种分组加密算法。 #### AES 加密算法 @@ -56,10 +56,10 @@ AES 是国际标准的分组加密算法,支持 128、192 和 256 位密钥, 在使用透明数据加密(TDE)功能前,请确保满足以下条件: -- 安装 OpenSSL:Cloudberry Database 节点上必须安装 OpenSSL。通常,Linux 发行版的操作系统会自带 OpenSSL。 -- Cloudberry Database 版本:确保你的 Cloudberry Database 版本不低于 v1.6.0,该版本开始支持 TDE 功能。 +- 安装 OpenSSL:Apache Cloudberry 节点上必须安装 OpenSSL。通常,Linux 发行版的操作系统会自带 OpenSSL。 +- Apache Cloudberry 版本:确保你的 Apache Cloudberry 版本不低于 v1.6.0,该版本开始支持 TDE 功能。 -在部署 Cloudberry Database 时,可以通过设置来开启 TDE 功能,之后的所有数据加密操作将对用户完全透明。要在数据库初始化时启用 TDE,使用 `gpinitsystem` 命令并指定 `-T` 参数。Cloudberry Database 支持两种加密算法:AES 和 SM4。以下是开启 TDE 的示例: +在部署 Apache Cloudberry 时,可以通过设置来开启 TDE 功能,之后的所有数据加密操作将对用户完全透明。要在数据库初始化时启用 TDE,使用 `gpinitsystem` 命令并指定 `-T` 参数。Apache Cloudberry 支持两种加密算法:AES 和 SM4。以下是开启 TDE 的示例: - 使用 AES256 加密算法: diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/set-password-profile.md b/i18n/zh/docusaurus-plugin-content-docs/current/set-password-profile.md index b3f124c741..5c51ee97b0 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/set-password-profile.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/set-password-profile.md @@ -2,12 +2,12 @@ title: 配置密码策略 --- -# 在 Cloudberry Database 中配置密码策略(引入自 v1.5.0 版本) +# 在 Apache Cloudberry 中配置密码策略(引入自 v1.5.0 版本) -Profile,即密码策略配置,用于控制 Cloudberry Database 中用户的密码安全策略。你可以将 Profile 绑定到一个或多个用户中,从而控制数据库用户的密码安全策略。Profile 定义了用户管理和重复使用密码的规则。通过配置 Profile,数据库管理员可以使用 SQL 语句强制添加一些约束,例如在一定次数的登录失败后锁定账户,或者控制密码重复使用次数。 +Profile,即密码策略配置,用于控制 Apache Cloudberry 中用户的密码安全策略。你可以将 Profile 绑定到一个或多个用户中,从而控制数据库用户的密码安全策略。Profile 定义了用户管理和重复使用密码的规则。通过配置 Profile,数据库管理员可以使用 SQL 语句强制添加一些约束,例如在一定次数的登录失败后锁定账户,或者控制密码重复使用次数。 :::info 注意 -- 通常来说,Profile(即配置文件)分为密码策略及用户资源使用限制两部分。Cloudberry 
Database 中的 Profile 目前只支持密码策略,在本文档中提到的 Profile,除非另有说明,均代指 Password Profile(密码策略配置)。 +- 通常来说,Profile(即配置文件)分为密码策略及用户资源使用限制两部分。Apache Cloudberry 中的 Profile 目前只支持密码策略,在本文档中提到的 Profile,除非另有说明,均代指 Password Profile(密码策略配置)。 - 只有超级用户可以创建或修改 Profile 的策略,超级用户不受任何 Profile 策略的控制。普通用户只有在允许使用 Profile 时,Profile 的策略才会生效。 ::: @@ -23,7 +23,7 @@ gpstop -ra ## 实现原理 -类似于 Autovacuum 机制,Profile 引入了 Login Monitor Launcher 和 Login Monitor Worker 进程。当用户登录验证失败时,Cloudberry Database 会向 postmaster 发送信号量。postmaster 收到信号后会向 launcher 进程发送信号。launcher 进程在收到信号后,会通知 postmaster 拉起一个 worker 进程,worker 进程中进行元数据回写操作,并在完成后,通知用户进程和 launcher 进程。 +类似于 Autovacuum 机制,Profile 引入了 Login Monitor Launcher 和 Login Monitor Worker 进程。当用户登录验证失败时,Apache Cloudberry 会向 postmaster 发送信号量。postmaster 收到信号后会向 launcher 进程发送信号。launcher 进程在收到信号后,会通知 postmaster 拉起一个 worker 进程,worker 进程中进行元数据回写操作,并在完成后,通知用户进程和 launcher 进程。 ## 使用 SQL 语法设置密码策略 @@ -125,7 +125,7 @@ ALTER USER user ACCOUNT ## 在系统表中查看密码策略信息 -应用密码配置策略 Profile 后,Cloudberry Database 会在数据库元信息中做一些变更,即新增 `pg_profile` 和 `pg_password_history` 两张系统表,并在系统表/视图 `pg_authid` 和 `pg_roles` 添加了部分字段。示例如下: +应用密码配置策略 Profile 后,Apache Cloudberry 会在数据库元信息中做一些变更,即新增 `pg_profile` 和 `pg_password_history` 两张系统表,并在系统表/视图 `pg_authid` 和 `pg_roles` 添加了部分字段。示例如下: - **pg_catalog.pg_roles**:在 `pg_roles` 下,新增了 `rolprofile`、`rolaccountstatus`、`rolfailedlogins` 字段,分别记录应用 Profile 的数据库用户、账户状态、登录失败的次数。 @@ -260,7 +260,7 @@ ALTER USER user ACCOUNT ## 默认密码策略 -创建新用户时,如果没有指定具体的密码策略,Cloudberry Database 默认为该用户应用 Default Profile,即系统初始化时生成的默认密码策略。Cloudberry Database 中的 Default Profile 为 `pg_profile` 表中的 `pg_default` 行。`pg_default` 定义了 Profile 参数的默认值,只有超级用户可以对这些限制进行更新。 +创建新用户时,如果没有指定具体的密码策略,Apache Cloudberry 默认为该用户应用 Default Profile,即系统初始化时生成的默认密码策略。Apache Cloudberry 中的 Default Profile 为 `pg_profile` 表中的 `pg_default` 行。`pg_default` 定义了 Profile 参数的默认值,只有超级用户可以对这些限制进行更新。 如果用户设置的 Profile 中,有值为 `-1`(即使用默认值)的参数,这些参数会从 `pg_default` 中得到具体的值。`pg_default` 的默认值如下所示。如何使用 Default Profile 可以参考[使用场景三](#场景三使用-default-profile-的设置)。 @@ 
-441,7 +441,7 @@ ALTER USER myuser PASSWORD 'mypassword'; ## 场景三:使用 DEFAULT PROFILE 的设置 -创建一个新的 Profile 时,如果不显式指定参数值,那么该 Profile 在 `pg_profile` 表中对应的参数值为 `-1`,表示 Cloudberry Database 会从 `pg_default` 中获取该参数的实际值。 +创建一个新的 Profile 时,如果不显式指定参数值,那么该 Profile 在 `pg_profile` 表中对应的参数值为 `-1`,表示 Apache Cloudberry 会从 `pg_default` 中获取该参数的实际值。 以下以 `FAILED_LOGIN_ATTEMPTS` 为例: diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-abort.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-abort.md index 4b11e99f7e..e1b71a08f0 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-abort.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-abort.md @@ -29,7 +29,7 @@ ABORT [WORK | TRANSACTION] [AND [NO] CHAIN] ## 注意事项 -使用 [`COMMIT`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/commit.md) 成功终止事务。 +使用 [`COMMIT`](https://github.com/apache/cloudberry-site/blob/cbdb-doc-validation/docs/sql-stmts/commit.md) 成功终止事务。 在事务块之外执行 `ABORT` 会报错,而且不会生效。 @@ -43,7 +43,7 @@ ABORT; ## 兼容性 -该命令是 Cloudberry Database 的扩展,仅出于历史原因而存在。`ROLLBACK` 是等效的标准 SQL 命令。 +该命令是 Apache Cloudberry 的扩展,仅出于历史原因而存在。`ROLLBACK` 是等效的标准 SQL 命令。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-database.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-database.md index e63d2e36dc..0aa1a8f2fd 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-database.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-database.md @@ -39,9 +39,9 @@ ALTER DATABASE RESET ALL 第三种形式更改数据库的所有者。要更改所有者,你必须拥有该数据库,并且是新所有者角色的直接或间接成员,你还必须有 `CREATEDB` 权限。注意,超级用户自动拥有所有这些权限。 -第四种形式更改数据库的默认表空间。只有数据库所有者或超级用户才能这样操作。你还必须对新表空间具有创建权限。执行该命令,Cloudberry Database 会将旧的默认表空间中所有的表或索引物理移动到新的表空间。新的默认表空间必须为空,且无人可以连接到该数据库。请注意,非默认表空间中的表和索引不受影响。 +第四种形式更改数据库的默认表空间。只有数据库所有者或超级用户才能这样操作。你还必须对新表空间具有创建权限。执行该命令,Apache Cloudberry 
会将旧的默认表空间中所有的表或索引物理移动到新的表空间。新的默认表空间必须为空,且无人可以连接到该数据库。请注意,非默认表空间中的表和索引不受影响。 -其它的形式更改 Cloudberry Database 配置参数的会话默认值。执行命令后,在该数据库中启动新会话时,指定的值将成为会话的默认值。数据库特定的默认值会覆盖服务器配置文件(`postgresql.conf`)中的设置。只有数据库所有者或超级用户才能更改数据库的会话默认值。某些参数不能以这种方式设置,或者只能由超级用户设置。 +其它的形式更改 Apache Cloudberry 配置参数的会话默认值。执行命令后,在该数据库中启动新会话时,指定的值将成为会话的默认值。数据库特定的默认值会覆盖服务器配置文件(`postgresql.conf`)中的设置。只有数据库所有者或超级用户才能更改数据库的会话默认值。某些参数不能以这种方式设置,或者只能由超级用户设置。 ## 参数 @@ -55,7 +55,7 @@ ALTER DATABASE RESET ALL **`connlimit`** -Coordinator 上允许连接到该数据库的最大并发连接数。默认值为 `-1`,表示无限制。Cloudberry Database 超级用户不受此限制。 +Coordinator 上允许连接到该数据库的最大并发连接数。默认值为 `-1`,表示无限制。Apache Cloudberry 超级用户不受此限制。 **`istemplate`** @@ -101,7 +101,7 @@ ALTER DATABASE mydatabase SET search_path TO myschema, public, pg_catalog; ## 兼容性 -`ALTER DATABASE` 语句是 Cloudberry Database 的扩展。 +`ALTER DATABASE` 语句是 Apache Cloudberry 的扩展。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-rule.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-rule.md index a6d72016b4..46bb2a3415 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-rule.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-alter-rule.md @@ -42,7 +42,7 @@ ALTER RULE notify_all ON emp RENAME TO notify_me; ## 兼容性 -`ALTER RULE` 是 Cloudberry Database 的扩展,整个查询重写系统也是如此。 +`ALTER RULE` 是 Apache Cloudberry 的扩展,整个查询重写系统也是如此。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-database.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-database.md index 91cd76230f..a8f2fbfed1 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-database.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-database.md @@ -26,7 +26,7 @@ CREATE DATABASE name 默认情况下,数据库的创建者为新数据库的所有者。超级用户可以使用 `OWNER` 子句创建其他用户拥有的数据库。超级用户甚至可以创建无特殊权限的用户拥有的数据库。拥有 `CREATEDB` 权限的非超级用户只能创建自己拥有的数据库。 
-默认情况下,新数据库将通过克隆系统数据库 `template1` 来创建。可以通过在命令中使用 `TEMPLATE name` 来指定不同的模板。特别地,在命令中使用 `TEMPLATE template0` 可以创建一个仅包含 Cloudberry Database 预定义标准对象的干净数据库。如果你希望避免将那些可能已添加到 `template1` 的任何安装本地对象复制到新数据库,使用该选项将非常有用。 +默认情况下,新数据库将通过克隆系统数据库 `template1` 来创建。可以通过在命令中使用 `TEMPLATE name` 来指定不同的模板。特别地,在命令中使用 `TEMPLATE template0` 可以创建一个仅包含 Apache Cloudberry 预定义标准对象的干净数据库。如果你希望避免将那些可能已添加到 `template1` 的任何安装本地对象复制到新数据库,使用该选项将非常有用。 ## 参数 @@ -52,7 +52,7 @@ CREATE DATABASE name **`connlimit`** -Coordinator 上允许连接到该数据库的最大并发连接数。默认值为 `-1`,表示没有限制。Cloudberry Database 超级用户不受此限制。 +Coordinator 上允许连接到该数据库的最大并发连接数。默认值为 `-1`,表示没有限制。Apache Cloudberry 超级用户不受此限制。 **`istemplate`** diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-index.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-index.md index b499605d2f..c4dff59357 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-index.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-create-index.md @@ -25,7 +25,7 @@ CREATE [UNIQUE] INDEX [[IF NOT EXISTS] ] ON [ONLY] [USING ## 描述 -`CREATE TABLESPACE` 为你的 Cloudberry Database 系统注册并配置一个新的表空间。表空间的名称必须与系统中任何现有表空间的名称不同。表空间是 Cloudberry Database 系统对象(全局对象),如果你有足够的权限,你可以从任何数据库使用表空间。 +`CREATE TABLESPACE` 为你的 Apache Cloudberry 系统注册并配置一个新的表空间。表空间的名称必须与系统中任何现有表空间的名称不同。表空间是 Apache Cloudberry 系统对象(全局对象),如果你有足够的权限,你可以从任何数据库使用表空间。 超级用户可以定义一个替代的主机文件系统位置,用于存放包含数据库对象(如表和索引)的数据文件。 -拥有适当权限的用户可以将 tablespace_name 传递给 [`CREATE DATABASE`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/create-database.md)、[`CREATE TABLE`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/create-table.md) 或 [`CREATE INDEX`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/create-index.md),以此指示 Cloudberry Database 将这些对象的数据文件存储在指定的表空间中。 +拥有适当权限的用户可以将 tablespace_name 传递给 [`CREATE DATABASE`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/create-database.md)、[`CREATE 
TABLE`](https://github.com/apache/cloudberry-site/blob/cbdb-doc-validation/docs/sql-stmts/create-table.md) 或 [`CREATE INDEX`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/create-index.md),以此指示 Apache Cloudberry 将这些对象的数据文件存储在指定的表空间中。 -在 Cloudberry Database 中,文件系统位置必须存在于所有主机上,包括运行 Coordinator、备用镜像、每个主要 Segment 和每个镜像 Segment 的主机。 +在 Apache Cloudberry 中,文件系统位置必须存在于所有主机上,包括运行 Coordinator、备用镜像、每个主要 Segment 和每个镜像 Segment 的主机。 ## 参数 @@ -37,15 +37,15 @@ CREATE TABLESPACE **`LOCATION 'directory'`** -将被用作表空间的目录,该目录应为空,并需由 Cloudberry Database 系统用户所拥有。您需要提供目录的绝对路径,且路径名长度不得超过 100 个字符。(这个路径用于在 `pg_tblspc` 目录下创建一个符号链接目标。当使用 `pg_basebackup` 等工具将符号链接目标发送至 tar 时,路径会被截短至 100 个字符。) +将被用作表空间的目录,该目录应为空,并需由 Apache Cloudberry 系统用户所拥有。您需要提供目录的绝对路径,且路径名长度不得超过 100 个字符。(这个路径用于在 `pg_tblspc` 目录下创建一个符号链接目标。当使用 `pg_basebackup` 等工具将符号链接目标发送至 tar 时,路径会被截短至 100 个字符。) -你可以为 `WITH` 子句中的任何 Cloudberry Database Segment 实例指定不同的表空间目录。 +你可以为 `WITH` 子句中的任何 Apache Cloudberry Segment 实例指定不同的表空间目录。 **`contentID_i='directory_i'`** `contentID_i='directory_i'` 中的 `ID_i` 是 Segment 实例的 content ID。`directory_i` 是主机系统文件位置的绝对路径,Segment 实例将该路径用作表空间的根目录。你不能指定 Coordinator 实例的 content ID(`-1`)。你可以为多个 Segment 指定相同的目录。 -如果一个 Segment 实例没有在 `WITH` 子句中列出,Cloudberry Database 将使用 `LOCATION` 子句中指定的表空间目录。 +如果一个 Segment 实例没有在 `WITH` 子句中列出,Apache Cloudberry 将使用 `LOCATION` 子句中指定的表空间目录。 对于 `LOCATION` 目录的限制也适用于 `directory_i`。 @@ -55,14 +55,14 @@ CREATE TABLESPACE ## 注意事项 -因为 `CREATE TABLESPACE` 命令会在 Coordinator 和 Segment 实例的数据目录下的 `pg_tblspc` 目录创建到指定目录的符号链接,Cloudberry Database 的表空间功能只支持那些能够使用符号链接的系统。 +因为 `CREATE TABLESPACE` 命令会在 Coordinator 和 Segment 实例的数据目录下的 `pg_tblspc` 目录创建到指定目录的符号链接,Apache Cloudberry 的表空间功能只支持那些能够使用符号链接的系统。 你不能在事务块中执行 `CREATE TABLESPACE`。 在创建表空间时,请确保文件系统位置有足够的 I/O 速度和可用磁盘空间。 :::info 注意 -Cloudberry Database 不支持为具有相同 content ID 的主-镜像对配置不同的表空间位置。只能为不同的 content ID 配置不同的位置。请勿修改 `pg_tblspc` 目录下的符号链接,以此使得主-镜像对指向不同的文件位置;这会导致错误行为。 +Apache Cloudberry 不支持为具有相同 content ID 的主-镜像对配置不同的表空间位置。只能为不同的 content ID 
配置不同的位置。请勿修改 `pg_tblspc` 目录下的符号链接,以此使得主-镜像对指向不同的文件位置;这会导致错误行为。 ::: ## 示例 @@ -89,7 +89,7 @@ CREATE TABLESPACE mytblspace LOCATION '/mydbtspc/mytestspace' WITH (content0='/t ## 兼容性 -`CREATE TABLESPACE` 是 Cloudberry Database 的扩展。 +`CREATE TABLESPACE` 是 Apache Cloudberry 的扩展。 ## See also diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-database.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-database.md index 91b789c840..7e49ad188d 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-database.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-database.md @@ -24,7 +24,7 @@ DROP DATABASE [IF EXISTS] **`IF EXISTS`** -如果数据库不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果数据库不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-extension.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-extension.md index 659a328931..f21f8a31e4 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-extension.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-extension.md @@ -4,7 +4,7 @@ title: DROP EXTENSION # DROP EXTENSION -从 Cloudberry Database 中移除一个扩展。 +从 Apache Cloudberry 中移除一个扩展。 ## 语法概要 @@ -17,7 +17,7 @@ DROP EXTENSION [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] `DROP EXTENSION` 从数据库中移除一个扩展,同时也会移除扩展的组件对象。 :::info 注意 -原本用于创建扩展的支持文件(例如库和 `.control` 文件)不会被删除。你需要手动从 Cloudberry Database 主机上移除这些文件。 +原本用于创建扩展的支持文件(例如库和 `.control` 文件)不会被删除。你需要手动从 Apache Cloudberry 主机上移除这些文件。 ::: 要使用 `DROP EXTENSION`,你必须是扩展的所有者。 @@ -26,7 +26,7 @@ DROP EXTENSION [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] **`IF EXISTS`** -如果扩展不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果扩展不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -42,7 +42,7 @@ DROP EXTENSION [ IF EXISTS ] [, ...] 
[ CASCADE | RESTRICT ] **`RESTRICT`** -如果有对象依赖于该扩展(除了其自身的成员对象和同一 `DROP` 命令中列出的其他扩展),Cloudberry Database 会拒绝删除该扩展。这是默认行为。 +如果有对象依赖于该扩展(除了其自身的成员对象和同一 `DROP` 命令中列出的其他扩展),Apache Cloudberry 会拒绝删除该扩展。这是默认行为。 ## 示例 @@ -56,7 +56,7 @@ DROP EXTENSION hstore; ## 兼容性 -`DROP EXTENSION` 语句是 Cloudberry Database 的扩展,不在 SQL 标准中。 +`DROP EXTENSION` 语句是 Apache Cloudberry 的扩展,不在 SQL 标准中。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-external-table.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-external-table.md index af117c7f47..919f6a54ca 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-external-table.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-external-table.md @@ -24,7 +24,7 @@ DROP EXTERNAL [WEB] TABLE [IF EXISTS] [CASCADE | RESTRICT] **`IF EXISTS`** -如果外部表不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果外部表不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -36,7 +36,7 @@ DROP EXTERNAL [WEB] TABLE [IF EXISTS] [CASCADE | RESTRICT] **`RESTRICT`** -如果有任何对象依赖于外部表,Cloudberry Database 会拒绝删除外部表。这是默认行为。 +如果有任何对象依赖于外部表,Apache Cloudberry 会拒绝删除外部表。这是默认行为。 ## 示例 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-index.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-index.md index 4dab85a188..02042b738c 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-index.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-index.md @@ -26,7 +26,7 @@ DROP INDEX [ CONCURRENTLY ] [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] **`IF EXISTS`** -如果索引不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果索引不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -38,7 +38,7 @@ DROP INDEX [ CONCURRENTLY ] [ IF EXISTS ] [, ...] 
[ CASCADE | RESTRICT ] **`RESTRICT`** -如果有任何对象依赖于索引,Cloudberry Database 会拒绝删除索引。这是默认行为。 +如果有任何对象依赖于索引,Apache Cloudberry 会拒绝删除索引。这是默认行为。 ## 示例 @@ -50,7 +50,7 @@ DROP INDEX title_idx; ## 兼容性 -`DROP INDEX` 是 Cloudberry Database 的语言扩展。SQL 标准中没有关于索引的规定。 +`DROP INDEX` 是 Apache Cloudberry 的语言扩展。SQL 标准中没有关于索引的规定。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-materialized-view.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-materialized-view.md index 28515d7bb7..7f73a489c3 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-materialized-view.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-materialized-view.md @@ -20,7 +20,7 @@ DROP MATERIALIZED VIEW [ IF EXISTS ] [, ...] [ CASCADE | RESTRICT ] **`IF EXISTS`** -如果物化视图不存在,则不抛出错误。此时 Cloudberry Database 会发出一个提醒。 +如果物化视图不存在,则不抛出错误。此时 Apache Cloudberry 会发出一个提醒。 **`name`** @@ -32,7 +32,7 @@ DROP MATERIALIZED VIEW [ IF EXISTS ] [, ...] 
[ CASCADE | RESTRICT ] **`RESTRICT`** -如果有任何对象依赖于物化视图,Cloudberry Database 会拒绝删除物化视图。这是默认行为。 +如果有任何对象依赖于物化视图,Apache Cloudberry 会拒绝删除物化视图。这是默认行为。 ## 示例 @@ -44,7 +44,7 @@ DROP MATERIALIZED VIEW order_summary; ## 兼容性 -`DROP MATERIALIZED VIEW` 是 Cloudberry Database 的语言扩展,在 SQL 标准中没有。 +`DROP MATERIALIZED VIEW` 是 Apache Cloudberry 的语言扩展,在 SQL 标准中没有。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-role.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-role.md index 636dcd3484..d59fe97a54 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-role.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-role.md @@ -40,7 +40,7 @@ DROP ROLE sally, bob; ## 兼容性 -SQL 标准定义了 `DROP ROLE`,但一次只允许删除一个角色,并且它规定的权限要求与 Cloudberry Database 使用的不同。 +SQL 标准定义了 `DROP ROLE`,但一次只允许删除一个角色,并且它规定的权限要求与 Apache Cloudberry 使用的不同。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-rule.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-rule.md index 6c0aab2f70..24c6c27339 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-rule.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-rule.md @@ -20,7 +20,7 @@ DROP RULE [IF EXISTS] ON [CASCADE | RESTRICT] **`IF EXISTS`** -如果规则不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果规则不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -36,7 +36,7 @@ DROP RULE [IF EXISTS] ON [CASCADE | RESTRICT] **`RESTRICT`** -如果有对象依赖于规则,Cloudberry Database 会拒绝删除规则。这是默认值。 +如果有对象依赖于规则,Apache Cloudberry 会拒绝删除规则。这是默认值。 ## 示例 @@ -48,7 +48,7 @@ DROP RULE sales_2006 ON sales; ## 兼容性 -`DROP RULE` 是 Cloudberry Database 的扩展,整个查询重写系统也是如此。 +`DROP RULE` 是 Apache Cloudberry 的扩展,整个查询重写系统也是如此。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-schema.md 
b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-schema.md index 76199f4630..824e4fc244 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-schema.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-schema.md @@ -22,7 +22,7 @@ Schema 只能由其所有者或超级用户删除。请注意,即使所有者 **`IF EXISTS`** -如果 schema 不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果 schema 不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -34,7 +34,7 @@ Schema 只能由其所有者或超级用户删除。请注意,即使所有者 **`RESTRICT`** -如果 schema 包含任何对象,Cloudberry Database 会拒绝删除 schema。这是默认行为。 +如果 schema 包含任何对象,Apache Cloudberry 会拒绝删除 schema。这是默认行为。 ## 注意事项 @@ -50,7 +50,7 @@ DROP SCHEMA mystuff CASCADE; ## 兼容性 -`DROP SCHEMA` 是完全符合 SQL 标准的,但该标准只允许一次删除一个 schema。此外,`IF EXISTS` 选项是 Cloudberry Database 的扩展。 +`DROP SCHEMA` 是完全符合 SQL 标准的,但该标准只允许一次删除一个 schema。此外,`IF EXISTS` 选项是 Apache Cloudberry 的扩展。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-table.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-table.md index 21f0fcbbba..44d534735d 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-table.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-table.md @@ -22,7 +22,7 @@ DROP TABLE [IF EXISTS] [, ...] [CASCADE | RESTRICT] **`IF EXISTS`** -如果表不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果表不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -34,7 +34,7 @@ DROP TABLE [IF EXISTS] [, ...] 
[CASCADE | RESTRICT] **`RESTRICT`** -如果有对象依赖于表,Cloudberry Database 会拒绝删除表。这是默认行为。 +如果有对象依赖于表,Apache Cloudberry 会拒绝删除表。这是默认行为。 ## 示例 @@ -52,7 +52,7 @@ DROP TABLE films, distributors; ## 兼容性 -`DROP TABLE` 符合 SQL 标准,但该标准只允许一次删除一个表。此外,`IF EXISTS` 选项是 Cloudberry Database 的扩展。 +`DROP TABLE` 符合 SQL 标准,但该标准只允许一次删除一个表。此外,`IF EXISTS` 选项是 Apache Cloudberry 的扩展。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-tablespace.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-tablespace.md index ac3b7a3e09..9285630e48 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-tablespace.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-tablespace.md @@ -22,7 +22,7 @@ DROP TABLESPACE [IF EXISTS] **`IF EXISTS`** -如果表空间不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果表空间不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -32,7 +32,7 @@ DROP TABLESPACE [IF EXISTS] 不能在事务块中执行 `DROP TABLESPACE`。 -建议在系统活动较低的时段执行 `DROP TABLESPACE` 命令,这样可以避免因表和临时对象的并发创建引发的问题。在删除表空间的过程中,存在一个极短的时间窗口,可能会有新的表被创建于即将删除的表空间内。如果出现这种情况,Cloudberry Database 将会发出警告。这是一个 `DROP TABLESPACE` 警告的示例。 +建议在系统活动较低的时段执行 `DROP TABLESPACE` 命令,这样可以避免因表和临时对象的并发创建引发的问题。在删除表空间的过程中,存在一个极短的时间窗口,可能会有新的表被创建于即将删除的表空间内。如果出现这种情况,Apache Cloudberry 将会发出警告。这是一个 `DROP TABLESPACE` 警告的示例。 ```sql testdb=# DROP TABLESPACE mytest; @@ -54,7 +54,7 @@ DROP TABLESPACE mystuff; ## 兼容性 -`DROP TABLESPACE` 是 Cloudberry Database 的扩展。 +`DROP TABLESPACE` 是 Apache Cloudberry 的扩展。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-type.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-type.md index c5c6a1631b..94485a4c7d 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-type.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-type.md @@ -20,7 +20,7 @@ DROP TYPE [IF EXISTS] [, ...] 
[CASCADE | RESTRICT] **`IF EXISTS`** -如果要删除的数据类型不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果要删除的数据类型不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -32,7 +32,7 @@ DROP TYPE [IF EXISTS] [, ...] [CASCADE | RESTRICT] **`RESTRICT`** -如果有对象依赖于要删除的数据类型,Cloudberry Database 会拒绝删除该数据类型。这是默认行为。 +如果有对象依赖于要删除的数据类型,Apache Cloudberry 会拒绝删除该数据类型。这是默认行为。 ## 示例 @@ -44,7 +44,7 @@ DROP TYPE box; ## 兼容性 -除了 `IF EXISTS` 选项是 Cloudberry Database 的扩展之外,Cloudberry Database 中的 `DROP TYPE` 命令与 SQL 标准中的 `DROP TYPE` 命令类似。但请注意,Cloudberry Database 中的 `CREATE TYPE` 命令和数据类型扩展机制与 SQL 标准有很大不同。 +除了 `IF EXISTS` 选项是 Apache Cloudberry 的扩展之外,Apache Cloudberry 中的 `DROP TYPE` 命令与 SQL 标准中的 `DROP TYPE` 命令类似。但请注意,Apache Cloudberry 中的 `CREATE TYPE` 命令和数据类型扩展机制与 SQL 标准有很大不同。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-user.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-user.md index 2305c5ddc1..a9a21e5cdb 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-user.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-user.md @@ -18,7 +18,7 @@ DROP USER [IF EXISTS] [, ...] ## 兼容性 -`DROP USER` 命令是 Cloudberry Database 特别添加的功能,并非 SQL 标准中的内容。SQL 标准没有具体规定用户的定义方式,这是由各个数据库系统自己决定的。 +`DROP USER` 命令是 Apache Cloudberry 特别添加的功能,并非 SQL 标准中的内容。SQL 标准没有具体规定用户的定义方式,这是由各个数据库系统自己决定的。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-view.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-view.md index b0c7af1e95..aa5433c499 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-view.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-drop-view.md @@ -20,7 +20,7 @@ DROP VIEW [IF EXISTS] [, ...] 
[CASCADE | RESTRICT] **`IF EXISTS`** -如果要删除的视图不存在,则不抛出错误。在这种情况下,Cloudberry Database 会发出一个提醒。 +如果要删除的视图不存在,则不抛出错误。在这种情况下,Apache Cloudberry 会发出一个提醒。 **`name`** @@ -32,7 +32,7 @@ DROP VIEW [IF EXISTS] [, ...] [CASCADE | RESTRICT] **`RESTRICT`** -如果有对象依赖于该视图,Cloudberry Database 会拒绝删除该视图。这是默认行为。 +如果有对象依赖于该视图,Apache Cloudberry 会拒绝删除该视图。这是默认行为。 ## 示例 @@ -44,7 +44,7 @@ DROP VIEW topten; ## 兼容性 -`DROP VIEW` 完全符合 SQL 标准,但该标准只允许一次命令删除一个视图。此外,`IF EXISTS` 选项是 Cloudberry Database 的扩展。 +`DROP VIEW` 完全符合 SQL 标准,但该标准只允许一次命令删除一个视图。此外,`IF EXISTS` 选项是 Apache Cloudberry 的扩展。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-end.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-end.md index b4846a6849..60b80494bb 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-end.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-end.md @@ -14,7 +14,7 @@ END [WORK | TRANSACTION] [AND [NO] CHAIN] ## 描述 -`END` 提交当前事务。事务中的所有更改对其他用户可见,并且在发生崩溃时保证是持久的。该命令是 Cloudberry Database 的扩展,等效于 [`COMMIT`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/commit.md)。 +`END` 提交当前事务。事务中的所有更改对其他用户可见,并且在发生崩溃时保证是持久的。该命令是 Apache Cloudberry 的扩展,等效于 [`COMMIT`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/commit.md)。 ## 参数 @@ -43,7 +43,7 @@ END; ## 兼容性 -`END` 是 Cloudberry Database 的扩展,提供了与 SQL 标准中的 [`COMMIT`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/commit.md) 等效的功能。 +`END` 是 Apache Cloudberry 的扩展,提供了与 SQL 标准中的 [`COMMIT`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/commit.md) 等效的功能。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback-to-savepoint.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback-to-savepoint.md index 3f92b55bd4..ba916c8ccf 100644 --- 
a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback-to-savepoint.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback-to-savepoint.md @@ -67,7 +67,7 @@ COMMIT; ## 兼容性 -SQL 标准规定关键字 `SAVEPOINT` 是必须的,但 Cloudberry Database(以及 Oracle)允许它被省略。SQL 只允许在 `ROLLBACK` 之后使用 `WORK`,而不是 `TRANSACTION`。此外,SQL 还有一个可选的 `AND [NO] CHAIN` 子句,Cloudberry Database 目前不支持。除此之外,该命令符合 SQL 标准。 +SQL 标准规定关键字 `SAVEPOINT` 是必须的,但 Apache Cloudberry(以及 Oracle)允许它被省略。SQL 只允许在 `ROLLBACK` 之后使用 `WORK`,而不是 `TRANSACTION`。此外,SQL 还有一个可选的 `AND [NO] CHAIN` 子句,Apache Cloudberry 目前不支持。除此之外,该命令符合 SQL 标准。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback.md index 037350b0be..e4448b8016 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-rollback.md @@ -43,7 +43,7 @@ ROLLBACK; ## 兼容性 -`ROLLBACK` 命令符合 SQL 标准。`ROLLBACK TRANSACTION` 是 Cloudberry Database 的扩展。 +`ROLLBACK` 命令符合 SQL 标准。`ROLLBACK TRANSACTION` 是 Apache Cloudberry 的扩展。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-savepoint.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-savepoint.md index 3817bf3ef6..84340767c4 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-savepoint.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-savepoint.md @@ -86,7 +86,7 @@ COMMIT; ## 兼容性 -SQL 标准要求新的保存点建立时,自动销毁同名的旧保存点。在 Cloudberry Database 中,旧保存点会被保留,但只有最新的保存点会在回滚或释放时被使用。释放最新的保存点会使旧的保存点再次可被 [`ROLLBACK TO SAVEPOINT`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/rollback-to-savepoint.md) 和 [`RELEASE SAVEPOINT`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/release-savepoint.md) 访问。除此之外,`SAVEPOINT` 完全符合 SQL 标准。 +SQL 
标准要求新的保存点建立时,自动销毁同名的旧保存点。在 Apache Cloudberry 中,旧保存点会被保留,但只有最新的保存点会在回滚或释放时被使用。释放最新的保存点会使旧的保存点再次可被 [`ROLLBACK TO SAVEPOINT`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/rollback-to-savepoint.md) 和 [`RELEASE SAVEPOINT`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/release-savepoint.md) 访问。除此之外,`SAVEPOINT` 完全符合 SQL 标准。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-role.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-role.md index 09605249da..606cab6206 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-role.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-role.md @@ -70,7 +70,7 @@ SELECT SESSION_USER, CURRENT_USER; ## 兼容性 -Cloudberry Database 允许使用标识符语法(rolename),而 SQL 标准要求角色名称必须写成字符串字面量。SQL 标准不允许在事务中执行 `SET ROLE` 命令;Cloudberry Database 不做此限制。`SESSION` 和 `LOCAL` 修饰符是 Cloudberry Database 的扩展,`RESET` 语法也是如此。 +Apache Cloudberry 允许使用标识符语法(rolename),而 SQL 标准要求角色名称必须写成字符串字面量。SQL 标准不允许在事务中执行 `SET ROLE` 命令;Apache Cloudberry 不做此限制。`SESSION` 和 `LOCAL` 修饰符是 Apache Cloudberry 的扩展,`RESET` 语法也是如此。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-transaction.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-transaction.md index 23df7cbbc6..a7d36250aa 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-transaction.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-set-transaction.md @@ -31,7 +31,7 @@ SET SESSION CHARACTERISTICS AS TRANSACTION [, ...] 
事务的可设置属性包括事务的隔离等级、访问模式(读/写或仅读)以及是否可延迟。另外,虽然可以为当前事务选择一个快照,但这不可作为会话的默认设置。 :::info -需要事务可序列化时,才能将其设定为可延迟。由于 Cloudberry Database 不支持可序列化的事务,因此使用 `DEFERRABLE` 选项并不会产生任何效果。 +需要事务可序列化时,才能将其设定为可延迟。由于 Apache Cloudberry 不支持可序列化的事务,因此使用 `DEFERRABLE` 选项并不会产生任何效果。 ::: 一个事务的隔离级别决定了在其他事务并发运行时,该事务能看到的数据。 @@ -39,7 +39,7 @@ SET SESSION CHARACTERISTICS AS TRANSACTION [, ...] - **READ COMMITTED**:一个语句只能看到在它开始之前提交的行。这是默认行为。 - **REPEATABLE READ**:当前事务中的所有语句只能看到在事务中执行的第一个查询或数据修改语句之前提交的行。 -SQL 标准定义了另外两个隔离级别,`READ UNCOMMITTED` 和 `SERIALIZABLE`。在 Cloudberry Database 中,`READ UNCOMMITTED` 被视为 `READ COMMITTED`。如果你指定了 `SERIALIZABLE`,Cloudberry Database 会回退到 `REPEATABLE READ`。 +SQL 标准定义了另外两个隔离级别,`READ UNCOMMITTED` 和 `SERIALIZABLE`。在 Apache Cloudberry 中,`READ UNCOMMITTED` 被视为 `READ COMMITTED`。如果你指定了 `SERIALIZABLE`,Apache Cloudberry 会回退到 `REPEATABLE READ`。 事务隔离级别在事务的第一个查询或数据修改语句(`SELECT`、`INSERT`、`DELETE`、`UPDATE`、`FETCH` 或 `COPY`)执行后不能再更改。 @@ -50,13 +50,13 @@ SQL 标准定义了另外两个隔离级别,`READ UNCOMMITTED` 和 `SERIALIZAB - 不允许 `COMMENT`、`GRANT`、`REVOKE`、`TRUNCATE`。 - 如果 `EXPLAIN ANALYZE` 和 `EXECUTE` 要运行的命令在上述列表中,则不允许 `EXPLAIN ANALYZE` 和 `EXECUTE`。这是一个高级别的只读概念,它并不阻止所有对磁盘的写入。 -`DEFERRABLE` 事务属性仅在事务同时设置为 `SERIALIZABLE` 和 `READ ONLY` 时才有效。当事务配置了这些属性后,在最初获取快照时可能会暂时阻塞,但随后能够继续执行,既避免了 `SERIALIZABLE` 事务常见的开销,也不会因为序列化错误而被取消或引起问题。这种模式特别适合执行长时间的报表生成或数据备份。由于 Cloudberry Database 不支持序列化事务,`DEFERRABLE` 属性在 Cloudberry Database 中不会产生任何效果。 +`DEFERRABLE` 事务属性仅在事务同时设置为 `SERIALIZABLE` 和 `READ ONLY` 时才有效。当事务配置了这些属性后,在最初获取快照时可能会暂时阻塞,但随后能够继续执行,既避免了 `SERIALIZABLE` 事务常见的开销,也不会因为序列化错误而被取消或引起问题。这种模式特别适合执行长时间的报表生成或数据备份。由于 Apache Cloudberry 不支持序列化事务,`DEFERRABLE` 属性在 Apache Cloudberry 中不会产生任何效果。 `SET TRANSACTION SNAPSHOT` 命令允许新事务采用与现有事务相同的快照来执行。已有事务必须已通过 `pg_export_snapshot()` 函数导出其快照。此函数返回一个快照标识符,该标识符需传递给 `SET TRANSACTION SNAPSHOT` 来指定将导入哪个快照。标识符必须以字符串字面量形式指定,例如 `'000003A1-1'`。`SET TRANSACTION SNAPSHOT` 必须在事务开始时、执行第一个查询或数据修改指令(如 `SELECT`、`INSERT`、`DELETE`、`UPDATE`、`FETCH` 或 `COPY`)之前调用。同时,事务应已设置为 `SERIALIZABLE` 或 `REPEATABLE READ` 
隔离级别(否则,因为 `READ COMMITTED` 模式会为每个指令取得新快照,导致快照立即失效)。如果导入快照的事务采用 `SERIALIZABLE` 隔离级别,则导出快照的事务也必须使用相同隔离级别。另外,非只读的序列化事务不能导入来自只读事务的快照。 ## 注意事项 -如果在执行 `SET TRANSACTION` 前没有先执行 [`START TRANSACTION`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/start-transaction.md) 或 [`BEGIN`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/begin.md),Cloudberry Database 会发出警告,但不会产生其他影响。 +如果在执行 `SET TRANSACTION` 前没有先执行 [`START TRANSACTION`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/start-transaction.md) 或 [`BEGIN`](https://github.com/cloudberrydb/cloudberrydb-site/blob/cbdb-doc-validation/docs/sql-stmts/begin.md),Apache Cloudberry 会发出警告,但不会产生其他影响。 可以通过在 `BEGIN` 或 `START TRANSACTION` 中指定所需的 `transaction_modes` 来省去 `SET TRANSACTION`。但该选项不适用于 `SET TRANSACTION SNAPSHOT`。 @@ -86,13 +86,13 @@ SET TRANSACTION SNAPSHOT '00000003-0000001B-1'; ## 兼容性 -除了 `DEFERRABLE` 事务模式和 `SET TRANSACTION SNAPSHOT` 外,`SET TRANSACTION` 命令在 SQL 标准中有定义。`DEFERRABLE` 事务模式和 `SET TRANSACTION SNAPSHOT` 形式是 Cloudberry Database 的扩展。 +除了 `DEFERRABLE` 事务模式和 `SET TRANSACTION SNAPSHOT` 外,`SET TRANSACTION` 命令在 SQL 标准中有定义。`DEFERRABLE` 事务模式和 `SET TRANSACTION SNAPSHOT` 形式是 Apache Cloudberry 的扩展。 -在 SQL 标准中,`SERIALIZABLE` 是默认的事务隔离级别。在 Cloudberry Database 中,默认的是 `READ COMMITTED`。由于缺乏谓词锁定,Cloudberry Database 不完全支持 `SERIALIZABLE` 级别,因此当指定 `SERIALIZABLE` 时,它会回退到 `REPEATABLE READ` 级别。本质上,谓词锁定系统通过限制写入内容来防止幻读,而 Cloudberry Database 中使用的多版本并发控制模型(MVCC)通过限制读取内容来防止幻读。 +在 SQL 标准中,`SERIALIZABLE` 是默认的事务隔离级别。在 Apache Cloudberry 中,默认的是 `READ COMMITTED`。由于缺乏谓词锁定,Apache Cloudberry 不完全支持 `SERIALIZABLE` 级别,因此当指定 `SERIALIZABLE` 时,它会回退到 `REPEATABLE READ` 级别。本质上,谓词锁定系统通过限制写入内容来防止幻读,而 Apache Cloudberry 中使用的多版本并发控制模型(MVCC)通过限制读取内容来防止幻读。 -在 SQL 标准中,还有一个事务特性可以通过这些 `SET TRANSACTION` 命令来设置:诊断区域的大小。这个概念是特定于嵌入式 SQL 的,因此并未在 Cloudberry Database 服务器中实现。 +在 SQL 标准中,还有一个事务特性可以通过这些 `SET TRANSACTION` 命令来设置:诊断区域的大小。这个概念是特定于嵌入式 SQL 的,因此并未在 Apache Cloudberry 服务器中实现。 -SQL 标准要求在连续的 `transaction_modes` 
之间使用逗号,但由于历史原因,Cloudberry Database 允许省略逗号。 +SQL 标准要求在连续的 `transaction_modes` 之间使用逗号,但由于历史原因,Apache Cloudberry 允许省略逗号。 ## 另见 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-start-transaction.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-start-transaction.md index ee2944ba3b..5e9754035d 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-start-transaction.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-start-transaction.md @@ -28,11 +28,11 @@ START TRANSACTION [] [, ...] ## 兼容性 -在标准 SQL 中,不需要使用 `START TRANSACTION` 来开启一个事务块:任何 SQL 命令都会隐式地开启一个事务块。Cloudberry Database 的行为可以看作是在每个不遵循 `START TRANSACTION`(或 `BEGIN`)的命令之后隐式地执行 `COMMIT`,因此通常被称为“自动提交”。其他关系型数据库系统可能会提供自动提交功能作为一种便利。 +在标准 SQL 中,不需要使用 `START TRANSACTION` 来开启一个事务块:任何 SQL 命令都会隐式地开启一个事务块。Apache Cloudberry 的行为可以看作是在每个不遵循 `START TRANSACTION`(或 `BEGIN`)的命令之后隐式地执行 `COMMIT`,因此通常被称为“自动提交”。其他关系型数据库系统可能会提供自动提交功能作为一种便利。 -`DEFERRABLE` `transaction_mode` 是 Cloudberry Database 的语言扩展。 +`DEFERRABLE` `transaction_mode` 是 Apache Cloudberry 的语言扩展。 -SQL 标准要求在连续的 `transaction_modes` 之间使用逗号,但出于历史原因,Cloudberry Database 允许省略逗号。 +SQL 标准要求在连续的 `transaction_modes` 之间使用逗号,但出于历史原因,Apache Cloudberry 允许省略逗号。 另见 [`SET TRANSACTION`](/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/set-transaction.md) 中的兼容性部分。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-truncate.md b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-truncate.md index 10cf21924c..ec42f9eb11 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-truncate.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sql-stmts/sql-stmt-truncate.md @@ -7,7 +7,7 @@ title: TRUNCATE 清空一张表或一组表中的所有行。 :::info 注意 -Cloudberry Database 并不严格执行参照完整性语法(外键约束)。即使没有指定 `CASCADE` 选项,`TRUNCATE` 也会清空被外键约束引用的表。 +Apache Cloudberry 并不严格执行参照完整性语法(外键约束)。即使没有指定 `CASCADE` 选项,`TRUNCATE` 也会清空被外键约束引用的表。 ::: ## 语法概要 @@ -39,11 +39,11 
@@ TRUNCATE [TABLE] [ONLY] [ * ] [, ...] **`CASCADE`** -因为这个关键字适用于外键引用(Cloudberry Database 不支持外键约束),所以它没有任何效果。 +因为这个关键字适用于外键引用(Apache Cloudberry 不支持外键约束),所以它没有任何效果。 **`RESTRICT`** -因为这个关键字适用于外键引用(Cloudberry Database 不支持外键约束),所以它没有任何效果。 +因为这个关键字适用于外键引用(Apache Cloudberry 不支持外键约束),所以它没有任何效果。 ## 注意事项 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/start-and-stop-cbdb-database.md b/i18n/zh/docusaurus-plugin-content-docs/current/start-and-stop-cbdb-database.md index 48f44c5659..2b03da935c 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/start-and-stop-cbdb-database.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/start-and-stop-cbdb-database.md @@ -2,33 +2,33 @@ title: 启动和停止数据库 --- -# 启动和停止 Cloudberry Database +# 启动和停止 Apache Cloudberry -在 Cloudberry Database 数据库管理系统中,数据库服务器实例(包括 Coordinator 和所有 Segment)启停和运行在所有系统主机上,这样各实例能够协同工作,形成一个统一的数据库管理系统。 +在 Apache Cloudberry 数据库管理系统中,数据库服务器实例(包括 Coordinator 和所有 Segment)启停和运行在所有系统主机上,这样各实例能够协同工作,形成一个统一的数据库管理系统。 -因为 Cloudberry Database 系统分布在多台机器上,所以 Cloudberry Database 系统启动和停止的过程与常规 PostgreSQL 数据库系统不同。 +因为 Apache Cloudberry 系统分布在多台机器上,所以 Apache Cloudberry 系统启动和停止的过程与常规 PostgreSQL 数据库系统不同。 -命令行工具 `gpstart` 和 `gpstop` 分别用于启动和停止 Cloudberry Database。它们位于 Coordinator 主机上的 `$GPHOME/bin` 目录中。 +命令行工具 `gpstart` 和 `gpstop` 分别用于启动和停止 Apache Cloudberry。它们位于 Coordinator 主机上的 `$GPHOME/bin` 目录中。 :::tip 提示 请勿使用 `kill` 命令来终止 Postgres 进程,因为使用 `kill -9` 或 `kill -11` 会损坏数据库并阻碍故障原因分析。要终止 Postgres 进程,请使用数据库函数 `pg_cancel_backend()`。 ::: -## 启动 Cloudberry Database +## 启动 Apache Cloudberry -要启动已初始化的 Cloudberry Database,在 Coordinator 实例上执行 `gpstart`。 +要启动已初始化的 Apache Cloudberry,在 Coordinator 实例上执行 `gpstart`。 -如果数据库系统已通过 `gpinitsystem` 初始化并被 `gpstop` 停止,你可以通过 `gpstart` 来重启数据库。`gpstart` 的作用是启动 Cloudberry Database 集群中的所有 `postgres` 实例,从而完成整个数据库的启动。在启动过程中,`gpstart` 会协调和并行执行所有必要的步骤。 +如果数据库系统已通过 `gpinitsystem` 初始化并被 `gpstop` 停止,你可以通过 `gpstart` 来重启数据库。`gpstart` 的作用是启动 Apache Cloudberry 集群中的所有 `postgres` 实例,从而完成整个数据库的启动。在启动过程中,`gpstart` 
会协调和并行执行所有必要的步骤。 -在 Coordinator 主机上执行 `gpstart` 来启动 Cloudberry Database: +在 Coordinator 主机上执行 `gpstart` 来启动 Apache Cloudberry: ```shell $ gpstart ``` -## 重启 Cloudberry Database +## 重启 Apache Cloudberry -要重启 Cloudberry Database,请在 Coordinator 主机上执行带 `-r` 选项的 `gpstop` 命令: +要重启 Apache Cloudberry,请在 Coordinator 主机上执行带 `-r` 选项的 `gpstop` 命令: ```shell $ gpstop -r @@ -36,11 +36,11 @@ $ gpstop -r ## 重新加载配置文件更改 -你可以在不中断系统的情况下,重新加载 Cloudberry Database 配置文件的更改。 +你可以在不中断系统的情况下,重新加载 Apache Cloudberry 配置文件的更改。 `gpstop` 支持在不中断服务的情况下,重新加载 `pg_hba.conf` 配置文件和 `postgresql.conf` 中的运行时参数。客户端在重新连接到数据库时,活动会话将应用配置文件中的更改。但是,许多服务器配置参数只在完整重启系统 (`gpstop -r`) 后才生效。 -在不关闭 Cloudberry Database 系统的情况下,使用 `gpstop -u` 重新加载配置文件更改: +在不关闭 Apache Cloudberry 系统的情况下,使用 `gpstop -u` 重新加载配置文件更改: ```shell $ gpstop -u @@ -73,31 +73,31 @@ $ gpstop -u 不正确地使用维护模式可能导致系统状态不一致。建议此操作由技术支持来执行。 ::: -## 停止 Cloudberry Database +## 停止 Apache Cloudberry -`gpstop` 在 Coordinator 主机上停止或重启 Cloudberry Database 系统。执行后,`gpstop` 会停止系统中的所有 `postgres` 进程,包括 Coordinator 和所有 Segment 实例。`gpstop` 默认使用多个并行工作线程来关闭组成 Cloudberry Database 集群的 Postgres 实例。要立即停止 Cloudberry Database,请使用快速模式。 +`gpstop` 在 Coordinator 主机上停止或重启 Apache Cloudberry 系统。执行后,`gpstop` 会停止系统中的所有 `postgres` 进程,包括 Coordinator 和所有 Segment 实例。`gpstop` 默认使用多个并行工作线程来关闭组成 Apache Cloudberry 集群的 Postgres 实例。要立即停止 Apache Cloudberry,请使用快速模式。 :::tip 提示 不建议使用快速模式。此模式会停止所有数据库进程,使数据库服务器来不及完成事务处理,或来不及清理任何临时或进程中的工作文件。 ::: -- 停止 Cloudberry Database: +- 停止 Apache Cloudberry: ```shell $ gpstop ``` -- 在快速模式下停止 Cloudberry Database: +- 在快速模式下停止 Apache Cloudberry: ```shell $ gpstop -M fast ``` -默认情况下,如果有客户端连接到数据库,则不允许关闭 Cloudberry Database。使用 `-M fast` 选项可以回滚所有正在进行的事务,并在关闭 Cloudberry Database 之前终止任何连接。 +默认情况下,如果有客户端连接到数据库,则不允许关闭 Apache Cloudberry。使用 `-M fast` 选项可以回滚所有正在进行的事务,并在关闭 Apache Cloudberry 之前终止任何连接。 ## 停止客户端进程 -Cloudberry Database 为每个客户端连接启动一个新的后端进程。具有 `SUPERUSER` 权限的 Cloudberry Database 用户可以取消和终止这些客户端的后端进程。 +Apache Cloudberry 为每个客户端连接启动一个新的后端进程。具有 `SUPERUSER` 权限的 Apache Cloudberry 
用户可以取消和终止这些客户端的后端进程。 通过 `pg_cancel_backend()` 函数取消后端进程,会结束正在排队或进行的客户端查询。通过 `pg_terminate_backend()` 函数终止后端进程,将终止与数据库的客户端连接。 @@ -111,7 +111,7 @@ Cloudberry Database 为每个客户端连接启动一个新的后端进程。具 - `pg_terminate_backend( pid int4 )` - `pg_terminate_backend( pid int4, msg text )` -如果你提供 `msg`,Cloudberry Database 会在返回给客户端的取消消息中包含 `msg` 文本。`msg` 限制为 128 字节,Cloudberry Database 会截断任何更长的内容。 +如果你提供 `msg`,Apache Cloudberry 会在返回给客户端的取消消息中包含 `msg` 文本。`msg` 限制为 128 字节,Apache Cloudberry 会截断任何更长的内容。 `pg_cancel_backend()` 和 `pg_terminate_backend()` 函数如果执行成功,则返回 `true`,否则返回 `false`。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sys-admin/check-database-system.md b/i18n/zh/docusaurus-plugin-content-docs/current/sys-admin/check-database-system.md index b27df76ec1..4d7e2372cc 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sys-admin/check-database-system.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sys-admin/check-database-system.md @@ -1,3 +1,3 @@ --- -title: 查看 Cloudberry Database 系统 +title: 查看 Apache Cloudberry 系统 --- diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/db-util-gpdemo.md b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/db-util-gpdemo.md index a6c1541ba9..bad01f2f8c 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/db-util-gpdemo.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/db-util-gpdemo.md @@ -4,7 +4,7 @@ title: gpdemo # gpdemo(引入自 v1.5.0 版本) -在 v1.5.0 以前,如果用户想在单节点上部署带有 Segment 的 Cloudberry Database 小型集群用于演示,需要花费时间编写配置文件和参数。自 v1.5.0 起,用户只需要通过内置的 `gpdemo` 脚本,使用一条命令就能快速部署指定 Segment 数的 Cloudberry Database 集群。 +在 v1.5.0 以前,如果用户想在单节点上部署带有 Segment 的 Apache Cloudberry 小型集群用于演示,需要花费时间编写配置文件和参数。自 v1.5.0 起,用户只需要通过内置的 `gpdemo` 脚本,使用一条命令就能快速部署指定 Segment 数的 Apache Cloudberry 集群。 `gpdemo` 将随其他系统工具(例如 `gpinitsystem`、`gpstart`、`gpstop` 等)一并安装到 `GPHOME/bin` 目录下。 @@ -52,7 +52,7 @@ NUM_PRIMARY_MIRROR_PAIRS=3 gpdemo :::info 提示 - 每个 Segment 由一个 Primary 和一个 Mirror 
组成,所以该参数值每增加一,将多创建两个节点。建议设置为奇数以更好地捕捉数据分布问题。 -- 当设置为 0 时,将部署一个单计算节点集群,详见[部署单计算节点的 Cloudberry Database 集群](/i18n/zh/docusaurus-plugin-content-docs/current/deploy-cbdb-with-single-node.md)。 +- 当设置为 0 时,将部署一个单计算节点集群,详见[部署单计算节点的 Apache Cloudberry 集群](/i18n/zh/docusaurus-plugin-content-docs/current/deploy-cbdb-with-single-node.md)。 ::: #### 指定节点的数据目录 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpdemo.md b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpdemo.md index a6c1541ba9..bad01f2f8c 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpdemo.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpdemo.md @@ -4,7 +4,7 @@ title: gpdemo # gpdemo(引入自 v1.5.0 版本) -在 v1.5.0 以前,如果用户想在单节点上部署带有 Segment 的 Cloudberry Database 小型集群用于演示,需要花费时间编写配置文件和参数。自 v1.5.0 起,用户只需要通过内置的 `gpdemo` 脚本,使用一条命令就能快速部署指定 Segment 数的 Cloudberry Database 集群。 +在 v1.5.0 以前,如果用户想在单节点上部署带有 Segment 的 Apache Cloudberry 小型集群用于演示,需要花费时间编写配置文件和参数。自 v1.5.0 起,用户只需要通过内置的 `gpdemo` 脚本,使用一条命令就能快速部署指定 Segment 数的 Apache Cloudberry 集群。 `gpdemo` 将随其他系统工具(例如 `gpinitsystem`、`gpstart`、`gpstop` 等)一并安装到 `GPHOME/bin` 目录下。 @@ -52,7 +52,7 @@ NUM_PRIMARY_MIRROR_PAIRS=3 gpdemo :::info 提示 - 每个 Segment 由一个 Primary 和一个 Mirror 组成,所以该参数值每增加一,将多创建两个节点。建议设置为奇数以更好地捕捉数据分布问题。 -- 当设置为 0 时,将部署一个单计算节点集群,详见[部署单计算节点的 Cloudberry Database 集群](/i18n/zh/docusaurus-plugin-content-docs/current/deploy-cbdb-with-single-node.md)。 +- 当设置为 0 时,将部署一个单计算节点集群,详见[部署单计算节点的 Apache Cloudberry 集群](/i18n/zh/docusaurus-plugin-content-docs/current/deploy-cbdb-with-single-node.md)。 ::: #### 指定节点的数据目录 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpshrink.md b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpshrink.md index 4a7070db9c..6f5aa91834 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpshrink.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpshrink.md @@ -4,7 +4,7 @@ title: gpshrink # 
gpshrink -Cloudberry Database 通过 gpshrink 系统工具缩容集群。集群资源空闲时,例如磁盘空间占用长期低于 20%、CPU 或内存占用率持续较低,则可以使用 gpshrink 来实现集群的缩容,从而节省服务器资源。用户可以通过 gpshrink 工具删除多余服务器上的 segment,从而实现集群缩容。 +Apache Cloudberry 通过 gpshrink 系统工具缩容集群。集群资源空闲时,例如磁盘空间占用长期低于 20%、CPU 或内存占用率持续较低,则可以使用 gpshrink 来实现集群的缩容,从而节省服务器资源。用户可以通过 gpshrink 工具删除多余服务器上的 segment,从而实现集群缩容。 gpshrink 在执行时分为两阶段: @@ -42,8 +42,8 @@ gpshrink 在执行时分为两阶段: ```bash # 以删除一个 segment 为例,以下为写入的 primary 和 mirror 的信息 - i-thd001y0|i-thd001y0|7004|/home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2|4|2|p - i-thd001y0|i-thd001y0|7007|/home/gpadmin/cloudberrydb/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2|7|2|m + i-thd001y0|i-thd001y0|7004|/home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2|4|2|p + i-thd001y0|i-thd001y0|7007|/home/gpadmin/cloudberry/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2|7|2|m ``` 4. 执行 `gpshrink` 命令两次。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/pg-filedump.md b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/pg-filedump.md index 8c7a564c20..9e2b5adde7 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/pg-filedump.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/pg-filedump.md @@ -4,9 +4,9 @@ title: pg_filedump # pg_filedump -Filedump,即系统命令行工具 `pg_filedump`,是一个用于将 Cloudberry Database 的堆文件、索引文件和控制文件格式化为人类可读形式的实用工具。通过 filedump,用户可以以多种方式格式化和转储文件,如在[命令行选项说明](#命令选项说明)部分所述,甚至可以直接转储二进制数据。 +Filedump,即系统命令行工具 `pg_filedump`,是一个用于将 Apache Cloudberry 的堆文件、索引文件和控制文件格式化为人类可读形式的实用工具。通过 filedump,用户可以以多种方式格式化和转储文件,如在[命令行选项说明](#命令选项说明)部分所述,甚至可以直接转储二进制数据。 -Cloudberry Database 的 filedump 仓库地址:`https://github.com/cloudberrydb/filedump` +Filedump for Apache Cloudberry 的仓库地址:`https://github.com/cloudberry-contrib/filedump` ## 使用场景 @@ -18,19 +18,19 @@ Filedump 工具主要适用于以下场景: ## 编译与安装 -在编译 filedump 前,你需要安装好一套 Cloudberry Database v1.0.0 及以上版本的集群,以下是典型的编译步骤: +在编译 filedump 前,你需要安装好一套 Apache Cloudberry v1.0.0 
及以上版本的集群,以下是典型的编译步骤: -1. 确保 CloudberryDB 包可以被找到: +1. 确保 Cloudberry 包可以被找到: ```bash su - gpadmin - source /usr/local/cloudberry-db/greenplum_path.sh + source /usr/local/cloudberry/greenplum_path.sh ``` -2. 将 GitHub 仓库 `cloudberrydb/filedump` 克隆至本地: +2. 将 GitHub 仓库 `cloudberry-contrib/filedump` 克隆至本地: ```bash - git clone https://github.com/cloudberrydb/filedump.git + git clone https://github.com/cloudberry-contrib/filedump.git ``` 3. 进入仓库目录,并执行编译命令: diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/table-storage-models.md b/i18n/zh/docusaurus-plugin-content-docs/current/table-storage-models.md index 938a2258e9..f73ea6a86b 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/table-storage-models.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/table-storage-models.md @@ -2,4 +2,4 @@ title: 选择表存储模型 --- -# 选择 Cloudberry Database 中的表存储模型 +# 选择 Apache Cloudberry 中的表存储模型 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md b/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md index dcbf997f00..c36a65dddd 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md @@ -4,7 +4,7 @@ title: 事务中的并发控制 # 事务中的并发控制 -本文档介绍 Cloudberry Database 中的事务并发控制,包括: +本文档介绍 Apache Cloudberry 中的事务并发控制,包括: - [多版本并发控制机制](#多版本并发控制机制) - [锁模式](#锁模式) @@ -12,15 +12,15 @@ title: 事务中的并发控制 ## 多版本并发控制机制 -Cloudberry Database 和 PostgreSQL 不使用锁机制来进行事务并发控制,而是使用多版本并发控制(MVCC)机制来维护数据一致性。MVCC 确保每个数据库会话的事务隔离,使得每个查询事务都能看到一个一致的数据快照。这保证了事务观察到的数据是一致的,不受其他并发事务的影响。 +Apache Cloudberry 和 PostgreSQL 不使用锁机制来进行事务并发控制,而是使用多版本并发控制(MVCC)机制来维护数据一致性。MVCC 确保每个数据库会话的事务隔离,使得每个查询事务都能看到一个一致的数据快照。这保证了事务观察到的数据是一致的,不受其他并发事务的影响。 然而,事务可见的具体数据变化受隔离级别的影响。默认的隔离级别是“已提交读”(`READ COMMITTED`),这意味着事务可以观察到其他已提交事务所做的数据变化。如果将隔离级别设置为“可重复读”(`REPEATABLE READ`),那么该事务中的查询将观察到事务开始时的数据状态,并且不会看到其他事务在此期间所做的更改。为了指定事务的隔离级别,你可以使用语句 `BEGIN 
TRANSACTION ISOLATION LEVEL REPEATABLE READ` 来以“可重复读”隔离级别启动事务。 -由于 MVCC 不使用显式锁进行并发控制,因此最大限度减少了锁争用,Cloudberry Database 在多用户环境中保持了合理的性能。用于查询(读取)数据的锁不会与用于写入数据的锁发生冲突。 +由于 MVCC 不使用显式锁进行并发控制,因此最大限度减少了锁争用,Apache Cloudberry 在多用户环境中保持了合理的性能。用于查询(读取)数据的锁不会与用于写入数据的锁发生冲突。 ## 锁模式 -Cloudberry Database 提供多种锁模式来控制对表数据的并发访问。大多数 Cloudberry Database 的 SQL 命令会自动获取适当的锁,以确保在命令执行期间,被引用的表不会被删除或被以不兼容的方式修改。对于那些难以适应 MVCC 行为的应用程序,你可以使用 `LOCK` 命令来获取显式锁。然而,通常情况下,正确使用 MVCC 能够提供更好的性能。 +Apache Cloudberry 提供多种锁模式来控制对表数据的并发访问。大多数 Apache Cloudberry 的 SQL 命令会自动获取适当的锁,以确保在命令执行期间,被引用的表不会被删除或被以不兼容的方式修改。对于那些难以适应 MVCC 行为的应用程序,你可以使用 `LOCK` 命令来获取显式锁。然而,通常情况下,正确使用 MVCC 能够提供更好的性能。 |锁模式|相关 SQL 命令|与之冲突的其他锁模式| |---------|-----------------------|--------------| @@ -34,7 +34,7 @@ Cloudberry Database 提供多种锁模式来控制对表数据的并发访问。 |ACCESS EXCLUSIVE|`ALTER TABLE`, `DROP TABLE`, `TRUNCATE`, `REINDEX`, `CLUSTER`, `REFRESH MATERIALIZED VIEW`(不带 `CONCURRENTLY`), `VACUUM FULL`|ACCESS SHARE, ROW SHARE, ROW EXCLUSIVE, SHARE UPDATE EXCLUSIVE, SHARE, SHARE ROW EXCLUSIVE, EXCLUSIVE, ACCESS EXCLUSIVE| :::info 注意 -全局死锁检测器 (Global Deadlock Detector) 是默认关闭的。Cloudberry Database 在执行 `UPDATE` 和 `DELETE` 操作时会获取更为严格的 `EXCLUSIVE`(排他锁),而不是 PostgreSQL 中的 `ROW EXCLUSIVE`(行排他锁)。 +全局死锁检测器 (Global Deadlock Detector) 是默认关闭的。Apache Cloudberry 在执行 `UPDATE` 和 `DELETE` 操作时会获取更为严格的 `EXCLUSIVE`(排他锁),而不是 PostgreSQL 中的 `ROW EXCLUSIVE`(行排他锁)。 当全局死锁检测器开启后: @@ -43,9 +43,9 @@ Cloudberry Database 提供多种锁模式来控制对表数据的并发访问。 ## 全局死锁检测器 -Cloudberry Database 的全局死锁检测器 (Global Deadlock Detector) 后台工作进程会收集所有 Segment 上的锁信息,并使用一种有向算法来检测本地和全局死锁是否存在。这种算法让 Cloudberry Database 放宽了对堆表上并发更新和删除操作的限制。尽管如此,Cloudberry Database 在 AO/CO 表上仍采用表级锁定,限制并发的 `UPDATE`、`DELETE` 和 `SELECT...FOR lock_strength` 操作。 +Apache Cloudberry 的全局死锁检测器 (Global Deadlock Detector) 后台工作进程会收集所有 Segment 上的锁信息,并使用一种有向算法来检测本地和全局死锁是否存在。这种算法让 Apache Cloudberry 放宽了对堆表上并发更新和删除操作的限制。尽管如此,Apache Cloudberry 在 AO/CO 表上仍采用表级锁定,限制并发的 `UPDATE`、`DELETE` 和 `SELECT...FOR lock_strength` 操作。 -默认情况下,全局死锁检测器是关闭的。在 Cloudberry Database 
中,堆表上的并发 `UPDATE` 和 `DELETE` 操作会串行运行。若要激活这些并发更新并让全局死锁检测器决定何时存在死锁,你可以在 `postgresql.conf` 配置文件中设置参数 `gp_enable_global_deadlock_detector` 为 `on`,并重启数据库。 +默认情况下,全局死锁检测器是关闭的。在 Apache Cloudberry 中,堆表上的并发 `UPDATE` 和 `DELETE` 操作会串行运行。若要激活这些并发更新并让全局死锁检测器决定何时存在死锁,你可以在 `postgresql.conf` 配置文件中设置参数 `gp_enable_global_deadlock_detector` 为 `on`,并重启数据库。 当全局死锁检测器启用时,后台工作进程会在 Coordinator 主机上自动启动。你可以通过在 `postgresql.conf` 配置文件中的 `gp_global_deadlock_detector_period` 服务器配置参数来配置全局死锁检测器收集和分析锁等待数据的间隔。 @@ -58,11 +58,11 @@ Cloudberry Database 的全局死锁检测器 (Global Deadlock Detector) 后台 - 基于 GPORCA 优化器,针对哈希表上同一行的并发更新事务。 :::tip 提示 -Cloudberry Database 使用服务器配置参数 `deadlock_timeout` 所指定的间隔来进行本地死锁检测。由于本地和全局死锁检测算法不同,根据哪个检测器(本地或全局)首先触发,被取消的进程可能会有所不同。 +Apache Cloudberry 使用服务器配置参数 `deadlock_timeout` 所指定的间隔来进行本地死锁检测。由于本地和全局死锁检测算法不同,根据哪个检测器(本地或全局)首先触发,被取消的进程可能会有所不同。 ::: :::tip 提示 -如果启用了 `lock_timeout` 服务器配置参数,并将其设置为小于 `deadlock_timeout` 和 `gp_global_deadlock_detector_period` 的值,Cloudberry Database 将在会话中触发死锁检查之前取消一个语句。 +如果启用了 `lock_timeout` 服务器配置参数,并将其设置为小于 `deadlock_timeout` 和 `gp_global_deadlock_detector_period` 的值,Apache Cloudberry 将在会话中触发死锁检查之前取消一个语句。 ::: 若要查看所有 Segment 的等锁信息,请执行用户定义函数 `gp_dist_wait_status()`。通过函数的输出,可以确定哪些事务正在等锁,哪些事务持有锁,锁的类型和模式,等待者和持有者的会话标识符,以及哪些 Segment 正在运行事务。以下是一个 `gp_dist_wait_status()` 函数的示例输出: diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/work-with-transactions.md b/i18n/zh/docusaurus-plugin-content-docs/current/work-with-transactions.md index def78635ed..2a46c4b8d5 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/work-with-transactions.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/work-with-transactions.md @@ -2,9 +2,9 @@ title: 事务操作 --- -# 在 Cloudberry Database 中使用事务 +# 在 Apache Cloudberry 中使用事务 -SQL 事务允许你将多个 SQL 语句捆绑在一起,形成一个全部成功或全部失败的操作。对于 Cloudberry Database,其 SQL 事务命令包括: +SQL 事务允许你将多个 SQL 语句捆绑在一起,形成一个全部成功或全部失败的操作。对于 Apache Cloudberry,其 SQL 事务命令包括: - `BEGIN` 或 `START TRANSACTION`:开始一个事务块。 - `END` 或 `COMMIT`:提交事务的结果。 @@ -15,16 +15,16 @@ SQL 
事务允许你将多个 SQL 语句捆绑在一起,形成一个全部成 ## 事务隔离级别 -Cloudberry Database 支持以下标准 SQL 事务隔离级别: +Apache Cloudberry 支持以下标准 SQL 事务隔离级别: - `READ UNCOMMITTED`(读未提交)和 `READ COMMITTED`(读已提交)的行为与标准的 `READ COMMITTED` 相同。 - `REPEATABLE READ`(可重复读)和 `SERIALIZABLE`(可序列化)的行为与 `REPEATABLE READ` 相同。 -以下内容描述了 Cloudberry Database 事务隔离级别的行为。 +以下内容描述了 Apache Cloudberry 事务隔离级别的行为。 ### 未提交读与已提交读 -在 Cloudberry Database 中,任何执行中的命令都无法查看其他并行事务中的未提交更新,因此 `READ UNCOMMITTED` 模式与 `READ COMMITTED` 模式的效果是相同的。`READ COMMITTED` 模式提供了一种快速且简易的方式,能够实现事务的部分隔离效果。在这种模式下,`SELECT`、`UPDATE` 和 `DELETE` 语句都是在查询开始时基于数据库当前的快照来执行的。 +在 Apache Cloudberry 中,任何执行中的命令都无法查看其他并行事务中的未提交更新,因此 `READ UNCOMMITTED` 模式与 `READ COMMITTED` 模式的效果是相同的。`READ COMMITTED` 模式提供了一种快速且简易的方式,能够实现事务的部分隔离效果。在这种模式下,`SELECT`、`UPDATE` 和 `DELETE` 语句都是在查询开始时基于数据库当前的快照来执行的。 具体来说,一个 `SELECT` 查询会: @@ -39,7 +39,7 @@ Cloudberry Database 支持以下标准 SQL 事务隔离级别: #### 可重复读与序列化隔离级别 -在 SQL 标准中,`SERIALIZABLE` 事务隔离级别旨在确保,即便事务是并发运行的,其产生的结果也应当与事务依次运行时的结果相同。在 Cloudberry Database 中,当指定 `SERIALIZABLE` 隔离级别时,实际上会使用 `REPEATABLE READ` 隔离级别。`REPEATABLE READ` 隔离级别可以在无需使用复杂锁定机制的情况下,防止脏读、不可重复读和幻读现象,但这种模式并不能检测到所有在并发事务执行期间可能出现的序列化冲突。因此,你需要仔细检查并发事务,以识别哪些冲突是仅靠禁止对同一数据并发更新所无法预防的。为避免这类冲突,可以采用显式表锁或者更新某个特设的代表冲突的虚拟行。 +在 SQL 标准中,`SERIALIZABLE` 事务隔离级别旨在确保,即便事务是并发运行的,其产生的结果也应当与事务依次运行时的结果相同。在 Apache Cloudberry 中,当指定 `SERIALIZABLE` 隔离级别时,实际上会使用 `REPEATABLE READ` 隔离级别。`REPEATABLE READ` 隔离级别可以在无需使用复杂锁定机制的情况下,防止脏读、不可重复读和幻读现象,但这种模式并不能检测到所有在并发事务执行期间可能出现的序列化冲突。因此,你需要仔细检查并发事务,以识别哪些冲突是仅靠禁止对同一数据并发更新所无法预防的。为避免这类冲突,可以采用显式表锁或者更新某个特设的代表冲突的虚拟行。 在 `REPEATABLE READ` 隔离级别下,`SELECT` 查询将: @@ -51,7 +51,7 @@ Cloudberry Database 支持以下标准 SQL 事务隔离级别: - 在同一事务中连续执行的 `SELECT` 命令总是看到一致的数据。 - `UPDATE`、`DELETE`、`SELECT FOR UPDATE` 和 `SELECT FOR SHARE` 命令仅能找到在命令开始前已经提交的行。如果有并发事务对目标行进行了更新、删除或加锁,`REPEATABLE READ` 事务将等待该并发事务提交或撤销更改。若并发事务提交了更改,`REPEATABLE READ` 事务将选择回滚;若并发事务撤销了更改,`REPEATABLE READ` 事务则可以提交自己的更改。 -在 Cloudberry Database 中,默认的事务隔离级别为 `READ COMMITTED`。若需更改事务的隔离级别,可在开始事务时声明隔离级别,或在事务已开始后使用 `SET TRANSACTION` 命令进行设置。 +在 Apache 
Cloudberry 中,默认的事务隔离级别为 `READ COMMITTED`。若需更改事务的隔离级别,可在开始事务时声明隔离级别,或在事务已开始后使用 `SET TRANSACTION` 命令进行设置。 ## 另请参阅 diff --git a/src/components/bootcamp/TextPic/index.tsx b/src/components/bootcamp/TextPic/index.tsx index 4421ba1c39..939a7bf5a8 100644 --- a/src/components/bootcamp/TextPic/index.tsx +++ b/src/components/bootcamp/TextPic/index.tsx @@ -6,9 +6,9 @@ export default function TextPic() { return (
- These tutorials showcase how Cloudberry Database can address day-to-day + These tutorials showcase how Apache Cloudberry can address day-to-day tasks performed in typical DW, BI and data science environments. It is - designed to be used with the Cloudberry Database Sandbox, which is based + designed to be used with the Apache Cloudberry Sandbox, which is based on the Docker with the CentOS 7.9 OS.
diff --git a/src/components/common/Layout/index.tsx b/src/components/common/Layout/index.tsx index 3e3dcdd82c..ab7e925a6c 100644 --- a/src/components/common/Layout/index.tsx +++ b/src/components/common/Layout/index.tsx @@ -9,7 +9,7 @@ export default function CommonLayout({ children }: IProps): JSX.Element { return ( {/* The main tag is used for special handling of layout styles, and cannot be a div tag here */}
{children}
diff --git a/src/consts/bootcamp.tsx b/src/consts/bootcamp.tsx index 03135c2e07..378070e6d2 100644 --- a/src/consts/bootcamp.tsx +++ b/src/consts/bootcamp.tsx @@ -32,7 +32,7 @@ let BOOTCAMP_PAGE_CONFIG = { title: "Lesson 0", link: { text: "Introduction to Database and Cloudberry Architecture", - href: "/bootcamp/101-0-introduction-to-database-and-cloudberrydb-architecture", + href: "/bootcamp/101-0-introduction-to-database-and-cloudberry-architecture", }, style: { width: 221 }, }, @@ -129,7 +129,7 @@ let BOOTCAMP_PAGE_CONFIG = { style: { width: 474 }, link: { text: "Introduction to Cloudberry In-Database Analytics", - href: "/bootcamp/104-1-introduction-to-cloudberrydb-in-database-analytics", + href: "/bootcamp/104-1-introduction-to-cloudberry-in-database-analytics", }, }, ], diff --git a/src/pages/bootcamp/101-0-introduction-to-database-and-cloudberrydb-architecture.md b/src/pages/bootcamp/101-0-introduction-to-database-and-cloudberry-architecture.md similarity index 100% rename from src/pages/bootcamp/101-0-introduction-to-database-and-cloudberrydb-architecture.md rename to src/pages/bootcamp/101-0-introduction-to-database-and-cloudberry-architecture.md diff --git a/src/pages/bootcamp/101-6-backup-and-recovery-operations.md b/src/pages/bootcamp/101-6-backup-and-recovery-operations.md index 5c2ac3f04b..187b4bf851 100644 --- a/src/pages/bootcamp/101-6-backup-and-recovery-operations.md +++ b/src/pages/bootcamp/101-6-backup-and-recovery-operations.md @@ -62,7 +62,7 @@ These exercises will walk through how to create a full backup of your database a 20230727:10:28:28 gpbackup:gpadmin:mdw:020061-[INFO]:-Data backup complete 20230727:10:28:28 gpbackup:gpadmin:mdw:020061-[INFO]:-Skipped data backup of 3 external/foreign table(s). 20230727:10:28:28 gpbackup:gpadmin:mdw:020061-[INFO]:-See /home/gpadmin/gpAdminLogs/gpbackup_20230727.log for a complete list of skipped tables. 
- 20230727:10:28:29 gpbackup:gpadmin:mdw:020061-[INFO]:-Found neither /usr/local/cloudberry-db/bin/gp_email_contacts.yaml nor /home/gpadmin/gp_email_contacts.yaml + 20230727:10:28:29 gpbackup:gpadmin:mdw:020061-[INFO]:-Found neither /usr/local/cloudberry/bin/gp_email_contacts.yaml nor /home/gpadmin/gp_email_contacts.yaml 20230727:10:28:29 gpbackup:gpadmin:mdw:020061-[INFO]:-Email containing gpbackup report /tmp/gpseg-1/backups/20230727/20230727102819/gpbackup_20230727102819_report will not be sent 20230727:10:28:29 gpbackup:gpadmin:mdw:020061-[INFO]:-Backup completed successfully ``` @@ -123,7 +123,7 @@ These exercises will walk through how to create a full backup of your database a 20230727:10:38:30 gprestore:gpadmin:mdw:020373-[INFO]:-Sequence values restore complete Tables restored: 1 / 1 [===============================================================] 100.00% 5s 20230727:10:38:35 gprestore:gpadmin:mdw:020373-[INFO]:-Data restore complete - 20230727:10:38:35 gprestore:gpadmin:mdw:020373-[INFO]:-Found neither /usr/local/cloudberry-db/bin/gp_email_contacts.yaml nor /home/gpadmin/gp_email_contacts.yaml + 20230727:10:38:35 gprestore:gpadmin:mdw:020373-[INFO]:-Found neither /usr/local/cloudberry/bin/gp_email_contacts.yaml nor /home/gpadmin/gp_email_contacts.yaml 20230727:10:38:35 gprestore:gpadmin:mdw:020373-[INFO]:-Email containing gprestore report /tmp/gpseg-1/backups/20230727/20230727102819/gprestore_20230727102819_20230727103829_report will not be sent 20230727:10:38:35 gprestore:gpadmin:mdw:020373-[INFO]:-Restore completed successfully ``` diff --git a/src/pages/bootcamp/102-cbdb-crash-course.md b/src/pages/bootcamp/102-cbdb-crash-course.md index 65edf3d546..e263961487 100644 --- a/src/pages/bootcamp/102-cbdb-crash-course.md +++ b/src/pages/bootcamp/102-cbdb-crash-course.md @@ -832,7 +832,7 @@ Explore the data directory and subdirectories. Take a look at the configuration ``` [gpadmin@mdw ~]$ ps aux|grep 5432 -gpadmin 4409 0.0 0.4 209960 39776 ? 
Ss 16:14 0:00 /usr/local/cloudberry-db/bin/postgres -D /data0/database/master/gpseg-1 -p 5432 -c gp_role=dispatch +gpadmin 4409 0.0 0.4 209960 39776 ? Ss 16:14 0:00 /usr/local/cloudberry/bin/postgres -D /data0/database/master/gpseg-1 -p 5432 -c gp_role=dispatch gpadmin 4410 0.0 0.0 45544 5160 ? Ss 16:14 0:00 postgres: 5432, master logger process gpadmin 4412 0.0 0.1 210256 9484 ? Ss 16:14 0:00 postgres: 5432, checkpointer gpadmin 4413 0.0 0.0 210124 7440 ? Ss 16:14 0:00 postgres: 5432, background writer @@ -851,7 +851,7 @@ gpadmin 4846 0.0 0.5 294692 48516 ? Ssl 16:25 0:00 postgres: 5432 ``` [gpadmin@mdw ~]$ ps aux|grep 40000 -gpadmin 4373 0.0 0.5 212912 41404 ? Ss 16:14 0:00 /usr/local/cloudberry-db/bin/postgres -D /data0/database/primary/gpseg0 -p 40000 -c gp_role=execute +gpadmin 4373 0.0 0.5 212912 41404 ? Ss 16:14 0:00 /usr/local/cloudberry/bin/postgres -D /data0/database/primary/gpseg0 -p 40000 -c gp_role=execute gpadmin 4377 0.0 0.0 45540 5272 ? Ss 16:14 0:00 postgres: 40000, logger process gpadmin 4390 0.0 0.1 213212 9328 ? Ss 16:14 0:00 postgres: 40000, checkpointer gpadmin 4391 0.0 0.0 213076 7856 ? Ss 16:14 0:00 postgres: 40000, background writer @@ -867,7 +867,7 @@ gpadmin 4400 0.0 0.1 214868 12432 ? Ss 16:14 0:00 postgres: 40000 ``` [gpadmin@mdw ~]$ ps aux|grep 41000 -gpadmin 4375 0.0 0.5 212912 41196 ? Ss 16:14 0:00 /usr/local/cloudberry-db/bin/postgres -D /data0/database/mirror/gpseg0 -p 41000 -c gp_role=execute +gpadmin 4375 0.0 0.5 212912 41196 ? Ss 16:14 0:00 /usr/local/cloudberry/bin/postgres -D /data0/database/mirror/gpseg0 -p 41000 -c gp_role=execute gpadmin 4379 0.0 0.0 45540 5160 ? Ss 16:14 0:00 postgres: 41000, logger process gpadmin 4383 0.0 0.1 213344 10908 ? Ss 16:14 0:00 postgres: 41000, startup recovering 000000010000000000000004 gpadmin 4385 0.0 0.1 212912 8352 ? 
Ss 16:14 0:00 postgres: 41000, checkpointer diff --git a/src/pages/bootcamp/104-1-introduction-to-cloudberrydb-in-database-analytics.md b/src/pages/bootcamp/104-1-introduction-to-cloudberry-in-database-analytics.md similarity index 100% rename from src/pages/bootcamp/104-1-introduction-to-cloudberrydb-in-database-analytics.md rename to src/pages/bootcamp/104-1-introduction-to-cloudberry-in-database-analytics.md From 8daa0906284ead9aeb77164365697c3c463bcb86 Mon Sep 17 00:00:00 2001 From: Dianjin Wang Date: Tue, 17 Dec 2024 19:10:41 +0800 Subject: [PATCH 2/3] Fix broken anchors warnings in the docs --- docs/cbdb-linux-compile.md | 2 +- docs/functions/index.md | 2 +- docs/functions/json-functions-and-operators.md | 2 +- docs/sql-stmts/copy.md | 2 +- docs/sql-stmts/select.md | 6 +++--- docs/sys-utilities/gpaddmirrors.md | 2 +- docs/sys-utilities/gpexpand.md | 2 +- docs/sys-utilities/gpload.md | 2 +- docs/sys-utilities/psql.md | 2 +- docs/table-storage-models.md | 6 ------ .../data-loading/{load-data-overview.md => index.md} | 0 .../current/transactional-concurrency-control.md | 8 +++++--- src/consts/bootcamp.tsx | 2 +- src/pages/bootcamp/102-cbdb-crash-course.md | 2 +- src/pages/bootcamp/cbdb-sandbox.md | 2 +- 15 files changed, 19 insertions(+), 23 deletions(-) rename i18n/zh/docusaurus-plugin-content-docs/current/data-loading/{load-data-overview.md => index.md} (100%) diff --git a/docs/cbdb-linux-compile.md b/docs/cbdb-linux-compile.md index 29442687cb..8a03b391d2 100644 --- a/docs/cbdb-linux-compile.md +++ b/docs/cbdb-linux-compile.md @@ -18,7 +18,7 @@ Take the following steps to compile and install Apache Cloudberry: 1. [Clone GitHub repo](#step-1-clone-github-repo). 2. [Install dependencies](#step-2-install-dependencies). 3. [Perform prerequisite platform tasks](#step-3-perform-prerequisite-platform-tasks). -4. [Build Apache Cloudberry](#step-4-build-apache-database). +4. [Build Apache Cloudberry](#step-4-build-apache-cloudberry). 5. 
[Verify the cluster](#step-5-verify-the-cluster). ## Step 1. Clone GitHub repo diff --git a/docs/functions/index.md b/docs/functions/index.md index 77f68c66a7..affb10bfe9 100644 --- a/docs/functions/index.md +++ b/docs/functions/index.md @@ -42,7 +42,7 @@ Apache Cloudberry does not support functions that return a table reference (`ran ## Built-in functions and operators -The following table lists the categories of built-in functions and operators supported by PostgreSQL. All functions and operators are supported in Apache Cloudberry as in PostgreSQL with the exception of `STABLE` and `VOLATILE` functions, which are subject to the restrictions noted in [Apache Cloudberry Function Types](#topic27). See the [Functions and Operators](https://www.postgresql.org/docs/14/functions.html) section of the PostgreSQL documentation for more information about these built-in functions and operators. +The following table lists the categories of built-in functions and operators supported by PostgreSQL. All functions and operators are supported in Apache Cloudberry as in PostgreSQL with the exception of `STABLE` and `VOLATILE` functions, which are subject to the restrictions noted in [Apache Cloudberry Function Types](#built-in-functions-and-operators). See the [Functions and Operators](https://www.postgresql.org/docs/14/functions.html) section of the PostgreSQL documentation for more information about these built-in functions and operators. 
|Operator/Function Category|VOLATILE Functions|STABLE Functions|Restrictions| |--------------------------|------------------|----------------|------------| diff --git a/docs/functions/json-functions-and-operators.md b/docs/functions/json-functions-and-operators.md index 5b266f151f..d2f0cae07d 100644 --- a/docs/functions/json-functions-and-operators.md +++ b/docs/functions/json-functions-and-operators.md @@ -16,7 +16,7 @@ Apache Cloudberry includes built-in functions and operators that create and mani - [JSON operators](#json-operators) - [JSON creation functions](#json-creation-functions) - [JSON aggregate functions](#json-aggregate-functions) -- [JSON processing functions](#json-processing-functions) +- [JSON processing functions](#sqljson-path-language) ### JSON operators diff --git a/docs/sql-stmts/copy.md b/docs/sql-stmts/copy.md index e868af9564..163f8f2693 100644 --- a/docs/sql-stmts/copy.md +++ b/docs/sql-stmts/copy.md @@ -204,7 +204,7 @@ Specifies the newline used in your data files — `LF` (Line feed, 0x0A), `CR` ( **`CSV`** -Selects Comma Separated Value (CSV) mode. See [CSV Format](#section9). +Selects Comma Separated Value (CSV) mode. See [CSV Format](#file-formats). **`FILL MISSING FIELDS`** diff --git a/docs/sql-stmts/select.md b/docs/sql-stmts/select.md index 69ccb5e03f..11bf9ab7a0 100644 --- a/docs/sql-stmts/select.md +++ b/docs/sql-stmts/select.md @@ -371,11 +371,11 @@ If an existing_window_name is specified, it must refer to an earlier entry in th **`PARTITION BY`** -The `PARTITION BY` clause organizes the result set into logical groups based on the unique values of the specified expression. The elements of the `PARTITION BY` clause are interpreted in much the same fashion as elements of a [GROUP BY Clause](#groupbyclause), except that they are always simple expressions and never the name or number of an output column. 
Another difference is that these expressions can contain aggregate function calls, which are not allowed in a regular `GROUP BY` clause. They are allowed here because windowing occurs after grouping and aggregation. When used with window functions, the functions are applied to each partition independently. For example, if you follow `PARTITION BY` with a column name, the result set is partitioned by the distinct values of that column. If omitted, the entire result set is considered one partition. +The `PARTITION BY` clause organizes the result set into logical groups based on the unique values of the specified expression. The elements of the `PARTITION BY` clause are interpreted in much the same fashion as elements of a [GROUP BY Clause](#the-group-by-clause), except that they are always simple expressions and never the name or number of an output column. Another difference is that these expressions can contain aggregate function calls, which are not allowed in a regular `GROUP BY` clause. They are allowed here because windowing occurs after grouping and aggregation. When used with window functions, the functions are applied to each partition independently. For example, if you follow `PARTITION BY` with a column name, the result set is partitioned by the distinct values of that column. If omitted, the entire result set is considered one partition. **`ORDER BY`** -Similarly, the elements of the `ORDER BY` list are interpreted in much the same fashion as elements of an [ORDER BY Clause](#orderbyclause), except that the expressions are always taken as simple expressions and never the name or number of an output column. +Similarly, the elements of the `ORDER BY` list are interpreted in much the same fashion as elements of an [ORDER BY Clause](#the-order-by-clause), except that the expressions are always taken as simple expressions and never the name or number of an output column. 
> **Note** The elements of the `ORDER BY` clause define how to sort the rows in each partition of the result set. If omitted, rows are returned in whatever order is most efficient and may vary. @@ -869,7 +869,7 @@ Apache Cloudberry recognizes functional dependency (allowing columns to be omitt **LIMIT and OFFSET** -The clauses `LIMIT` and `OFFSET` are Apache Cloudberry-specific syntax, also used by MySQL. The SQL:2008 standard has introduced the clauses `OFFSET .. FETCH {FIRST|NEXT} ...` for the same functionality, as shown above in [LIMIT Clause](#limitclause). This syntax is also used by IBM DB2. (Applications for Oracle frequently use a workaround involving the automatically generated `rownum` column, which is not available in Apache Cloudberry, to implement the effects of these clauses.) +The clauses `LIMIT` and `OFFSET` are Apache Cloudberry-specific syntax, also used by MySQL. The SQL:2008 standard has introduced the clauses `OFFSET .. FETCH {FIRST|NEXT} ...` for the same functionality, as shown above in [LIMIT Clause](#the-limit-clause). This syntax is also used by IBM DB2. (Applications for Oracle frequently use a workaround involving the automatically generated `rownum` column, which is not available in Apache Cloudberry, to implement the effects of these clauses.) **FOR NO KEY UPDATE, FOR UPDATE, FOR SHARE, and FOR KEY SHARE** diff --git a/docs/sys-utilities/gpaddmirrors.md b/docs/sys-utilities/gpaddmirrors.md index 75bc7ca587..6f3e973bc2 100644 --- a/docs/sys-utilities/gpaddmirrors.md +++ b/docs/sys-utilities/gpaddmirrors.md @@ -106,7 +106,7 @@ A configuration file containing one line for each mirror segment you want to cre |
|| ``` -Where `` is the segment instance content ID, `
` is the hostname or IP address of the segment host, `` is the communication port, and `` is the segment instance data directory. For information about using a hostname or IP address, see [Specifying Hosts using Hostnames or IP Addresses](#specifying-hosts-using-hostnames-or-ip-addresses). Also, see [Using Host Systems with Multiple NICs](#using-host-systems-with-multiple-nics). +Where `` is the segment instance content ID, `
` is the hostname or IP address of the segment host, `` is the communication port, and `` is the segment instance data directory. For information about using a hostname or IP address, see [Specifying Hosts using Hostnames or IP Addresses](#specify-hosts-using-hostnames-or-ip-addresses). Also, see [Using Host Systems with Multiple NICs](#use-host-systems-with-multiple-nics). **`-l logfile_directory`** diff --git a/docs/sys-utilities/gpexpand.md b/docs/sys-utilities/gpexpand.md index 4c7d23b5a3..e691bde613 100644 --- a/docs/sys-utilities/gpexpand.md +++ b/docs/sys-utilities/gpexpand.md @@ -100,7 +100,7 @@ This file can contain hostnames with or without network interfaces specified. Th > **Note** The Apache Cloudberry segment host naming convention is `sdwN` where `sdw` is a prefix and `N` is an integer. For example, `sdw1`, `sdw2` and so on. For hosts with multiple interfaces, the convention is to append a dash (`-`) and number to the host name. For example, `sdw1-1` and `sdw1-2` are the two interface names for host `sdw1`. -For information about using a hostname or IP address, see [Specifying Hosts using Hostnames or IP Addresses](#specify-hosts-using-hostnames-or-ip-addresses). Also, see [Using Host Systems with Multiple NICs](#using-host-systems-with-multiple-nics). +For information about using a hostname or IP address, see [Specifying Hosts using Hostnames or IP Addresses](#specify-hosts-using-hostnames-or-ip-addresses). Also, see [Using Host Systems with Multiple NICs](#use-host-systems-with-multiple-nics). **`-i | --input input_file`** diff --git a/docs/sys-utilities/gpload.md b/docs/sys-utilities/gpload.md index 4bc7780f5f..c55964f382 100644 --- a/docs/sys-utilities/gpload.md +++ b/docs/sys-utilities/gpload.md @@ -200,7 +200,7 @@ Required. Begins the load specification section. A `GPLOAD` specification must h Required. Defines the location and the format of the input data to be loaded. 
`gpload` will start one or more instances of the [gpfdist](/docs/sys-utilities/gpfdist.md) file distribution program on the current host and create the required external table definition(s) in Apache Cloudberry that point to the source data. Note that the host from which you run `gpload` must be accessible over the network by all Apache Cloudberry hosts (coordinator and segments). -SOURCE** +**`SOURCE`** Required. The `SOURCE` block of an `INPUT` specification defines the location of a source file. An `INPUT` section can have more than one `SOURCE` block defined. Each `SOURCE` block defined corresponds to one instance of the [gpfdist](/docs/sys-utilities/gpfdist.md) file distribution program that will be started on the local machine. Each `SOURCE` block defined must have a `FILE` specification. diff --git a/docs/sys-utilities/psql.md b/docs/sys-utilities/psql.md index 99f9b85775..cf6acc784d 100644 --- a/docs/sys-utilities/psql.md +++ b/docs/sys-utilities/psql.md @@ -442,7 +442,7 @@ The new query buffer is then re-parsed according to the normal rules of `psql`, If a line number is specified, `psql` will position the cursor on the specified line of the file or query buffer. Note that if a single all-digits argument is given, `psql` assumes it is a line number, not a file name. -See [Environment](#section17) for information about configuring and customizing your editor. +See [Environment](#environment) for information about configuring and customizing your editor. **`\echo text [ ... ]`** diff --git a/docs/table-storage-models.md b/docs/table-storage-models.md index c0906548b1..5e9937b89e 100644 --- a/docs/table-storage-models.md +++ b/docs/table-storage-models.md @@ -6,12 +6,6 @@ title: Choose the Table Storage Model Apache Cloudberry supports several storage models and a mix of storage models. When you create a table, you choose how to store its data. This document explains the options for table storage and how to choose the best storage model for your workload. 
-- [Heap Storage](#heap-storage) -- [Append-Optimized Storage](#append-optimized-storage) -- [Choosing Row or Column-Oriented Storage](#choosing-row-or-column-oriented-storage) -- [Altering a Table](#altering-a-table) -- [Dropping a Table](#dropping-a-table) - :::info To simplify the creation of database tables, you can specify the default values for some table storage options with the Apache Cloudberry server configuration parameter `gp_default_storage_options`. ::: diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/data-loading/load-data-overview.md b/i18n/zh/docusaurus-plugin-content-docs/current/data-loading/index.md similarity index 100% rename from i18n/zh/docusaurus-plugin-content-docs/current/data-loading/load-data-overview.md rename to i18n/zh/docusaurus-plugin-content-docs/current/data-loading/index.md diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md b/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md index c36a65dddd..b49b483ffb 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/transactional-concurrency-control.md @@ -6,9 +6,11 @@ title: 事务中的并发控制 本文档介绍 Apache Cloudberry 中的事务并发控制,包括: -- [多版本并发控制机制](#多版本并发控制机制) -- [锁模式](#锁模式) -- [全局死锁检测器](#全局死锁检测器) +- [事务中的并发控制](#事务中的并发控制) + - [多版本并发控制机制](#多版本并发控制机制) + - [锁模式](#锁模式) + - [全局死锁检测器](#全局死锁检测器) + - [全局死锁检测器对并发 `UPDATE` 和 `DELETE` 操作的管理](#全局死锁检测器对并发-update-和-delete-操作的管理) ## 多版本并发控制机制 diff --git a/src/consts/bootcamp.tsx b/src/consts/bootcamp.tsx index 378070e6d2..39502ee217 100644 --- a/src/consts/bootcamp.tsx +++ b/src/consts/bootcamp.tsx @@ -23,7 +23,7 @@ let BOOTCAMP_PAGE_CONFIG = { This part contains a series of tutorials for quickly trying out Cloudberry based on the Cloudberry Sandbox. 
Before starting to read the tutorials, you are expected to finish installing the - single-node Cloudberry Database by following the{" "} + single-node Apache Cloudberry by following the{" "} Cloudberry Sandbox. ), diff --git a/src/pages/bootcamp/102-cbdb-crash-course.md b/src/pages/bootcamp/102-cbdb-crash-course.md index e263961487..982feb6583 100644 --- a/src/pages/bootcamp/102-cbdb-crash-course.md +++ b/src/pages/bootcamp/102-cbdb-crash-course.md @@ -33,7 +33,7 @@ Topics include: ## Lesson 0. Prerequisite -Before starting this crash course, spend some time going through the [Apache Cloudberry Tutorials Based on Single-Node Installation](./#1-cloudberry-sandbox) to get familiar with Apache Cloudberry and how it works. +Before starting this crash course, spend some time going through the [Apache Cloudberry Tutorials Based on Single-Node Installation](./cbdb-sandbox.md) to get familiar with Apache Cloudberry and how it works. ## Lesson 1. Where to read the official documentation diff --git a/src/pages/bootcamp/cbdb-sandbox.md b/src/pages/bootcamp/cbdb-sandbox.md index 39d353ffa4..75937013b3 100644 --- a/src/pages/bootcamp/cbdb-sandbox.md +++ b/src/pages/bootcamp/cbdb-sandbox.md @@ -87,5 +87,5 @@ In addition to using the `docker exec` command, you can also use the `ssh` comma ssh gpadmin@localhost # Password: cbdb@123 ``` -Now you have a Apache Cloudberry and can continue with [101 Apache Cloudberry Tutorials](./#2-101-cloudberry-tourials)! Enjoy! +Now you have a Apache Cloudberry and can continue with [101 Apache Cloudberry Tutorials](./102-cbdb-crash-course.md)! Enjoy! 
From 1b702f169d9dcec4b24e6e001139b48a4543bfcb Mon Sep 17 00:00:00 2001 From: Dianjin Wang Date: Tue, 17 Dec 2024 19:21:53 +0800 Subject: [PATCH 3/3] Delete the non-ASF release --- docs/releases/index.md | 9 +- docs/releases/release-1.5.0.md | 178 ------------------ docs/releases/release-1.5.1.md | 38 ---- docs/releases/release-1.5.2.md | 65 ------- docs/releases/release-1.5.3.md | 75 -------- docs/releases/release-1.5.4.md | 53 ------ docs/releases/release-1.6.0.md | 137 -------------- .../current/releases/index.md | 8 + .../current/releases/release-1.5.0.md | 170 ----------------- .../current/releases/release-1.5.1.md | 31 --- .../current/releases/release-1.5.2.md | 59 ------ .../current/releases/release-1.5.3.md | 72 ------- .../current/releases/release-1.5.4.md | 47 ----- .../current/releases/release-1.6.0.md | 131 ------------- sidebars.ts | 8 +- 15 files changed, 16 insertions(+), 1065 deletions(-) delete mode 100644 docs/releases/release-1.5.0.md delete mode 100644 docs/releases/release-1.5.1.md delete mode 100644 docs/releases/release-1.5.2.md delete mode 100644 docs/releases/release-1.5.3.md delete mode 100644 docs/releases/release-1.5.4.md delete mode 100644 docs/releases/release-1.6.0.md create mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/releases/index.md delete mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.0.md delete mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.1.md delete mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.2.md delete mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.3.md delete mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.4.md delete mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.6.0.md diff --git a/docs/releases/index.md b/docs/releases/index.md index 1e9e18fa88..34d745bf31 100644 --- a/docs/releases/index.md +++ 
b/docs/releases/index.md @@ -1,7 +1,8 @@ # Releases -```mdx-code-block -import DocCardList from '@theme/DocCardList'; +**Note:** - -``` +We are currently preparing for the release under the Apache Incubator. Previous versions were non-Apache releases. If you want to install Apache Cloudberry, you can either: + +* **Manually build Cloudberry from the main branch**: This approach is suitable for developers or users who want to experience the latest features. You can clone the repository and follow the build instructions provided in the documentation. +* **Install the non-Apache release: [v1.6.0](https://github.com/apache/cloudberry/releases/tag/1.6.0)**: This version is stable and ready for production use, offering the rpm packages for both Enterprise Linux 8 and 9 version. \ No newline at end of file diff --git a/docs/releases/release-1.5.0.md b/docs/releases/release-1.5.0.md deleted file mode 100644 index de9fb0d123..0000000000 --- a/docs/releases/release-1.5.0.md +++ /dev/null @@ -1,178 +0,0 @@ ---- -title: 1.5.0 ---- - -:::caution - -This is not an Apache release! - -::: - -# Cloudberry Database v1.5.0 Release Notes - -Version number: v1.5.0 - -Cloudberry Database v1.5.0 adds several new features, and includes several performance/stability optimizations and bug fixes. - -Quick try: [v1.5.0](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.0) - -## New features - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TypeFeature
Query processingSupports creating AO or AOCO tables and refreshing materialized views in parallel.
Supports automatically using materialized views to optimize queries.
Supports deploying a cluster with only a single computing node.
Supports quickly deploying a cluster with only a command.
StorageSupports incremental materialized views.
Supports using unique indexes on AO tables.
SecuritySupports configuring user password policy using Profile.
- -Each new feature is described as follows: - -### Query processing - -- **Supports creating AO or AOCO tables and refreshing materialized views in parallel.** - - Starting from v1.5.0, Cloudberry Database supports creating append-optimized (AO) tables and append-optimized column-oriented (AOCO) tables in parallel by using the `CREATE TABLE AS` statement, and supports refreshing materialized views in parallel based on the AO or AOCO tables. Parallel processing accelerates table creation and materialized view refresh. - - See [Create AO/AOCO Tables and Refresh Materialized Views in Parallel](/docs/parallel-create-ao-refresh-mv.md) for details. - -- **Supports automatically using materialized views to optimize queries.** - - Starting from v1.5.0, Cloudberry Database supports automatically using materialized views to process some or all queries (called AQUMV) during the query planning phase. The applicable scenarios include: - - - Aggregation queries on large data sets: For queries that need to aggregate results from millions of records, AQUMV can significantly reduce query time. - - Frequently updated large tables: In an environment where data is frequently updated, IMV can ensure that the query results are real-time and accurate. - - See [Use Automatic Materialized Views for Query Optimization](/docs/use-auto-materialized-view-to-answer-queries.md) for details. - -### Cluster management - -- **Supports deploying a cluster with only a single computing node.** - - Before v1.5.0, when you deployed a Cloudberry Database cluster, you needed to deploy at least one coordinator node and one segment node, as well as specify numerous configuration information and startup parameters. This process was relatively complex and time-consuming. - - Starting from v1.5.0, you can deploy a single-computing-node Cloudberry Database cluster just like deploying PostgreSQL. 
The cluster only contains one coordinator node, without the need to specify numerous configuration parameters or pre-allocate computing resources for multiple nodes. - - You can quickly deploy a single-node Cloudberry Database cluster using the tool `gpdemo`. See [Deploy Cloudberry Database with a Single Computing Node](/docs/deploy-cbdb-with-single-node.md) for details. - -- **Supports quickly deploying a cluster with only a command.** - - Before v1.5.0, you needed to spend time writing configuration files and parameters if you wanted to deploy a small cluster with segments on a single node to make a demo. Starting from v1.5.0, to deploy a Cloudberry Database cluster with a specified number of segments, you only need to use the built-in script `gpdemo` with a single command, that is: - - ```bash - gpdemo - ``` - - See [Use gpdemo to Quickly Deploy Cloudberry Database](/docs/sys-utilities/gpdemo.md) for details. - -### Storage - -- **Supports incremental materialized views.** - - The incremental materialized view is a special form of materialized view. When data is inserted, updated, or deleted in a base table in Cloudberry Database, the incremental materialized view does not need to recalculate all the data in the entire view. Instead, it only updates the parts that have been updated since the last refresh. This can save a lot of computing resources and time, and significantly improve performance, especially when dealing with large datasets. - - Starting from v1.5.0, if a query involves intermediary result sets that require acceleration, or in scenarios with many read operations but few write operations, you can use incremental materialized views to speed up the queries. See [Incremental Materialized View in Cloudberry Database](/docs/use-incremental-materialized-view.md) for details. 
- -- **Supports using unique indexes on AO tables.** - - Starting from v1.5.0, you can create a unique index on an Append-Optimized (AO) or Append-Optimized Column Store (AOCS) table in Cloudberry Database. With a unique index, Cloudberry Database checks the unique constraint when data is inserted into the AO table to ensure the uniqueness of the data. At the same time, the database optimizes specific queries with the characteristic of uniqueness to improve the query performance. - - See [Create Unique Index on AO Table](/docs/use-unique-index-on-ao-tables.md) for details. - -### Security - -- **Supports configuring user password policy using Profile.** - - Profile refers to the password policy configuration, which is used to control the password security policy of users in Cloudberry Database. You can bind a profile to one or more users to control the password security policy of database users. Profile defines the rules for user management and password reuse. With Profile, the database administrator can use SQL to force some constraints, such as locking accounts after login failures or controlling the number of password reuses. - - Starting from v1.5.0, Cloudberry Database supports creating profiles through SQL statements and binding profiles to one or more users, thereby controlling the password policy for database users. - - See [Configure password policy in Cloudberry Database](/docs/set-password-profile.md) for details. - -## Change description - -### SQL syntax changes - -`CREATE MATERIALIZED VIEW` now supports the `INCREMENTAL` option. You can use the SQL command `CREATE INCREMENTAL MATERIALIZED VIEW` to create incremental materialized views. The complete syntax support is as follows: - -```sql -CREATE [INCREMENTAL] MATERIALIZED VIEW [ IF NOT EXISTS ] table_name - [ (column_name [, ...] ) ] - [ USING method ] - [ WITH ( storage_parameter [= value] [, ... 
] ) ] - [ TABLESPACE tablespace_name ] - AS query - [ WITH [ NO ] DATA ] -``` - -### Functionality changes - -None - -### Parameter changes - -None - -## Bug fixes - -- Fixed an issue of memory overflow in AOCO tables. This bug led to the following error message: - - ```sql - SET default_table_access_method=ao_column; - CREATE temp TABLE nocolumns(); - - SELECT EXISTS(SELECT * FROM nocolumns); - - WARNING: detected write past chunk end in ExecutorState 0x8f79b78 (seg0 slice1 127.0.1.1:7002 pid=16215) - ``` - -- Fixed the alignment issue of `operatorMem` in the output results when viewing query plans using `EXPLAIN`. The display before the fix is as follows: - - ```sql - SET gp_resqueue_print_operator_memory_limits=ON; - EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM test_hj_spill; - - QUERY PLAN - ---------------------------------------------------------------------------- - Finalize AggregateoperatorMem: 100 kB - - -> Gather Motion 3:1 (slice1; segments: 3)operatorMem: 100 kB - - -> Partial AggregateoperatorMem: 100 kB - - -> Seq Scan on test_hj_spilloperatorMem: 100 kB - ``` - -- Fixed an issue causing memory exception in the snapshot under certain conditions, which might lead to a core dump during transaction processing in some scenarios. -- Improved the accuracy of internal table size estimation in parallel hash joins during parallel scan operations. -- Added support for Semi HashJoin types during parallel scans. -- Improved the logic for handling `NOT IN` clauses. Now Cloudberry Database can correctly handle scenarios involving `NULL` values. For example, executing queries like `SELECT c1 FROM t1_lasj WHERE c1 NOT IN (SELECT c1n FROM t2_lasj_has_null WHERE c1n IS NULL OR c1n IS NULL);` can output the correct results. -- Fixed issues encountered when compiling and running Cloudberry Database on macOS. -- Fixed an issue where the user's `search_path` was altered during `CREATE EXTENSION`. 
diff --git a/docs/releases/release-1.5.1.md b/docs/releases/release-1.5.1.md deleted file mode 100644 index f67e828bb9..0000000000 --- a/docs/releases/release-1.5.1.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: 1.5.1 ---- - -:::caution - -This is not an Apache release! - -::: - -# Cloudberry Database v1.5.1 Release Notes - -Version number: v1.5.1 - -Cloudberry Database v1.5.1 is a minor release that includes a few bug fixes. - -Quick try: [v1.5.1](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.1) - -## Improvements - -* Check the uniqueness of index and skip prefetching for non-heap relations [#337](https://github.com/cloudberrydb/cloudberrydb/pull/337) @[gfphoenix78](https://github.com/gfphoenix78) -* Compute aggregations on materialized views [#322](https://github.com/cloudberrydb/cloudberrydb/pull/322) @[avamingli](https://github.com/avamingli) -* Introduce the `pg_ext_aux` namespace for extensions [#333](https://github.com/cloudberrydb/cloudberrydb/pull/333) @[gfphoenix78](https://github.com/gfphoenix78) -* Implement a DML hook for extensions [#332](https://github.com/cloudberrydb/cloudberrydb/pull/332) @[gfphoenix78](https://github.com/gfphoenix78) -* Support custom object classes [#335](https://github.com/cloudberrydb/cloudberrydb/pull/335) @[gfphoenix78](https://github.com/gfphoenix78) -* Add reloption support for custom table access methods [#336](https://github.com/cloudberrydb/cloudberrydb/pull/336) @[gfphoenix78](https://github.com/gfphoenix78) -* Introduce a callback in `TableAmRoutine` to manage swapping relation files [#338](https://github.com/cloudberrydb/cloudberrydb/pull/338) @[gfphoenix78](https://github.com/gfphoenix78) -* Update terminal information to reflect CloudberryDB [#300](https://github.com/cloudberrydb/cloudberrydb/pull/300) @[tuhaihe](https://github.com/tuhaihe) -* Refactor table AM to include execution context in `scan_begin_extractcolumns` [#329](https://github.com/cloudberrydb/cloudberrydb/pull/329) 
@[gfphoenix78](https://github.com/gfphoenix78) -* Expose functions to support PAX for partition tables [#328](https://github.com/cloudberrydb/cloudberrydb/pull/328) @[gfphoenix78](https://github.com/gfphoenix78) - - -## Bug fixes - -* Fix an illegal `PGnotify` declaration issue [#325](https://github.com/cloudberrydb/cloudberrydb/pull/325) @[jiaqizho](https://github.com/jiaqizho) -* Fix a potential Use-After-Free (UAF) issue in `get_size_from_segDBs` [#326](https://github.com/cloudberrydb/cloudberrydb/pull/326) @[jiaqizho](https://github.com/jiaqizho) -* Fix the storage name display in `\d` command in psql from `pg_am` [#330](https://github.com/cloudberrydb/cloudberrydb/pull/330) @[gfphoenix78](https://github.com/gfphoenix78) -* Fix issues related to the `pg_ext_aux` namespace [#340](https://github.com/cloudberrydb/cloudberrydb/pull/340) @[gfphoenix78](https://github.com/gfphoenix78) diff --git a/docs/releases/release-1.5.2.md b/docs/releases/release-1.5.2.md deleted file mode 100644 index a20af76ccb..0000000000 --- a/docs/releases/release-1.5.2.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: 1.5.2 ---- - -:::caution - -This is not an Apache release! - -::: - -# Cloudberry Database v1.5.2 Release Notes - -Version number: v1.5.2 - -Cloudberry Database v1.5.2 is a minor release that includes some improvements and bug fixes. 
- -Quick try: [v1.5.2](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.2) - -Full changelog: [https://github.com/cloudberrydb/cloudberrydb/compare/1.5.1...1.5.2](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.1...1.5.2) - -## Improvements - -- Support `GROUP BY`, `GROUPING SETS`, `ROLLUP`, `CUBE` in origin queries for materialized views by @[avamingli](https://github.com/avamingli) in [#342](https://github.com/cloudberrydb/cloudberrydb/pull/342) -- Use `pg_class` instead of `gp_segment_configuration` to test `Entry` by @[avamingli](https://github.com/avamingli) in [#294](https://github.com/cloudberrydb/cloudberrydb/pull/294) -- The GPORCA optimizer now supports the PAX (Partition Attributes Across) storage table by @[gfphoenix78](https://github.com/gfphoenix78) in [#346](https://github.com/cloudberrydb/cloudberrydb/pull/346) -- Add the `RelationIsNonblockRelation` macro to expand code path like `AO`/`CO` by @[gfphoenix78](https://github.com/gfphoenix78) in [#347](https://github.com/cloudberrydb/cloudberrydb/pull/347) -- Feature encoding options for the custom table access method by @[gfphoenix78](https://github.com/gfphoenix78) in [#343](https://github.com/cloudberrydb/cloudberrydb/pull/343) -- Enable `enable_shared_postgres_backend` by default by @[gfphoenix78](https://github.com/gfphoenix78) in [#351](https://github.com/cloudberrydb/cloudberrydb/pull/351) -- Correct `PlannerInfo` fields after rewritten for materialized views by @[avamingli](https://github.com/avamingli) in [#348](https://github.com/cloudberrydb/cloudberrydb/pull/348) -- Support the `HAVING` clause in origin queries for materialized views by @[avamingli](https://github.com/avamingli) in [#354](https://github.com/cloudberrydb/cloudberrydb/pull/354) -- Avoid misbehaviors that are not supported currently by @[avamingli](https://github.com/avamingli) in [#357](https://github.com/cloudberrydb/cloudberrydb/pull/357) -- Support `ORDER BY` in origin queries for materialized views 
by @[avamingli](https://github.com/avamingli) in [#358](https://github.com/cloudberrydb/cloudberrydb/pull/358) -- Make `shareinput_Xslice_dsm_handle_ptr` and `shareinput_Xslice_hash` non-static by @[shmiwy](https://github.com/shmiwy) in [#361](https://github.com/cloudberrydb/cloudberrydb/pull/361) -- Revert `ci` in the `upterm` stage to avoid failure by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#371](https://github.com/cloudberrydb/cloudberrydb/pull/371) -- Remove `b` and `\r` in the `gpssh` command output by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#355](https://github.com/cloudberrydb/cloudberrydb/pull/355) -- Do not inherit the parent's reloptions if the child partition's `AM` differs by @[yjhjstz](https://github.com/yjhjstz) in [#375](https://github.com/cloudberrydb/cloudberrydb/pull/375) -- Extend a new table access method to do acquire sample rows by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#374](https://github.com/cloudberrydb/cloudberrydb/pull/374) -- Use materialized views' `TupleDesc` to construct final columns by @[avamingli](https://github.com/avamingli) in [#366](https://github.com/cloudberrydb/cloudberrydb/pull/366) -- Add tests and benchmark in the `interconnect` module by @[jiaqizho](https://github.com/jiaqizho) in [#384](https://github.com/cloudberrydb/cloudberrydb/pull/384) -- Add a new callback `'scan_flags'` for the table access method by @[HuSen8891](https://github.com/HuSen8891) in [#391](https://github.com/cloudberrydb/cloudberrydb/pull/391) -- Export numeric structure and interface to public by @[jiaqizho](https://github.com/jiaqizho) in [#392](https://github.com/cloudberrydb/cloudberrydb/pull/392) -- Move the preloaded `interconnect` to the header file by @[gfphoenix78](https://github.com/gfphoenix78) in [#388](https://github.com/cloudberrydb/cloudberrydb/pull/388) -- Add an inline function `'table_scan_flags'` for table access method to get the flags by 
@[HuSen8891](https://github.com/HuSen8891) in [#395](https://github.com/cloudberrydb/cloudberrydb/pull/395) -- Add `gpshrink` to support elastic scaling by @[lss602726449](https://github.com/lss602726449) in [#393](https://github.com/cloudberrydb/cloudberrydb/pull/393) -- Revert [#201](https://github.com/cloudberrydb/cloudberrydb/pull/201) partially by @[Ray-Eldath](https://github.com/Ray-Eldath) in [#386](https://github.com/cloudberrydb/cloudberrydb/pull/386) -- Offload the entry root slice to `QE` by @[Ray-Eldath](https://github.com/Ray-Eldath) in [#385](https://github.com/cloudberrydb/cloudberrydb/pull/385) - -## Bug fixes - -- Fix the `AO`/`AOCS` `insertDesc` memory issue by @[avamingli](https://github.com/avamingli) in [#365](https://github.com/cloudberrydb/cloudberrydb/pull/365) -- Fix the issue that `CopyCreateStmtFields` lost the `intoPolicy` field by @[yjhjstz](https://github.com/yjhjstz) in [#372](https://github.com/cloudberrydb/cloudberrydb/pull/372) -- Fix the issue that `configue` is not consistent with `configure.ac` by @[lss602726449](https://github.com/lss602726449) in [#373](https://github.com/cloudberrydb/cloudberrydb/pull/373) -- Fix the unstable `ao`, `vacuum` and `icw` tests by @[jiaqizho](https://github.com/jiaqizho) in [#376](https://github.com/cloudberrydb/cloudberrydb/pull/376) -- Fix the issue that the shell script involves demo cluster by @[gfphoenix78](https://github.com/gfphoenix78) in [#377](https://github.com/cloudberrydb/cloudberrydb/pull/377) -- Fix `CREATE TYPE` in namespace `pg_ext_aux` by @[gfphoenix78](https://github.com/gfphoenix78) in [#380](https://github.com/cloudberrydb/cloudberrydb/pull/380) -- Fix the issue that `parallel_workers` is initialized as `0` for `CdbPathLocus_HashedOJ` by @[HuSen8891](https://github.com/HuSen8891) in [#387](https://github.com/cloudberrydb/cloudberrydb/pull/387) -- Fix the redefined `sm4` in `pgcrypto` and backend/crypto by @[jiaqizho](https://github.com/jiaqizho) in 
[#394](https://github.com/cloudberrydb/cloudberrydb/pull/394) - -## 🙌🏻️ New contributor - -@[shmiwy](https://github.com/shmiwy) made his (or her) first contribution in [#361](https://github.com/cloudberrydb/cloudberrydb/pull/361). - -## 🧑🏻‍💻 Contributors - -Thanks to all the contributors to make this release happen: @[avamingli](https://github.com/avamingli), @[gfphoenix78](https://github.com/gfphoenix78), @[shmiwy](https://github.com/shmiwy), @[wenchaozhang-123](https://github.com/wenchaozhang-123), @[yjhjstz](https://github.com/yjhjstz), @[lss602726449](https://github.com/lss602726449), @[jiaqizho](https://github.com/jiaqizho), @[HuSen8891](https://github.com/HuSen8891), @[Ray-Eldath](https://github.com/Ray-Eldath) 👍 diff --git a/docs/releases/release-1.5.3.md b/docs/releases/release-1.5.3.md deleted file mode 100644 index f8073e4327..0000000000 --- a/docs/releases/release-1.5.3.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: 1.5.3 ---- - -:::caution - -This is not an Apache release! - -::: - -# Cloudberry Database v1.5.3 Release Notes - -Version number: v1.5.3 - -Cloudberry Database v1.5.3 is a minor release that includes some improvements, bug fixes and doc updates. - -Quick try: [v1.5.3](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.3) - -:::note - -Before running the `./configure` command to [build the Cloudberry Database](/docs/cbdb-linux-compile#step-4-build-cloudberry-database) from the v1.5.3 source code files, make sure to execute the following commands to install the dependencies. - -```bash -yum install -y go -export GOPROXY=https://goproxy.io,direct -``` - -In later versions, we have refactored the relative module using Python (See PR [#435](https://github.com/cloudberrydb/cloudberrydb/pull/435)). If you are building the database from the source code of a later version, you can skip the above commands. 
-::: - -Full changelog: [https://github.com/cloudberrydb/cloudberrydb/compare/1.5.2...1.5.3](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.2...1.5.3) - -## New feature - -- Directory table - - Starting from v1.5.3, Cloudberry Database supports directory tables for managing multiple types of unstructured data. Developer users can use simple SQL statements to invoke the capabilities of multiple computing engines to achieve one-stop data processing and application development. - - Directory tables store, manage, and analyze unstructured data objects. They reside within tablespaces. When unstructured data files are imported, a directory table record (file metadata) is created, and the file itself is loaded into object storage. The table metadata remains associated with the corresponding object storage file. - - For details, see the [user document](/docs/advanced-analytics/directory-tables.md). - -## Improvements - -- Support `postgres_fdw` in the default build by @[smartyhero](https://github.com/smartyhero) in [#400](https://github.com/cloudberrydb/cloudberrydb/pull/400) -- Support using access method flags to specify the column-oriented scanning on custom tables by @[gongxun0928](https://github.com/gongxun0928) in [#407](https://github.com/cloudberrydb/cloudberrydb/pull/407) -- Add a configuration parameter `gp_random_insert_segments` to control the number of segments used for inserting data into randomly distributed tables by @[foreyes](https://github.com/foreyes) in [#406](https://github.com/cloudberrydb/cloudberrydb/pull/406) -- Implement Directory Table by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#390](https://github.com/cloudberrydb/cloudberrydb/pull/390) -- Disable dumping pax tables in `pg_dump` by @[jiaqizho](https://github.com/jiaqizho) in [#412](https://github.com/cloudberrydb/cloudberrydb/pull/412) -- Update the `googletest` module URL by @[tuhaihe](https://github.com/tuhaihe) in
[#429](https://github.com/cloudberrydb/cloudberrydb/pull/429) -- Enable privilege check when dropping directory table by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#425](https://github.com/cloudberrydb/cloudberrydb/pull/425) - - -## Bug fixes - -- Fix the issue that the outbound data buffer is not enough when calling `EVP_DecryptUpdate` (#479) by @[kongfanshen-0801](https://github.com/kongfanshen-0801) in [#408](https://github.com/cloudberrydb/cloudberrydb/pull/408) -- Fix the issue that `pgrx` cannot find the function `numeric_is_nan` or `numeric_is_inf` after numeric change interface by @[jiaqizho](https://github.com/jiaqizho) in [#410](https://github.com/cloudberrydb/cloudberrydb/pull/410) -- Fix a copy issue for Directory Table by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#416](https://github.com/cloudberrydb/cloudberrydb/pull/416) -- Fix visimap consults for unique checks during UPDATEs by @[lss602726449](https://github.com/lss602726449) in [#423](https://github.com/cloudberrydb/cloudberrydb/pull/423) -- Fix some CI pipeline issues for Directory Table by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#414](https://github.com/cloudberrydb/cloudberrydb/pull/414) -- Fix the issue that `gpconfig` does not escape the `$` character by @[Ray-Eldath](https://github.com/Ray-Eldath) in [#403](https://github.com/cloudberrydb/cloudberrydb/pull/403) - -## Doc updates - -- Update the `README.md` file by @[tuhaihe](https://github.com/tuhaihe) in [#411](https://github.com/cloudberrydb/cloudberrydb/pull/411) -- Update the deployment `README.md` by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) in [#409](https://github.com/cloudberrydb/cloudberrydb/pull/409) - -## 🙌🏻️ New contributors - -- @[kongfanshen-0801](https://github.com/kongfanshen-0801) made his (or her) first contribution in [#408](https://github.com/cloudberrydb/cloudberrydb/pull/408) -- @[foreyes](https://github.com/foreyes) made his (or her) first
contribution in [#406](https://github.com/cloudberrydb/cloudberrydb/pull/406) - -## 🧑🏻‍💻 Contributors - -Thanks to all the contributors to make this release happen: @[smartyhero](https://github.com/smartyhero), @[Ray-Eldath](https://github.com/Ray-Eldath), @[gongxun0928](https://github.com/gongxun0928), @[kongfanshen-0801](https://github.com/kongfanshen-0801), @[foreyes](https://github.com/foreyes), @[tuhaihe](https://github.com/tuhaihe), @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata), @[jiaqizho](https://github.com/jiaqizho), @[wenchaozhang-123](https://github.com/wenchaozhang-123), @[lss602726449](https://github.com/lss602726449), @[soumyadeep2007](https://github.com/soumyadeep2007), @[ashwinstar](https://github.com/ashwinstar) 👍 \ No newline at end of file diff --git a/docs/releases/release-1.5.4.md b/docs/releases/release-1.5.4.md deleted file mode 100644 index 2bcbc01688..0000000000 --- a/docs/releases/release-1.5.4.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: 1.5.4 ---- - -:::caution - -This is not an Apache release! - -::: - -# Cloudberry Database v1.5.4 Release Notes - -Version number: v1.5.4 - -Cloudberry Database v1.5.4 is a minor release that includes some improvements, changes, and bug fixes. 
- -Quick try: [v1.5.4](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.4) - -Full Changelog: [https://github.com/cloudberrydb/cloudberrydb/compare/1.5.3...1.5.4](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.3...1.5.4) - -## Improvements - -- Add the `cbdb_relation_size` function by [@fanfuxiaoran](https://github.com/fanfuxiaoran) in [#428](https://github.com/cloudberrydb/cloudberrydb/pull/428) -- Cherry-pick the updates from Greenplum Database upstream (02/07/2022-02/28/2022) by [@avamingli](https://github.com/avamingli) in [#432](https://github.com/cloudberrydb/cloudberrydb/pull/432) -- Support the `DISTINCT` clause in origin queries for "Answer Query Using Materialized Views" by [@avamingli](https://github.com/avamingli) in [#439](https://github.com/cloudberrydb/cloudberrydb/pull/439) -- Support the Postgres-specific `DISTINCT ON` clause in origin queries for "Answer Query Using Materialized Views" by [@avamingli](https://github.com/avamingli) in [#441](https://github.com/cloudberrydb/cloudberrydb/pull/441) -- Expand a new external variable tag by [@jiaqizho](https://github.com/jiaqizho) in [#443](https://github.com/cloudberrydb/cloudberrydb/pull/443) -- Expand the pending deletes interface by [@jiaqizho](https://github.com/jiaqizho) in [#442](https://github.com/cloudberrydb/cloudberrydb/pull/442) -- Support the `LIMIT`/`OFFSET`/`FETCH` clause in origin queries for "Answer Query Using Materialized Views" by [@avamingli](https://github.com/avamingli) in [#446](https://github.com/cloudberrydb/cloudberrydb/pull/446) -- Clean up the build tools and guide directories by [@tuhaihe](https://github.com/tuhaihe) in [#445](https://github.com/cloudberrydb/cloudberrydb/pull/445) -- Insert more data to make tuplestore spill in `regress/misc_jiras.sql` by [@fanfuxiaoran](https://github.com/fanfuxiaoran) in [#452](https://github.com/cloudberrydb/cloudberrydb/pull/452) - -## Bug fixes - -- Fix the motion toast error by 
[@gfphoenix78](https://github.com/gfphoenix78) in [#436](https://github.com/cloudberrydb/cloudberrydb/pull/436) -- Fix the issue of checking password file permissions in `dbconn.py` by [@fanfuxiaoran](https://github.com/fanfuxiaoran) in [#438](https://github.com/cloudberrydb/cloudberrydb/pull/438) -- Fix the ORCA issue of the flaky `Invalid key is inaccessible` fallback (#15147) by [@fanfuxiaoran](https://github.com/fanfuxiaoran) in [#437](https://github.com/cloudberrydb/cloudberrydb/pull/437) -- Fix `explain(locus)` issues by [@avamingli](https://github.com/avamingli) in [#433](https://github.com/cloudberrydb/cloudberrydb/pull/433) -- Use the checkberry `gpcheckperf` series updates to solve the problem that the `gpcheckperf` in Cloudberry Database v1.5.2 does not display disk information by [@August-beaulo](https://github.com/August-beaulo) in [#430](https://github.com/cloudberrydb/cloudberrydb/pull/430) - -## Other changes - -- Remove the `cbload`-related code by [@wenchaozhang-123](https://github.com/wenchaozhang-123) in [#431](https://github.com/cloudberrydb/cloudberrydb/pull/431) -- Refactor `cbload` to `gpdirtableload` using Python by [@wenchaozhang-123](https://github.com/wenchaozhang-123) in [#435](https://github.com/cloudberrydb/cloudberrydb/pull/435) -- Remove the CPP keywords that were used as function or parameter names by [@jiaqizho](https://github.com/jiaqizho) in [#449](https://github.com/cloudberrydb/cloudberrydb/pull/449) - -## 🙌🏻️ New contributor - -[@fanfuxiaoran](https://github.com/fanfuxiaoran) made their first contribution in [#428](https://github.com/cloudberrydb/cloudberrydb/pull/428) - -## 🧑🏻‍💻 Contributor list - -Thanks to all the contributors to make this release happen: [@higuoxing](https://github.com/higuoxing), [@tuhaihe](https://github.com/tuhaihe), [@August-beaulo](https://github.com/August-beaulo), [@avamingli](https://github.com/avamingli), [@piyushc01](https://github.com/piyushc01), [@red1452](https://github.com/red1452), 
[@edespino](https://github.com/edespino), [@jnihal](https://github.com/jnihal), [@Annu149](https://github.com/Annu149), [@jiaqizho](https://github.com/jiaqizho), [@wenchaozhang-123](https://github.com/wenchaozhang-123), [@dgkimura](https://github.com/dgkimura), [@fanfuxiaoran](https://github.com/fanfuxiaoran), [@gfphoenix78](https://github.com/gfphoenix78), [@HelloYJohn](https://github.com/HelloYJohn), [@adam8157](https://github.com/adam8157), [@xiaoxiaoHe-E](https://github.com/xiaoxiaoHe-E), [@InnerLife0](https://github.com/InnerLife0), [@wuyuhao28](https://github.com/wuyuhao28), [@l-wang](https://github.com/l-wang), [@lij55](https://github.com/lij55), [@huansong](https://github.com/huansong), [@chrishajas](https://github.com/chrishajas), [@tglsfdc](https://github.com/tglsfdc), [@fairyfar](https://github.com/fairyfar), [@kainwen](https://github.com/kainwen), [@nmisch](https://github.com/nmisch), [@my-ship-it](https://github.com/my-ship-it) \ No newline at end of file diff --git a/docs/releases/release-1.6.0.md b/docs/releases/release-1.6.0.md deleted file mode 100644 index 83dd36fea1..0000000000 --- a/docs/releases/release-1.6.0.md +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: 1.6.0 ---- - -:::caution - -This is not an Apache release! - -::: - -# Cloudberry Database v1.6.0 Release Notes - -Version number: v1.6.0 - -Cloudberry Database v1.6.0 is a minor release that includes some improvements, changes, and bug fixes. - -Quick try: [v1.6.0](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.6.0) - -Full Changelog: [https://github.com/cloudberrydb/cloudberrydb/compare/1.5.4...1.6.0](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.4...1.6.0) - -## Improvements and modifications - -:::tip -In the following notes, "AQUMV" refers to the "Answer Query Using Materialized Views" feature. See [Use Automatic Materialized Views for Query Optimization](/docs/performance/use-auto-materialized-view-to-answer-queries.md) for details. 
-::: - -- Add `warehouse_id` to `pg_stat_activity_extended` by @[lss602726449](https://github.com/lss602726449) in [#453](https://github.com/cloudberrydb/cloudberrydb/pull/453) -- Modify the ORCA optimizer's processing of `UNION ALL` distribution strategy by @[Light-City](https://github.com/Light-City) in [#399](https://github.com/cloudberrydb/cloudberrydb/pull/399) -- Update the location of `python-dependencies.txt` by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) in [#460](https://github.com/cloudberrydb/cloudberrydb/pull/460) -- Add hook for `exec_simple_query` and support custom spilling memory threshold in the `cost` module by @[songdongxiaoa2](https://github.com/songdongxiaoa2) in [#447](https://github.com/cloudberrydb/cloudberrydb/pull/447) -- Use `contain_var_clause` instead of `pull_var_clause` in AQUMV by @[avamingli](https://github.com/avamingli) in [#451](https://github.com/cloudberrydb/cloudberrydb/pull/451) -- Add `matchignore` and remove meta-command username output for Directory Table by @[edespino](https://github.com/edespino) in [#464](https://github.com/cloudberrydb/cloudberrydb/pull/464) -- Add hook to support different methods to create, drop, or alter warehouses by @[lss602726449](https://github.com/lss602726449) in [#462](https://github.com/cloudberrydb/cloudberrydb/pull/462) -- Re-use index after `ALTER COLUMN TYPE` shouldn't change `relfilenode` by @[lss602726449](https://github.com/lss602726449) in [#474](https://github.com/cloudberrydb/cloudberrydb/pull/474) -- Cherry-pick Resgroup-related code from Greenplum from Mar 2, 2022 to Feb 7, 2023 by @[foreyes](https://github.com/foreyes) in [#448](https://github.com/cloudberrydb/cloudberrydb/pull/448) -- Add a weekly build and release process by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) in [#459](https://github.com/cloudberrydb/cloudberrydb/pull/459) -- Consider mutable functions and bypass expressions with no `Vars` for view query targets in AQUMV by 
@[avamingli](https://github.com/avamingli) in [#455](https://github.com/cloudberrydb/cloudberrydb/pull/455) -- Expose the function to adjust view query and `varno` fix in AQUMV by @[avamingli](https://github.com/avamingli) in [#469](https://github.com/cloudberrydb/cloudberrydb/pull/469) -- Modify weekly build release details by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) in [#477](https://github.com/cloudberrydb/cloudberrydb/pull/477) -- Call the `query_info_collect_hook` function directly if an exception occurs by @[foreyes](https://github.com/foreyes) in [#481](https://github.com/cloudberrydb/cloudberrydb/pull/481) -- Cherry-pick: Work around a spurious compiler warning in `inet` operators by @[gfphoenix78](https://github.com/gfphoenix78) in [#499](https://github.com/cloudberrydb/cloudberrydb/pull/499) -- Add LoongArch (`loongarch64`) support to `s_lock.h` by @[wangzw](https://github.com/wangzw) in [#500](https://github.com/cloudberrydb/cloudberrydb/pull/500) -- Implement features of the directory table by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#484](https://github.com/cloudberrydb/cloudberrydb/pull/484) -- Re-enable the external FTS ICW by @[jiaqizho](https://github.com/jiaqizho) in [#483](https://github.com/cloudberrydb/cloudberrydb/pull/483) -- Change `AOCO_Compression` test case to validate `pg_relation_size` and `get_ao_compression_ratio` within a ±10% expected range by @[congxuebin](https://github.com/congxuebin) in [#493](https://github.com/cloudberrydb/cloudberrydb/pull/493) -- Maintain materialized view data status by @[avamingli](https://github.com/avamingli) in [#501](https://github.com/cloudberrydb/cloudberrydb/pull/501) -- Define `EXT_OID_START` to suggest an OID range for extensions by @[avamingli](https://github.com/avamingli) in [#514](https://github.com/cloudberrydb/cloudberrydb/pull/514) -- Ignore `pg_upgrade` to resolve CI issues by @[avamingli](https://github.com/avamingli) in 
[#515](https://github.com/cloudberrydb/cloudberrydb/pull/515) -- Reduce flakiness in `fts_segment_reset` test by @[jiaqizho](https://github.com/jiaqizho) in [#518](https://github.com/cloudberrydb/cloudberrydb/pull/518) -- Stabilize `gp_dqa` test case by @[congxuebin](https://github.com/congxuebin) in [#521](https://github.com/cloudberrydb/cloudberrydb/pull/521) -- Doc: Add more tools documentation for Cloudberry Database by @[tuhaihe](https://github.com/tuhaihe) in [#523](https://github.com/cloudberrydb/cloudberrydb/pull/523) -- Reimplement `COPY FROM` for directory table by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#527](https://github.com/cloudberrydb/cloudberrydb/pull/527) -- Add materialized view-related trigger cases in SingleNode mode by @[avamingli](https://github.com/avamingli) in [#517](https://github.com/cloudberrydb/cloudberrydb/pull/517) -- Refactor view query target list processing in AQUMV by @[avamingli](https://github.com/avamingli) in [#525](https://github.com/cloudberrydb/cloudberrydb/pull/525) -- Implement the tagging feature by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#444](https://github.com/cloudberrydb/cloudberrydb/pull/444) -- Update `orafce` to version 4.9 and enhance it by @[foreyes](https://github.com/foreyes) in [#524](https://github.com/cloudberrydb/cloudberrydb/pull/524) -- Allow normal materialized views to answer queries in AQUMV by @[avamingli](https://github.com/avamingli) in [#528](https://github.com/cloudberrydb/cloudberrydb/pull/528) -- Support `COPY FROM` for directory tables with entry distribution policy by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#533](https://github.com/cloudberrydb/cloudberrydb/pull/533) -- Minor improvements to `README.md` by @[reshke](https://github.com/reshke) in [#534](https://github.com/cloudberrydb/cloudberrydb/pull/534) -- Use `FDW` to query multiple servers as shards by @[oppenheimer01](https://github.com/oppenheimer01) in 
[#320](https://github.com/cloudberrydb/cloudberrydb/pull/320) -- Add an option in `psql` to avoid encoding issues on certain platforms by @[gfphoenix78](https://github.com/gfphoenix78) in [#535](https://github.com/cloudberrydb/cloudberrydb/pull/535) -- Refactor `cbdb_log` to use `vfprintf` by @[ruhuang2001](https://github.com/ruhuang2001) in [#506](https://github.com/cloudberrydb/cloudberrydb/pull/506) -- Update `aocsam.c`: Fix `safeFSWriteSize` argument type by @[reshke](https://github.com/reshke) in [#540](https://github.com/cloudberrydb/cloudberrydb/pull/540) -- Update the CI image to `RockyLinux8` to ensure CI proper operations by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) in [#556](https://github.com/cloudberrydb/cloudberrydb/pull/556) -- Remove the unsupported `AC_FUNC_FSEEKO` macro by @[gfphoenix78](https://github.com/gfphoenix78) in [#543](https://github.com/cloudberrydb/cloudberrydb/pull/543) -- Adjust test cases for the `cloudberrydb` binary swap tests by @[congxuebin](https://github.com/congxuebin) in [#537](https://github.com/cloudberrydb/cloudberrydb/pull/537) -- Implement `CREATE FOREIGN TABLE LIKE` by @[avamingli](https://github.com/avamingli) in [#554](https://github.com/cloudberrydb/cloudberrydb/pull/554) -- Enable `SingleQE` join with `SegmentGeneralWorkers` by @[avamingli](https://github.com/avamingli) in [#327](https://github.com/cloudberrydb/cloudberrydb/pull/327) -- Use `syscache` lookup in `ShouldUseReservedSegno` by @[gongxun0928](https://github.com/gongxun0928) in [#541](https://github.com/cloudberrydb/cloudberrydb/pull/541) -- Implement `READ_STRING_FIELD_NULL` serializable read function by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#553](https://github.com/cloudberrydb/cloudberrydb/pull/553) -- Update `appendonlywriter.c` to fix debug messages by @[reshke](https://github.com/reshke) in [#564](https://github.com/cloudberrydb/cloudberrydb/pull/564) -- Support locking directory tables by 
@[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#572](https://github.com/cloudberrydb/cloudberrydb/pull/572) -- Update `ALTER TABLE` help command content by @[EcaleD](https://github.com/EcaleD) in [#574](https://github.com/cloudberrydb/cloudberrydb/pull/574) -- Cherry-pick `Resgroup V2` and toolkit from Greenplum Database by @[jiaqizho](https://github.com/jiaqizho) in [#531](https://github.com/cloudberrydb/cloudberrydb/pull/531) -- Fallback to PostgreSQL optimizer for ORCA when NL inner plan is index scan by @[gfphoenix78](https://github.com/gfphoenix78) in [#565](https://github.com/cloudberrydb/cloudberrydb/pull/565) -- Add `FIXME` for redundant parameter in `ivm_visible_in_prestate` call by @[reshke](https://github.com/reshke) in [#578](https://github.com/cloudberrydb/cloudberrydb/pull/578) -- Support `DISTRIBUTED BY` clause specification when creating materialized views with the `IF NOT EXISTS` clause by @[reshke](https://github.com/reshke) in [#563](https://github.com/cloudberrydb/cloudberrydb/pull/563) -- Cherry-pick commits related to the recent `Resgroup V2` merge by @[reshke](https://github.com/reshke) in [#579](https://github.com/cloudberrydb/cloudberrydb/pull/579) -- Change temporary table names used for IMMV to less frequent sequences by @[reshke](https://github.com/reshke) in [#581](https://github.com/cloudberrydb/cloudberrydb/pull/581) -- Rephrase comments for deferred IVM case by @[reshke](https://github.com/reshke) in [#576](https://github.com/cloudberrydb/cloudberrydb/pull/576) -- Enable `fsync=on` by default in `gpAux/gpdemo/gpdemo-defaults.sh` by @[yjhjstz](https://github.com/yjhjstz) in [#585](https://github.com/cloudberrydb/cloudberrydb/pull/585) -- Forbid inherited tables from storing in `gp_matview_aux` by @[avamingli](https://github.com/avamingli) in [#587](https://github.com/cloudberrydb/cloudberrydb/pull/587) -- Check that relations have children when performing view matching in AQUMV by 
@[avamingli](https://github.com/avamingli) in [#577](https://github.com/cloudberrydb/cloudberrydb/pull/577) -- Update `check.c` to fix incorrect references to Cloudberry Database by @[reshke](https://github.com/reshke) in [#600](https://github.com/cloudberrydb/cloudberrydb/pull/600) -- Send rows in binary mode for `ANALYZE` by @[Light-City](https://github.com/Light-City) in [#601](https://github.com/cloudberrydb/cloudberrydb/pull/601) -- Enable `Resgroup` test cases in CI by @[jiaqizho](https://github.com/jiaqizho) in [#539](https://github.com/cloudberrydb/cloudberrydb/pull/539) -- Remove the `cbdb` weekly and release workflows by @[edespino](https://github.com/edespino) in [#615](https://github.com/cloudberrydb/cloudberrydb/pull/615) -- Free `tupleDesc` on commit or abort transaction by @[yjhjstz](https://github.com/yjhjstz) in [#551](https://github.com/cloudberrydb/cloudberrydb/pull/551) -- Replace `gpscp` with `gpsync` in external FTS by @[jiaqizho](https://github.com/jiaqizho) in [#470](https://github.com/cloudberrydb/cloudberrydb/pull/470) -- Add `ao_unique_index_build` test in `greenplum_schedule` by @[lss602726449](https://github.com/lss602726449) in [#562](https://github.com/cloudberrydb/cloudberrydb/pull/562) -- Avoid executing the `qual` clause twice by @[jiaqizho](https://github.com/jiaqizho) in [#396](https://github.com/cloudberrydb/cloudberrydb/pull/396) - -## Bug fixes - -- Fix the compile error caused by redefinition of `pipe` by @[gfphoenix78](https://github.com/gfphoenix78) in [#349](https://github.com/cloudberrydb/cloudberrydb/pull/349) -- Fix the issue with recording `password_history` when the role is not allowed to use profile by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#480](https://github.com/cloudberrydb/cloudberrydb/pull/480) -- Resolve the inconsistent result issue in `gpdtm_plpgsql` test case by @[congxuebin](https://github.com/congxuebin) in [#491](https://github.com/cloudberrydb/cloudberrydb/pull/491) -- Fix the 
issue of Cloudberry Database CI not running properly by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) in [#497](https://github.com/cloudberrydb/cloudberrydb/pull/497) -- Fix the incorrect display of copy number when using `COPY TO` on a replicated table by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#498](https://github.com/cloudberrydb/cloudberrydb/pull/498) -- Fix the memory block size issue in `bitmapinsert` by @[gfphoenix78](https://github.com/gfphoenix78) in [#495](https://github.com/cloudberrydb/cloudberrydb/pull/495) -- Fix the issue of ignoring direct table test files by @[avamingli](https://github.com/avamingli) in [#502](https://github.com/cloudberrydb/cloudberrydb/pull/502) -- Fix `gpinitsystem` issues by @[fanfuxiaoran](https://github.com/fanfuxiaoran) in [#490](https://github.com/cloudberrydb/cloudberrydb/pull/490) -- Fix compile errors detected by GCC 12 by @[gfphoenix78](https://github.com/gfphoenix78) in [#503](https://github.com/cloudberrydb/cloudberrydb/pull/503) -- Fix the `bsearch` compare function in `guc.c` by @[gfphoenix78](https://github.com/gfphoenix78) in [#507](https://github.com/cloudberrydb/cloudberrydb/pull/507) -- Fix commands that forgot to mark meta track by @[avamingli](https://github.com/avamingli) in [#505](https://github.com/cloudberrydb/cloudberrydb/pull/505) -- Fix compile error in C++20 by @[gfphoenix78](https://github.com/gfphoenix78) in [#510](https://github.com/cloudberrydb/cloudberrydb/pull/510) -- Fix the issue that `COPY TO` on directory table always returns `1` by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#522](https://github.com/cloudberrydb/cloudberrydb/pull/522) -- Fix `segfilecount` of AO/AOCO during bulk insertion using `COPY` by @[avamingli](https://github.com/avamingli) in [#530](https://github.com/cloudberrydb/cloudberrydb/pull/530) -- Fix the crash of `COPY FROM` on AO/AOCO/PAX partitioned tables by @[avamingli](https://github.com/avamingli) in 
[#549](https://github.com/cloudberrydb/cloudberrydb/pull/549) -- Fix the issue that occurs when copying some directory tables by @[wenchaozhang-123](https://github.com/wenchaozhang-123) in [#550](https://github.com/cloudberrydb/cloudberrydb/pull/550) -- Fix bugs with base relation truncation for IMMV by @[reshke](https://github.com/reshke) in [#570](https://github.com/cloudberrydb/cloudberrydb/pull/570) -- Fix the compile-time error in `SparseData.h` by @[reshke](https://github.com/reshke) in [#566](https://github.com/cloudberrydb/cloudberrydb/pull/566) -- Fix `pxf_fragment.c` compilation failure by @[Terry1504](https://github.com/Terry1504) in [#590](https://github.com/cloudberrydb/cloudberrydb/pull/590) -- Fix `pg_upgrade` version parsing when upgrading from Greenplum by @[reshke](https://github.com/reshke) in [#599](https://github.com/cloudberrydb/cloudberrydb/pull/599) -- Fix serialization of expression `AEXPR_NOT_DISTINCT` by @[avamingli](https://github.com/avamingli) in [#598](https://github.com/cloudberrydb/cloudberrydb/pull/598) -- Fix writable rules on tables with related materialized views by @[avamingli](https://github.com/avamingli) in [#584](https://github.com/cloudberrydb/cloudberrydb/pull/584) -- Fix the issue with writable CTEs causing incorrect materialized view data status by @[avamingli](https://github.com/avamingli) in [#602](https://github.com/cloudberrydb/cloudberrydb/pull/602) -- Fix the issue of being unable to pull up equivalence class using the projected target list by @[yjhjstz](https://github.com/yjhjstz) in [#606](https://github.com/cloudberrydb/cloudberrydb/pull/606) - -## 🙌🏻️ New contributors - -- @[Light-City](https://github.com/Light-City) made their first contribution in [#399](https://github.com/cloudberrydb/cloudberrydb/pull/399) -- @[songdongxiaoa2](https://github.com/songdongxiaoa2) made their first contribution in [#447](https://github.com/cloudberrydb/cloudberrydb/pull/447) -- @[edespino](https://github.com/edespino) made 
their first contribution in [#464](https://github.com/cloudberrydb/cloudberrydb/pull/464) -- @[congxuebin](https://github.com/congxuebin) made their first contribution in [#491](https://github.com/cloudberrydb/cloudberrydb/pull/491) -- @[wangzw](https://github.com/wangzw) made their first contribution in [#500](https://github.com/cloudberrydb/cloudberrydb/pull/500) -- @[reshke](https://github.com/reshke) made their first contribution in [#534](https://github.com/cloudberrydb/cloudberrydb/pull/534) -- @[oppenheimer01](https://github.com/oppenheimer01) made their first contribution in [#320](https://github.com/cloudberrydb/cloudberrydb/pull/320) -- @[ruhuang2001](https://github.com/ruhuang2001) made their first contribution in [#506](https://github.com/cloudberrydb/cloudberrydb/pull/506) -- @[EcaleD](https://github.com/EcaleD) made their first contribution in [#574](https://github.com/cloudberrydb/cloudberrydb/pull/574) -- @[Terry1504](https://github.com/Terry1504) made their first contribution in [#590](https://github.com/cloudberrydb/cloudberrydb/pull/590) - -## 🧑🏻‍💻 Contributors - -Thanks to all the contributors to make this release happen: @[Aegeaner](https://github.com/Aegeaner), @[EcaleD](https://github.com/EcaleD), @[Light-City](https://github.com/Light-City), @[RMTT](https://github.com/RMTT), @[SmartKeyerror](https://github.com/SmartKeyerror), @[Tao-T](https://github.com/Tao-T), @[Terry1504](https://github.com/Terry1504), @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata), @[adam8157](https://github.com/adam8157), @[airfan1994](https://github.com/airfan1994), @[andr-sokolov](https://github.com/andr-sokolov), @[ashwinstar](https://github.com/ashwinstar), @[avamingli](https://github.com/avamingli), @[beeender](https://github.com/beeender), @[bmdoil](https://github.com/bmdoil), @[charliettxx](https://github.com/charliettxx), @[congxuebin](https://github.com/congxuebin), @[dgkimura](https://github.com/dgkimura), 
@[dh-cloud](https://github.com/dh-cloud), @[divyeshddv](https://github.com/divyeshddv), @[dreamedcheng](https://github.com/dreamedcheng), @[edespino](https://github.com/edespino), @[eespino](https://github.com/eespino), @[fairyfar](https://github.com/fairyfar), @[fanfuxiaoran](https://github.com/fanfuxiaoran), @[foreyes](https://github.com/foreyes), @[gfphoenix78](https://github.com/gfphoenix78), @[gongxun0928](https://github.com/gongxun0928), @[gpopt](https://github.com/gpopt), @[higuoxing](https://github.com/higuoxing), @[huansong](https://github.com/huansong), @[hyongtao-db](https://github.com/hyongtao-db), @[jchampio](https://github.com/jchampio), @[jiaqizho](https://github.com/jiaqizho), @[jimmyyih](https://github.com/jimmyyih), @[kainwen](https://github.com/kainwen), @[l-wang](https://github.com/l-wang), @[lss602726449](https://github.com/lss602726449), @[oppenheimer01](https://github.com/oppenheimer01), @[reshke](https://github.com/reshke), @[ruhuang2001](https://github.com/ruhuang2001), @[songdongxiaoa2](https://github.com/songdongxiaoa2), @[soumyadeep2007](https://github.com/soumyadeep2007), @[thedanhoffman](https://github.com/thedanhoffman), @[tuhaihe](https://github.com/tuhaihe), @[wangzw](https://github.com/wangzw), @[wenchaozhang-123](https://github.com/wenchaozhang-123), @[yanwr1](https://github.com/yanwr1), @[yaowangm](https://github.com/yaowangm), @[yjhjstz](https://github.com/yjhjstz), @[zhrt123](https://github.com/zhrt123), @[zxuejing](https://github.com/zxuejing) - -🧂🧪 Try out Cloudberry Database via building [one Docker-based Sandbox](https://github.com/cloudberrydb/bootcamp), which is tailored to help you gain a basic understanding of Cloudberry Database's capabilities and features a range of materials, including tutorials, sample code, and crash courses. 
diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/releases/index.md b/i18n/zh/docusaurus-plugin-content-docs/current/releases/index.md new file mode 100644 index 0000000000..34d745bf31 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/current/releases/index.md @@ -0,0 +1,8 @@ +# Releases + +**Note:** + +We are currently preparing for the release under the Apache Incubator. Previous versions were non-Apache releases. If you want to install Apache Cloudberry, you can either: + +* **Manually build Cloudberry from the main branch**: This approach is suitable for developers or users who want to experience the latest features. You can clone the repository and follow the build instructions provided in the documentation. +* **Install the non-Apache release: [v1.6.0](https://github.com/apache/cloudberry/releases/tag/1.6.0)**: This version is stable and ready for production use, offering the rpm packages for both Enterprise Linux 8 and 9 version. \ No newline at end of file diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.0.md b/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.0.md deleted file mode 100644 index 85c802bdd6..0000000000 --- a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.0.md +++ /dev/null @@ -1,170 +0,0 @@ ---- -title: 1.5.0 ---- - -# Cloudberry Database v1.5.0 发版说明 - -版本号:v1.5.0 - -Cloudberry Database v1.5.0 新增了若干功能,包含了若干产品性能/稳定性优化,修复了若干错误 (bug)。 - -快速试用:[v1.5.0](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.0) - -## 新功能 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| 分类 | 功能 |
| --- | --- |
| 查询处理 | 支持并行创建 AO/AOCO 表和并行刷新物化视图 |
| 查询处理 | 支持自动使用物化视图进行查询优化 |
| 集群管理 | 支持部署单计算节点的集群 |
| 集群管理 | 支持使用命令行“一键”部署集群 |
| 存储 | 支持增量物化视图 |
| 存储 | 支持在 AO 表上使用唯一索引 |
| 安全 | 支持登录错误输入密码次数检查 |
- -各个新功能的详细说明如下: - -### 查询处理 - -- **支持并行创建 AO/AOCO 表和并行刷新物化视图。** - - 支持使用 `CREATE TABLE AS` 语句并行创建 Append-Optimized (AO) 表和 Append-Optimized Column Oriented (AOCO) 表,同时支持并行刷新基于该表的物化视图,从而加速建表和物化视图刷新。 - - 详情参见文档[并行创建 AO/AOCO 表与刷新物化视图](/i18n/zh/docusaurus-plugin-content-docs/current/parallel-create-ao-refresh-mv.md)。 - -- **支持自动使用物化视图进行查询优化**,即在查询规划阶段自动使用物化视图来计算部分或全部查询 (AQUMV)。这一功能使用场景: - - - 大数据量的聚合查询:对于需要从数百万条记录中进行聚合的查询,AQUMV 能显著减少查询时间。 - - 频繁更新的大表:在数据频繁更新的环境中,使用 IMV 可以确保查询结果的实时性和准确性。 - - 详情参见文档[使用自动物化视图进行查询优化](/i18n/zh/docusaurus-plugin-content-docs/current/use-auto-materialized-view-to-answer-queries.md)。 - -### 集群管理 - -- **支持以单计算节点部署集群。** - - 在 v1.5.0 以前,用户部署一套 Cloudberry Database 集群时,至少需要部署一个 Coordinator 节点和一个 Segment 节点,还需要指定繁多的配置信息和启动参数,这一过程相对复杂耗时。 - - 从 v1.5.0 开始,用户可以像部署 PostgreSQL 一样部署单计算节点的 Cloudberry Database 集群,集群仅包含一个 Coordinator 节点,无需指定繁多的配置参数,也无需预分配多个节点的计算资源。 - - 用户只需使用脚本工具 `gpdemo` 即可快速部署一套单节点的 Cloudberry Database 集群。详情参见文档[部署单计算节点的 Cloudberry Database 集群](/i18n/zh/docusaurus-plugin-content-docs/current/deploy-cbdb-with-single-node.md)。 - -- **支持使用命令行工具“一键”部署集群。** - - 在 v1.5.0 以前,如果用户想在单节点上部署带有 Segment 的小型集群用于演示,需要花费时间编写配置文件和参数。自 v1.5.0 起,用户只需要通过内置的 `gpdemo` 脚本,使用一条命令就能快速部署指定 Segment 数的 Cloudberry Database 集群。即: - - ```bash - gpdemo - ``` - - 详情参见文档 [gpdemo 快捷部署使用文档](/i18n/zh/docusaurus-plugin-content-docs/current/sys-utilities/gpdemo.md)。 - -### 存储 - -- **支持增量物化视图。** - - 增量物化视图是物化视图的一种特殊形式。当数据在基础表中发生变化时(例如插入、更新、删除操作),增量物化视图不需要重新计算整个视图中的所有数据。相反,它只更新那些自上次刷新以来发生变化的部分。这样可以节省大量的计算资源和时间,显著提高性能,尤其是在处理大型数据集时。 - - 自 v1.5.0 起,如果查询时有中间结果集需要加速,或者在读多写少的场景下,用户可以使用增量物化视图来加速查询,详情参见文档[增量物化视图说明文档](/i18n/zh/docusaurus-plugin-content-docs/current/use-incremental-materialized-view.md)。 - -- **在 AO 表上使用唯一索引。** - - 自 v1.5.0 起,你可以在 Cloudberry Database 的 Append-Optimized (AO) 或 Append-Optimized Column Store (AOCS) 表上添加唯一索引。有了唯一索引,Cloudberry Database 会在将数据插入到 AO 表时,强制检查唯一性约束,从而保证数据的唯一性,同时能够与优化器一起优化特定的查询,从而提高数据库的查询性能。但这也带来的一定的开销用于维护唯一索引,尤其是在插入数据时。 - - 详情参见文档[在 AO 
表上使用唯一索引](/i18n/zh/docusaurus-plugin-content-docs/current/use-unique-index-on-ao-tables.md)。 - -### 安全 - -- **支持创建和绑定数据库的用户密码策略配置。** - - Profile,即密码策略配置,用于控制数据库中用户的密码安全策略。Profile 定义了用户管理和重复使用密码的规则。通过配置 Profile,数据库管理员可以使用 SQL 语句强制添加一些约束,例如在一定次数的登录失败后锁定账户,或者控制密码重复使用次数。 - - 自 v1.5.0 起,Cloudberry Database 支持通过 SQL 语句创建 Profile,并将 Profile 绑定到一个或多个用户中,从而控制数据库用户的密码安全策略。 - - 详情参见文档[在 Cloudberry Database 中配置密码策略](/i18n/zh/docusaurus-plugin-content-docs/current/set-password-profile.md)。 - -## 变更说明 - -### SQL 语法变更说明 - -`CREATE MATERIALIZED VIEW` 新支持 `INCREMENTAL` 选项。你可以使用 SQL 命令 `CREATE INCREMENTAL MATERIALIZED VIEW` 来创建增量物化视图。完整的语法支持如下: - -```sql -CREATE [INCREMENTAL] MATERIALIZED VIEW [ IF NOT EXISTS ] table_name - [ (column_name [, ...] ) ] - [ USING method ] - [ WITH ( storage_parameter [= value] [, ... ] ) ] - [ TABLESPACE tablespace_name ] - AS query - [ WITH [ NO ] DATA ] -``` - -### 功能变更说明 - -无 - -### 参数变更说明 - -无 - -## Bug 修复 - -- 修复了 AOCO 表内存越界的问题。该 Bug 导致的报错如下所示: - - ```sql - SET default_table_access_method=ao_column; - CREATE temp TABLE nocolumns(); - - SELECT EXISTS(SELECT * FROM nocolumns); - - WARNING: detected write past chunk end in ExecutorState 0x8f79b78 (seg0 slice1 127.0.1.1:7002 pid=16215) - ``` - -- 修复了使用 `EXPLAIN` 查看查询计划时,输出结果中的 `operatorMem` 对齐问题,修复前显示如下: - - ```sql - SET gp_resqueue_print_operator_memory_limits=ON; - EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM test_hj_spill; - - QUERY PLAN - ---------------------------------------------------------------------------- - Finalize AggregateoperatorMem: 100 kB - - -> Gather Motion 3:1 (slice1; segments: 3)operatorMem: 100 kB - - -> Partial AggregateoperatorMem: 100 kB - - -> Seq Scan on test_hj_spilloperatorMem: 100 kB - ``` - -- 修复了在特定条件下导致快照功能内存异常的问题,这个问题可能会在某些情况下使事务处理过程中发生 core dump。 -- 改进了并行扫描操作时,并行哈希连接中内部表格大小的估算精度。 -- 并行扫描时新增对 Semi HashJoin 类型的支持。 -- 改进了 `NOT IN` 语句的处理逻辑,现在它可以正确地处理包含 `NULL` 值的情况。例如,在执行像 `SELECT c1 FROM t1_lasj WHERE c1 NOT IN (SELECT c1n FROM t2_lasj_has_null WHERE c1n IS NULL OR c1n IS 
NULL);` 这样的查询时,将得到正确的结果。 -- 修复了在 macOS 上编译运行遇到的问题。 -- 修复了 `CREATE EXTENSION` 时,用户的 `search_path` 发生变更的问题。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.1.md b/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.1.md deleted file mode 100644 index 8bcd56b500..0000000000 --- a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.1.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: 1.5.1 ---- - -# Cloudberry Database v1.5.1 发版说明 - -版本号:v1.5.1 - -Cloudberry Database v1.5.1 是一个小版本,包含了一些 bug 修复。 - -快速试用:[v1.5.1](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.1) - -## 提升改进 - -* 检查索引的唯一性并对非堆关系跳过预取 [#337](https://github.com/cloudberrydb/cloudberrydb/pull/337) @[gfphoenix78](https://github.com/gfphoenix78) -* 在物化视图上计算聚合 [#322](https://github.com/cloudberrydb/cloudberrydb/pull/322) @[avamingli](https://github.com/avamingli) -* 引入 `pg_ext_aux` 命名空间用于扩展 [#333](https://github.com/cloudberrydb/cloudberrydb/pull/333) @[gfphoenix78](https://github.com/gfphoenix78) -* 为扩展实现 DML hook [#332](https://github.com/cloudberrydb/cloudberrydb/pull/332) @[gfphoenix78](https://github.com/gfphoenix78) -* 支持自定义对象类 [#335](https://github.com/cloudberrydb/cloudberrydb/pull/335) @[gfphoenix78](https://github.com/gfphoenix78) -* 为自定义表访问方法添加 reloption 支持 [#336](https://github.com/cloudberrydb/cloudberrydb/pull/336) @[gfphoenix78](https://github.com/gfphoenix78) -* 在 `TableAmRoutine` 中引入回调以管理交换关系文件 [#338](https://github.com/cloudberrydb/cloudberrydb/pull/338) @[gfphoenix78](https://github.com/gfphoenix78) -* 更新终端中与 CloudberryDB 相关的字段显示信息 [#300](https://github.com/cloudberrydb/cloudberrydb/pull/300) @[tuhaihe](https://github.com/tuhaihe) -* 重构表 AM,以在 `scan_begin_extractcolumns` 中包含执行上下文 [#329](https://github.com/cloudberrydb/cloudberrydb/pull/329) @[gfphoenix78](https://github.com/gfphoenix78) -* 公开函数以支持分区表的 PAX [#328](https://github.com/cloudberrydb/cloudberrydb/pull/328) @[gfphoenix78](https://github.com/gfphoenix78) - -## 
Bug 修复 - -* 修复非法 `PGnotify` 声明的问题 [#325](https://github.com/cloudberrydb/cloudberrydb/pull/325) @[jiaqizho](https://github.com/jiaqizho) -* 修复 `get_size_from_segDBs` 中可能的使用后释放 (UAF) 问题 [#326](https://github.com/cloudberrydb/cloudberrydb/pull/326) @[jiaqizho](https://github.com/jiaqizho) -* 修正 psql 中 `\d` 命令从 `pg_am` 显示存储名称的问题 [#330](https://github.com/cloudberrydb/cloudberrydb/pull/330) @[gfphoenix78](https://github.com/gfphoenix78) -* 修复与 `pg_ext_aux` 命名空间相关的问题 [#340](https://github.com/cloudberrydb/cloudberrydb/pull/340) @[gfphoenix78](https://github.com/gfphoenix78) diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.2.md b/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.2.md deleted file mode 100644 index 9312335c3a..0000000000 --- a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.2.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: 1.5.2 ---- - -# Cloudberry Database v1.5.2 发版说明 - -版本号:v1.5.2 - -Cloudberry Database v1.5.2 是一个小版本,包含了一些提升改进和 bug 修复。 - -快速试用:[v1.5.2](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.2) - -完整的变更日志:[https://github.com/cloudberrydb/cloudberrydb/compare/1.5.1...1.5.2](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.1...1.5.2) - -## 提升改进 - -- 对于物化视图功能,支持原查询中的 `GROUP BY`、`GROUPING SETS`、`ROLLUP` 和 `CUBE` 子句 [#342](https://github.com/cloudberrydb/cloudberrydb/pull/342) by @[avamingli](https://github.com/avamingli) -- 使用 `pg_class` 替代 `gp_segment_configuration` 来测试 `Entry` [#294](https://github.com/cloudberrydb/cloudberrydb/pull/294) by @[avamingli](https://github.com/avamingli) -- GPORCA 优化器现在支持 PAX(Partition Attributes Across)存储表 [#346](https://github.com/cloudberrydb/cloudberrydb/pull/346) by @[gfphoenix78](https://github.com/gfphoenix78) -- 添加 `RelationIsNonblockRelation` 宏以扩展类似 `AO`/`CO` 的代码路径 [#347](https://github.com/cloudberrydb/cloudberrydb/pull/347) by @[gfphoenix78](https://github.com/gfphoenix78) -- 为自定义表访问方式添加特性编码选项 
[#343](https://github.com/cloudberrydb/cloudberrydb/pull/343) by @[gfphoenix78](https://github.com/gfphoenix78) -- 默认启用 `enable_shared_postgres_backend` [#351](https://github.com/cloudberrydb/cloudberrydb/pull/351) by @[gfphoenix78](https://github.com/gfphoenix78) -- 对于物化视图,修正重写后的 `PlannerInfo` 字段 [#348](https://github.com/cloudberrydb/cloudberrydb/pull/348) by @[avamingli](https://github.com/avamingli) -- 对于物化视图,支持原查询中的 `HAVING` 子句 [#354](https://github.com/cloudberrydb/cloudberrydb/pull/354) by @[avamingli](https://github.com/avamingli) -- 避免当前不支持的行为 [#357](https://github.com/cloudberrydb/cloudberrydb/pull/357) by @[avamingli](https://github.com/avamingli) -- 对于物化视图,支持原查询中的 `ORDER BY` 子句 [#358](https://github.com/cloudberrydb/cloudberrydb/pull/358) by @[avamingli](https://github.com/avamingli) -- 使 `shareinput_Xslice_dsm_handle_ptr` 和 `shareinput_Xslice_hash` 非静态化 [#361](https://github.com/cloudberrydb/cloudberrydb/pull/361) by @[shmiwy](https://github.com/shmiwy) -- 在 `upterm` 阶段撤销 `ci` 以避免失败 [#371](https://github.com/cloudberrydb/cloudberrydb/pull/371) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 从 `gpssh` 命令输出中移除 `b` 和 `\r` [#355](https://github.com/cloudberrydb/cloudberrydb/pull/355) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 如果子分区的表访问方式不同,则不继承父分区的 `reloptions` [#375](https://github.com/cloudberrydb/cloudberrydb/pull/375) by @[yjhjstz](https://github.com/yjhjstz) -- 扩展新的表访问方法以获取样本行 [#374](https://github.com/cloudberrydb/cloudberrydb/pull/374) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 使用物化视图的 `TupleDesc` 构建最终列 [#366](https://github.com/cloudberrydb/cloudberrydb/pull/366) by @[avamingli](https://github.com/avamingli) -- 在 `interconnect` 模块中添加测试和基准测试 [#384](https://github.com/cloudberrydb/cloudberrydb/pull/384) by @[jiaqizho](https://github.com/jiaqizho) -- 为表访问方法添加新的回调 `'scan_flags'` [#391](https://github.com/cloudberrydb/cloudberrydb/pull/391) by @[HuSen8891](https://github.com/HuSen8891) -- 
将数值结构和接口导出到公共部分 [#392](https://github.com/cloudberrydb/cloudberrydb/pull/392) by @[jiaqizho](https://github.com/jiaqizho) -- 将预加载的 `interconnect` 移动到头文件中 [#388](https://github.com/cloudberrydb/cloudberrydb/pull/388) by @[gfphoenix78](https://github.com/gfphoenix78) -- 为表访问方法添加内联函数 `'table_scan_flags'` 以获取标志 [#395](https://github.com/cloudberrydb/cloudberrydb/pull/395) by @[HuSen8891](https://github.com/HuSen8891) -- 添加 `gpshrink` 以支持弹性扩缩容 [#393](https://github.com/cloudberrydb/cloudberrydb/pull/393) by @[lss602726449](https://github.com/lss602726449) -- 提交 [#386](https://github.com/cloudberrydb/cloudberrydb/pull/386) 以部分回滚 [#201](https://github.com/cloudberrydb/cloudberrydb/pull/201) by @[Ray-Eldath](https://github.com/Ray-Eldath) -- 将入口根切片卸载到 `QE` [#385](https://github.com/cloudberrydb/cloudberrydb/pull/385) by @[Ray-Eldath](https://github.com/Ray-Eldath) - -## Bug 修复 - -- 修复 `AO`/`AOCS` `insertDesc` 内存问题 [#365](https://github.com/cloudberrydb/cloudberrydb/pull/365) by @[avamingli](https://github.com/avamingli) -- 修复 `CopyCreateStmtFields` 丢失 `intoPolicy` 字段的问题 [#372](https://github.com/cloudberrydb/cloudberrydb/pull/372) by @[yjhjstz](https://github.com/yjhjstz) -- 修复 `configue` 与 `configure.ac` 不一致的问题 [#373](https://github.com/cloudberrydb/cloudberrydb/pull/373) by @[lss602726449](https://github.com/lss602726449) -- 修复不稳定的 `ao`、`vacuum` 和 `icw` 测试 [#376](https://github.com/cloudberrydb/cloudberrydb/pull/376) by @[jiaqizho](https://github.com/jiaqizho) -- 修复涉及演示集群的 shell 脚本问题 [#377](https://github.com/cloudberrydb/cloudberrydb/pull/377) by @[gfphoenix78](https://github.com/gfphoenix78) -- 修复命名空间 `pg_ext_aux` 中的 `CREATE TYPE` [#380](https://github.com/cloudberrydb/cloudberrydb/pull/380) by @[gfphoenix78](https://github.com/gfphoenix78) -- 修复 `CdbPathLocus_HashedOJ` 对于 `parallel_workers` 初始化为 `0` 的问题 [#387](https://github.com/cloudberrydb/cloudberrydb/pull/387) by @[HuSen8891](https://github.com/HuSen8891) -- 修复 `pgcrypto` 和后端/crypto 中重新定义的 `sm4` 
[#394](https://github.com/cloudberrydb/cloudberrydb/pull/394) by @[jiaqizho](https://github.com/jiaqizho) - -## 🙌🏻️ 新贡献者 - -@[shmiwy](https://github.com/) 在 [#361](https://github.com/cloudberrydb/cloudberrydb/pull/361) 中做出了他的首次贡献。 - -## 🧑🏻‍💻 贡献者列表 - -感谢所有贡献者使此版本发布成为可能:@[avamingli](https://github.com/avamingli), @[gfphoenix78](https://github.com/gfphoenix78), @[shmiwy](https://github.com/shmiwy), @[wenchaozhang-123](https://github.com/wenchaozhang-123), @[yjhjstz](https://github.com/yjhjstz), @[lss602726449](https://github.com/lss602726449), @[jiaqizho](https://github.com/jiaqizho), @[HuSen8891](https://github.com/HuSen8891), @[Ray-Eldath](https://github.com/Ray-Eldath) 👍 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.3.md b/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.3.md deleted file mode 100644 index 61401c7312..0000000000 --- a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.3.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: 1.5.3 ---- - -# Cloudberry Database v1.5.3 发版说明 - -版本号: v1.5.3 - -Cloudberry Database v1.5.3 是一个小版本更新,包含了一些提升改进、 bug 修复和文档更新。 - -快速试用: [v1.5.3](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.3) - -:::note - -如使用源码部署 1.5.3 版本的 Cloudberry Database,请在执行[构建 Cloudberry Database](/i18n/zh/docusaurus-plugin-content-docs/current/cbdb-linux-compile.md#第-4-步构建-cloudberry-database)中的 `./configure` 文件前运行以下命令,以安装所需的依赖。 - -``` -yum install -y go -export GOPROXY=https://goproxy.io,direct -``` - -在后续版本中,我们使用 Python 重构了相关模块(见 PR [#435](https://github.com/cloudberrydb/cloudberrydb/pull/435))。因此,如果你使用最新源码进行编译,可以略过上述命令。 -::: - - -完整更新日志: [https://github.com/cloudberrydb/cloudberrydb/compare/1.5.2...1.5.3](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.2...1.5.3) - -## 新功能 - -- Directory Table(目录表) - - 自 v1.5.3 版本起,Cloudberry Database 引入了 Directory Table,用于统一纳管对象存储上的非结构化数据。 - - 面对这些技术挑战,Cloudberry Database 定义了一种 Directory Table 表,用于纳管多种类型的非结构化数据。开发者用户只需使用简单的 SQL 
语句就能调用各种计算引擎的能力,实现一站式数据加工和应用开发。 - - Directory Table 定位于存储、管理和分析非结构化数据对象。Directory Table 位于表空间中,将非结构化数据文件导入 Directory Table 后,会在 Directory Table 上创建一条记录,即该文件的元数据信息,而文件本身则加载进对象存储中。表上的元数据与对象存储上的对应文件相关联。 - - 详情参见[用户文档](/i18n/zh/docusaurus-plugin-content-docs/current/advanced-analytics/directory-tables.md)。 - -## 提升改进 - -- 在默认 build 中支持 `postgres_fdw` [#400](https://github.com/cloudberrydb/cloudberrydb/pull/400) by @[smartyhero](https://github.com/smartyhero) -- 访问方法 flags 现在可以指示是否支持自定义表的列定向扫描 [#407](https://github.com/cloudberrydb/cloudberrydb/pull/407) by @[gongxun0928](https://github.com/gongxun0928) -- 添加配置参数 `gp_random_insert_segments` 以控制用于随机分布表插入的 segment 数量 [#406](https://github.com/cloudberrydb/cloudberrydb/pull/406) by @[foreyes](https://github.com/foreyes) -- 支持目录表 [#390](https://github.com/cloudberrydb/cloudberrydb/pull/390) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 禁止在 `pg_dump` 中导出 pax 表 [#412](https://github.com/cloudberrydb/cloudberrydb/pull/412) by @[jiaqizho](https://github.com/jiaqizho) -- 更新 `googletest` 模块 URL [#429](https://github.com/cloudberrydb/cloudberrydb/pull/429) by @[tuhaihe](https://github.com/tuhaihe) - -## Bug 修复 - -- 修复调用 `EVP_DecryptUpdate` 时出站数据缓冲区不足的问题 (#479) [#408](https://github.com/cloudberrydb/cloudberrydb/pull/408) by @[kongfanshen-0801](https://github.com/kongfanshen-0801) -- 修复 `pgrx` 在数值变化接口后找不到 `numeric_is_nan` or `numeric_is_inf` 的问题 [#410](https://github.com/cloudberrydb/cloudberrydb/pull/410) by @[jiaqizho](https://github.com/jiaqizho) -- 修复从目录表复制时存在的问题 [#416](https://github.com/cloudberrydb/cloudberrydb/pull/416) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 修复 `UPDATE` 时用于唯一性检查的 `visimap` 查询 [#423](https://github.com/cloudberrydb/cloudberrydb/pull/423) by @[lss602726449](https://github.com/lss602726449) -- 修复目录表 CI 管道存在的问题 [#414](https://github.com/cloudberrydb/cloudberrydb/pull/414) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 修复删除目录权限检查问题 
[#425](https://github.com/cloudberrydb/cloudberrydb/pull/425) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 修复 `gpconfig` 不转义 '$' 字符的问题 [#403](https://github.com/cloudberrydb/cloudberrydb/pull/403) by @[Ray-Eldath](https://github.com/Ray-Eldath) - -## 文档更新 - -- 更新 README.md 文件 [#411](https://github.com/cloudberrydb/cloudberrydb/pull/411) by @[tuhaihe](https://github.com/tuhaihe) -- 更新部署的 README.md [#409](https://github.com/cloudberrydb/cloudberrydb/pull/409) - by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) - -## 🙌🏻️ 新贡献者 - -- @[kongfanshen-0801](https://github.com/kongfanshen-0801) 在 [#408](https://github.com/cloudberrydb/cloudberrydb/pull/408) 做出了他的首次贡献。 -- @[foreyes](https://github.com/foreyes) 在 [#406](https://github.com/cloudberrydb/cloudberrydb/pull/406) 做出了他的首次贡献。 - -## 🧑🏻‍💻 贡献者列表 - -感谢所有贡献者使此版本发布成为可能: @[smartyhero](https://github.com/smartyhero), @[Ray-Eldath](https://github.com/Ray-Eldath), @[gongxun0928](https://github.com/gongxun0928), @[kongfanshen-0801](https://github.com/kongfanshen-0801), @[foreyes](https://github.com/foreyes), @[tuhaihe](https://github.com/tuhaihe), @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata), @[jiaqizho](https://github.com/jiaqizho), @[wenchaozhang-123](https://github.com/wenchaozhang-123), @[lss602726449](https://github.com/lss602726449), @[soumyadeep2007](https://github.com/soumyadeep2007), @[ashwinstar](https://github.com/ashwinstar) 👍 \ No newline at end of file diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.4.md b/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.4.md deleted file mode 100644 index 27a680ac3a..0000000000 --- a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.5.4.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: 1.5.4 ---- - -# Cloudberry Database v1.5.4 发版说明 - -版本号:v1.5.4 - -Cloudberry Database v1.5.4 是一个小版本,包含了一些提升改进、变更和 bug 修复。 - 
-快速试用:[v1.5.4](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.5.4) - -完整的变更日志:[https://github.com/cloudberrydb/cloudberrydb/compare/1.5.3...1.5.4](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.3...1.5.4) - -## 提升改进 - -- 添加 `cbdb_relation_size` 函数 [#428](https://github.com/cloudberrydb/cloudberrydb/pull/428) by [@fanfuxiaoran](https://github.com/fanfuxiaoran) -- 从 Greenplum Database 上游 cherry-pick 更新 (02/07/2022-02/28/2022) [#432](https://github.com/cloudberrydb/cloudberrydb/pull/432) by [@avamingli](https://github.com/avamingli) -- 在 "Answer Query Using Materialized Views" 的原始查询中支持 `DISTINCT` 子句 [#439](https://github.com/cloudberrydb/cloudberrydb/pull/439) by [@avamingli](https://github.com/avamingli) -- 在 "Answer Query Using Materialized Views" 的原始查询中支持 Postgres 特有的 `DISTINCT ON` 子句 [#441](https://github.com/cloudberrydb/cloudberrydb/pull/441) by [@avamingli](https://github.com/avamingli) -- 扩展新的外部变量标签 [#443](https://github.com/cloudberrydb/cloudberrydb/pull/443) by [@jiaqizho](https://github.com/jiaqizho) -- 扩展挂起删除接口 [#442](https://github.com/cloudberrydb/cloudberrydb/pull/442) by [@jiaqizho](https://github.com/jiaqizho) -- 在 "Answer Query Using Materialized Views" 的原始查询中支持 `LIMIT`/`OFFSET`/`FETCH` 子句 [#446](https://github.com/cloudberrydb/cloudberrydb/pull/446) by [@avamingli](https://github.com/avamingli) -- 清理构建工具和指南目录 [#445](https://github.com/cloudberrydb/cloudberrydb/pull/445) by [@tuhaihe](https://github.com/tuhaihe) -- 插入更多数据以在 `regress/misc_jiras.sql` 中使 tuplestore 溢出 [#452](https://github.com/cloudberrydb/cloudberrydb/pull/452) by [@fanfuxiaoran](https://github.com/fanfuxiaoran) - -## Bug 修复 - -- 修复 motion toast 错误 [#436](https://github.com/cloudberrydb/cloudberrydb/pull/436) by [@gfphoenix78](https://github.com/gfphoenix78) -- 修复 `dbconn.py` 中检查密码文件权限的问题 [#438](https://github.com/cloudberrydb/cloudberrydb/pull/438) by [@fanfuxiaoran](https://github.com/fanfuxiaoran) -- 修复 ORCA 的 `Invalid key is inaccessible` 备用方案的间歇性问题 (#15147) 
[#437](https://github.com/cloudberrydb/cloudberrydb/pull/437) by [@fanfuxiaoran](https://github.com/fanfuxiaoran) -- 修复 `explain(locus)` 的问题 [#433](https://github.com/cloudberrydb/cloudberrydb/pull/433) by [@avamingli](https://github.com/avamingli) -- 使用 checkberry `gpcheckperf` 系列更新解决 Cloudberry Database v1.5.2 中 `gpcheckperf` 不显示磁盘信息的问题 [#430](https://github.com/cloudberrydb/cloudberrydb/pull/430) by [@August-beaulo](https://github.com/August-beaulo) - -## 其他变更 - -- 移除 `cbload` 相关代码 [#431](https://github.com/cloudberrydb/cloudberrydb/pull/431) by [@wenchaozhang-123](https://github.com/wenchaozhang-123) -- 使用 Python 重构 `cbload` 为 `gpdirtableload` [#435](https://github.com/cloudberrydb/cloudberrydb/pull/435) by [@wenchaozhang-123](https://github.com/wenchaozhang-123) -- 移除用作函数或参数名称的 CPP 关键字 [#449](https://github.com/cloudberrydb/cloudberrydb/pull/449) by [@jiaqizho](https://github.com/jiaqizho) - -## 🙌🏻️ 新贡献者 - -[@fanfuxiaoran](https://github.com/fanfuxiaoran) 在 [#428](https://github.com/cloudberrydb/cloudberrydb/pull/428) 中做出了他的首次贡献。 - -## 🧑🏻‍💻 贡献者列表 - -感谢所有贡献者使此版本发布成为可能:[@higuoxing](https://github.com/higuoxing), [@tuhaihe](https://github.com/tuhaihe), [@August-beaulo](https://github.com/August-beaulo), [@avamingli](https://github.com/avamingli), [@piyushc01](https://github.com/piyushc01), [@red1452](https://github.com/red1452), [@edespino](https://github.com/edespino), [@jnihal](https://github.com/jnihal), [@Annu149](https://github.com/Annu149), [@jiaqizho](https://github.com/jiaqizho), [@wenchaozhang-123](https://github.com/wenchaozhang-123), [@dgkimura](https://github.com/dgkimura), [@fanfuxiaoran](https://github.com/fanfuxiaoran), [@gfphoenix78](https://github.com/gfphoenix78), [@HelloYJohn](https://github.com/HelloYJohn), [@adam8157](https://github.com/adam8157), [@xiaoxiaoHe-E](https://github.com/xiaoxiaoHe-E), [@InnerLife0](https://github.com/InnerLife0), [@wuyuhao28](https://github.com/wuyuhao28), [@l-wang](https://github.com/l-wang), 
[@lij55](https://github.com/lij55), [@huansong](https://github.com/huansong), [@chrishajas](https://github.com/chrishajas), [@tglsfdc](https://github.com/tglsfdc), [@fairyfar](https://github.com/fairyfar), [@kainwen](https://github.com/kainwen), [@nmisch](https://github.com/nmisch), [@my-ship-it](https://github.com/my-ship-it) diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.6.0.md b/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.6.0.md deleted file mode 100644 index 0241300d81..0000000000 --- a/i18n/zh/docusaurus-plugin-content-docs/current/releases/release-1.6.0.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -title: 1.6.0 ---- - -# Cloudberry Database v1.6.0 发版说明 - -版本号:v1.6.0 - -Cloudberry Database v1.6.0 是一个小版本,包含一系列提升改进、变更和 bug 修复。 - -快速试用:[v1.6.0](https://github.com/cloudberrydb/cloudberrydb/releases/tag/1.6.0) - -完整的变更日志:[https://github.com/cloudberrydb/cloudberrydb/compare/1.5.4...1.6.0](https://github.com/cloudberrydb/cloudberrydb/compare/1.5.4...1.6.0) - -## 提升与变更 - -:::tip 提示 -在以下说明中,"AQUMV" 指 "Answer Query Using Materialized Views" 功能,即使用物化视图来加速查询,详情请参阅[自动使用物化视图进行查询优化](/i18n/zh/docusaurus-plugin-content-docs/current/use-auto-materialized-view-to-answer-queries.md)。 -::: - -- 将 `warehouse_id` 添加到 `pg_stat_activity_extended` [#453](https://github.com/cloudberrydb/cloudberrydb/pull/453) by @[lss602726449](https://github.com/lss602726449) -- 修改 ORCA 优化器对 `UNION ALL` 分发策略的处理 [#399](https://github.com/cloudberrydb/cloudberrydb/pull/399) by @[Light-City](https://github.com/Light-City) -- 更新 `python-dependencies.txt` 的位置 [#460](https://github.com/cloudberrydb/cloudberrydb/pull/460) by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) -- 为 `exec_simple_query` 添加 hook,并支持在 `cost` 模块中自定义溢出内存阈值 [#447](https://github.com/cloudberrydb/cloudberrydb/pull/447) by @[songdongxiaoa2](https://github.com/songdongxiaoa2) -- 在 AQUMV 中使用 `contain_var_clause` 代替 `pull_var_clause` 
[#451](https://github.com/cloudberrydb/cloudberrydb/pull/451) by @[avamingli](https://github.com/avamingli) -- 为目录表添加 `matchignore` 并移除 meta-command 用户名的输出 [#464](https://github.com/cloudberrydb/cloudberrydb/pull/464) by @[edespino](https://github.com/edespino) -- 添加 hook 以支持不同方法创建、删除或修改仓库 [#462](https://github.com/cloudberrydb/cloudberrydb/pull/462) by @[lss602726449](https://github.com/lss602726449) -- `ALTER COLUMN TYPE` 后重用索引时不再更改 `relfilenode` [#474](https://github.com/cloudberrydb/cloudberrydb/pull/474) by @[lss602726449](https://github.com/lss602726449) -- 从 Greenplum cherry-pick 2022 年 3 月 2 日 - 2023 年 2 月 7 日期间与 Resgroup 相关的代码 [#448](https://github.com/cloudberrydb/cloudberrydb/pull/448) by @[foreyes](https://github.com/foreyes) -- 添加每周构建和发布流程 [#459](https://github.com/cloudberrydb/cloudberrydb/pull/459) by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) -- 在 AQUMV 中考虑可变函数并绕过无 `Vars` 的视图查询目标表达式 [#455](https://github.com/cloudberrydb/cloudberrydb/pull/455) by @[avamingli](https://github.com/avamingli) -- 暴露调整视图查询和 `varno` 修复的函数到 AQUMV [#469](https://github.com/cloudberrydb/cloudberrydb/pull/469) by @[avamingli](https://github.com/avamingli) -- 修改每周构建发布的细节 [#477](https://github.com/cloudberrydb/cloudberrydb/pull/477) by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) -- 如果发生异常,直接调用 `query_info_collect_hook` 函数 [#481](https://github.com/cloudberrydb/cloudberrydb/pull/481) by @[foreyes](https://github.com/foreyes) -- Cherry-pick:在 `inet` 操作符中解决错误的编译器警告 [#499](https://github.com/cloudberrydb/cloudberrydb/pull/499) by @[gfphoenix78](https://github.com/gfphoenix78) -- 为 `s_lock.h` 添加 LoongArch (`loongarch64`) 支持 [#500](https://github.com/cloudberrydb/cloudberrydb/pull/500) by @[wangzw](https://github.com/wangzw) -- 实现目录表的功能 [#484](https://github.com/cloudberrydb/cloudberrydb/pull/484) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 重新启用外部 FTS ICW [#483](https://github.com/cloudberrydb/cloudberrydb/pull/483) by 
@[jiaqizho](https://github.com/jiaqizho) -- 修改 `AOCO_Compression` 测试用例,验证 `pg_relation_size` 和 `get_ao_compression_ratio` 是否在 ±10% 的预期范围内 [#493](https://github.com/cloudberrydb/cloudberrydb/pull/493) by @[congxuebin](https://github.com/congxuebin) -- 维护物化视图的数据状态 [#501](https://github.com/cloudberrydb/cloudberrydb/pull/501) by @[avamingli](https://github.com/avamingli) -- 定义 `EXT_OID_START` 以建议扩展的 OID 范围 [#514](https://github.com/cloudberrydb/cloudberrydb/pull/514) by @[avamingli](https://github.com/avamingli) -- 忽略 `pg_upgrade` 以解决 CI 问题 [#515](https://github.com/cloudberrydb/cloudberrydb/pull/515) by @[avamingli](https://github.com/avamingli) -- 减少 `fts_segment_reset` 测试的不稳定性 [#518](https://github.com/cloudberrydb/cloudberrydb/pull/518) by @[jiaqizho](https://github.com/jiaqizho) -- 稳定 `gp_dqa` 测试用例 [#521](https://github.com/cloudberrydb/cloudberrydb/pull/521) by @[congxuebin](https://github.com/congxuebin) -- 文档:为 Cloudberry Database 添加更多工具文档 [#523](https://github.com/cloudberrydb/cloudberrydb/pull/523) by @[tuhaihe](https://github.com/tuhaihe) -- 重新实现目录表的 `COPY FROM` 功能 [#527](https://github.com/cloudberrydb/cloudberrydb/pull/527) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 在 SingleNode 模式中添加与物化视图相关的触发器用例 [#517](https://github.com/cloudberrydb/cloudberrydb/pull/517) by @[avamingli](https://github.com/avamingli) -- 重构 AQUMV 中视图查询目标列表的处理 [#525](https://github.com/cloudberrydb/cloudberrydb/pull/525) by @[avamingli](https://github.com/avamingli) -- 实现标签功能 [#444](https://github.com/cloudberrydb/cloudberrydb/pull/444) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 更新 `orafce` 到 4.9 版并进行增强 [#524](https://github.com/cloudberrydb/cloudberrydb/pull/524) by @[foreyes](https://github.com/foreyes) -- 允许普通物化视图在 AQUMV 中回答查询 [#528](https://github.com/cloudberrydb/cloudberrydb/pull/528) by @[avamingli](https://github.com/avamingli) -- 带有 entry 分布策略的目录表支持 `COPY FROM` 功能 [#533](https://github.com/cloudberrydb/cloudberrydb/pull/533) by 
@[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 对 `README.md` 进行小幅改进 [#534](https://github.com/cloudberrydb/cloudberrydb/pull/534) by @[reshke](https://github.com/reshke) -- 使用 `FDW` 以将多个服务器作为分片进行查询 [#320](https://github.com/cloudberrydb/cloudberrydb/pull/320) by @[oppenheimer01](https://github.com/oppenheimer01) -- 在 `psql` 中添加选项以避免某些平台的编码问题 [#535](https://github.com/cloudberrydb/cloudberrydb/pull/535) by @[gfphoenix78](https://github.com/gfphoenix78) -- 重构 `cbdb_log` 以使用 `vfprintf` [#506](https://github.com/cloudberrydb/cloudberrydb/pull/506) by @[ruhuang2001](https://github.com/ruhuang2001) -- 更新 `aocsam.c`:修复 `safeFSWriteSize` 参数类型 [#540](https://github.com/cloudberrydb/cloudberrydb/pull/540) by @[reshke](https://github.com/reshke) -- 将 CI 镜像更新为 `RockyLinux8` 以确保 CI 的正常运行 [#556](https://github.com/cloudberrydb/cloudberrydb/pull/556) by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) -- 移除不支持的 `AC_FUNC_FSEEKO` 宏 [#543](https://github.com/cloudberrydb/cloudberrydb/pull/543) by @[gfphoenix78](https://github.com/gfphoenix78) -- 调整 `cloudberrydb` 二进制交换测试的测试用例 [#537](https://github.com/cloudberrydb/cloudberrydb/pull/537) by @[congxuebin](https://github.com/congxuebin) -- 实现 `CREATE FOREIGN TABLE LIKE` [#554](https://github.com/cloudberrydb/cloudberrydb/pull/554) by @[avamingli](https://github.com/avamingli) -- 启用 `SingleQE` 与 `SegmentGeneralWorkers` 的连接 [#327](https://github.com/cloudberrydb/cloudberrydb/pull/327) by @[avamingli](https://github.com/avamingli) -- 在 `ShouldUseReservedSegno` 中使用 `syscache` 查找 [#541](https://github.com/cloudberrydb/cloudberrydb/pull/541) by @[gongxun0928](https://github.com/gongxun0928) -- 实现 `READ_STRING_FIELD_NULL` 可序列化的读取函数 [#553](https://github.com/cloudberrydb/cloudberrydb/pull/553) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 更新 `appendonlywriter.c` 以修复调试消息 [#564](https://github.com/cloudberrydb/cloudberrydb/pull/564) by @[reshke](https://github.com/reshke) -- 支持锁定目录表 
[#572](https://github.com/cloudberrydb/cloudberrydb/pull/572) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 更新 `ALTER TABLE` 帮助命令的内容 [#574](https://github.com/cloudberrydb/cloudberrydb/pull/574) by @[EcaleD](https://github.com/EcaleD) -- 从 Greenplum Database cherry-pick `Resgroup V2` 和工具包 [#531](https://github.com/cloudberrydb/cloudberrydb/pull/531) by @[jiaqizho](https://github.com/jiaqizho) -- 当 NL 内部计划是索引扫描时,将 ORCA 回退到 PostgreSQL 优化器 [#565](https://github.com/cloudberrydb/cloudberrydb/pull/565) by @[gfphoenix78](https://github.com/gfphoenix78) -- 在调用 `ivm_visible_in_prestate` 时为冗余参数添加 `FIXME` [#578](https://github.com/cloudberrydb/cloudberrydb/pull/578) by @[reshke](https://github.com/reshke) -- 支持在创建物化视图时使用 `IF NOT EXISTS` 子句的同时指定 `DISTRIBUTED BY` 子句 [#563](https://github.com/cloudberrydb/cloudberrydb/pull/563) by @[reshke](https://github.com/reshke) -- Cherry-pick 与最近 `Resgroup V2` 合并相关的提交 [#579](https://github.com/cloudberrydb/cloudberrydb/pull/579) by @[reshke](https://github.com/reshke) -- 更改 IMMV 使用的临时表名为较不常见的序列 [#581](https://github.com/cloudberrydb/cloudberrydb/pull/581) by @[reshke](https://github.com/reshke) -- 修改延迟 IVM 案例的注释 [#576](https://github.com/cloudberrydb/cloudberrydb/pull/576) by @[reshke](https://github.com/reshke) -- 在 `gpAux/gpdemo/gpdemo-defaults.sh` 中默认启用 `fsync=on` [#585](https://github.com/cloudberrydb/cloudberrydb/pull/585) by @[yjhjstz](https://github.com/yjhjstz) -- 禁止继承表存储在 `gp_matview_aux` 中 [#587](https://github.com/cloudberrydb/cloudberrydb/pull/587) by @[avamingli](https://github.com/avamingli) -- 在 AQUMV 中检查执行视图匹配时关系是否有子项 [#577](https://github.com/cloudberrydb/cloudberrydb/pull/577) by @[avamingli](https://github.com/avamingli) -- 更新 `check.c` 以修复对 Cloudberry Database 的错误引用 [#600](https://github.com/cloudberrydb/cloudberrydb/pull/600) by @[reshke](https://github.com/reshke) -- 为 `ANALYZE` 以二进制模式发送行数据 [#601](https://github.com/cloudberrydb/cloudberrydb/pull/601) by @[Light-City](https://github.com/Light-City) 
-- 在 CI 中启用 `Resgroup` 测试用例 [#539](https://github.com/cloudberrydb/cloudberrydb/pull/539) by @[jiaqizho](https://github.com/jiaqizho) -- 移除 `cbdb` 每周构建和发布的工作流 [#615](https://github.com/cloudberrydb/cloudberrydb/pull/615) by @[edespino](https://github.com/edespino) -- 在提交或中止事务时释放 `tupleDesc` [#551](https://github.com/cloudberrydb/cloudberrydb/pull/551) by @[yjhjstz](https://github.com/yjhjstz) -- 在外部 FTS 中用 `gpsync` 替换 `gpscp` [#470](https://github.com/cloudberrydb/cloudberrydb/pull/470) by @[jiaqizho](https://github.com/jiaqizho) -- 在 `greenplum_schedule` 中添加 `ao_unique_index_build` 测试 [#562](https://github.com/cloudberrydb/cloudberrydb/pull/562) by @[lss602726449](https://github.com/lss602726449) -- 避免重复执行 `qual` 子句 [#396](https://github.com/cloudberrydb/cloudberrydb/pull/396) by @[jiaqizho](https://github.com/jiaqizho) - -## Bug 修复 - -- 修复由于重新定义 `pipe` 导致的编译错误 [#349](https://github.com/cloudberrydb/cloudberrydb/pull/349) by @[gfphoenix78](https://github.com/gfphoenix78) -- 修复在角色不允许使用 profile 时记录 `password_history` 的问题 [#480](https://github.com/cloudberrydb/cloudberrydb/pull/480) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 解决 `gpdtm_plpgsql` 测试用例中的结果不一致问题 [#491](https://github.com/cloudberrydb/cloudberrydb/pull/491) by @[congxuebin](https://github.com/congxuebin) -- 修复 Cloudberry Database CI 无法正常运行的问题 [#497](https://github.com/cloudberrydb/cloudberrydb/pull/497) by @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata) -- 修复在复制表上使用 `COPY TO` 时复制数量显示不正确的问题 [#498](https://github.com/cloudberrydb/cloudberrydb/pull/498) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 修复 `bitmapinsert` 中的内存块大小问题 [#495](https://github.com/cloudberrydb/cloudberrydb/pull/495) by @[gfphoenix78](https://github.com/gfphoenix78) -- 修复忽略直接表测试文件的问题 [#502](https://github.com/cloudberrydb/cloudberrydb/pull/502) by @[avamingli](https://github.com/avamingli) -- 修复 `gpinitsystem` 问题 [#490](https://github.com/cloudberrydb/cloudberrydb/pull/490) by 
@[fanfuxiaoran](https://github.com/fanfuxiaoran) -- 修复 GCC 12 检测到的编译错误 [#503](https://github.com/cloudberrydb/cloudberrydb/pull/503) by @[gfphoenix78](https://github.com/gfphoenix78) -- 修复 `guc.c` 中 `bsearch` 比较函数的问题 [#507](https://github.com/cloudberrydb/cloudberrydb/pull/507) by @[gfphoenix78](https://github.com/gfphoenix78) -- 修复忘记标记 meta track 的命令 [#505](https://github.com/cloudberrydb/cloudberrydb/pull/505) by @[avamingli](https://github.com/avamingli) -- 修复 C++20 中的编译错误 [#510](https://github.com/cloudberrydb/cloudberrydb/pull/510) by @[gfphoenix78](https://github.com/gfphoenix78) -- 修复在目录表上执行 `COPY TO` 时总是返回 `1` 的问题 [#522](https://github.com/cloudberrydb/cloudberrydb/pull/522) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 修复使用 `COPY` 进行批量插入时 AO/AOCO 的 `segfilecount` 问题 [#530](https://github.com/cloudberrydb/cloudberrydb/pull/530) by @[avamingli](https://github.com/avamingli) -- 修复在 AO/AOCO/PAX 分区表上执行 `COPY FROM` 时的崩溃问题 [#549](https://github.com/cloudberrydb/cloudberrydb/pull/549) by @[avamingli](https://github.com/avamingli) -- 修复在某些目录表上复制时出现的问题 [#550](https://github.com/cloudberrydb/cloudberrydb/pull/550) by @[wenchaozhang-123](https://github.com/wenchaozhang-123) -- 修复 IMMV 的基础关系截断问题 [#570](https://github.com/cloudberrydb/cloudberrydb/pull/570) by @[reshke](https://github.com/reshke) -- 修复 `SparseData.h` 中的编译错误 [#566](https://github.com/cloudberrydb/cloudberrydb/pull/566) by @[reshke](https://github.com/reshke) -- 修复 `pxf_fragment.c` 的编译失败问题 [#590](https://github.com/cloudberrydb/cloudberrydb/pull/590) by @[Terry1504](https://github.com/Terry1504) -- 修复从 Greenplum 升级时 `pg_upgrade` 版本解析问题 [#599](https://github.com/cloudberrydb/cloudberrydb/pull/599) by @[reshke](https://github.com/reshke) -- 修复表达式 `AEXPR_NOT_DISTINCT` 的序列化问题 [#598](https://github.com/cloudberrydb/cloudberrydb/pull/598) by @[avamingli](https://github.com/avamingli) -- 修复与物化视图相关的表上的可写规则问题 [#584](https://github.com/cloudberrydb/cloudberrydb/pull/584) by 
@[avamingli](https://github.com/avamingli) -- 修复可写 CTE 导致物化视图数据状态不正确的问题 [#602](https://github.com/cloudberrydb/cloudberrydb/pull/602) by @[avamingli](https://github.com/avamingli) -- 修复无法使用投影目标列表拉起等价类的问题 [#606](https://github.com/cloudberrydb/cloudberrydb/pull/606) by @[yjhjstz](https://github.com/yjhjstz) - -## 🙌🏻️ 新贡献者 - -- @[Light-City](https://github.com/Light-City) 在 [#399](https://github.com/cloudberrydb/cloudberrydb/pull/399) 中做出了他们的首次贡献。 -- @[songdongxiaoa2](https://github.com/songdongxiaoa2) 在 [#447](https://github.com/cloudberrydb/cloudberrydb/pull/447) 中做出了他们的首次贡献。 -- @[edespino](https://github.com/edespino) 在 [#464](https://github.com/cloudberrydb/cloudberrydb/pull/464) 中做出了他们的首次贡献。 -- @[congxuebin](https://github.com/congxuebin) 在 [#491](https://github.com/cloudberrydb/cloudberrydb/pull/491) 中做出了他们的首次贡献。 -- @[wangzw](https://github.com/wangzw) 在 [#500](https://github.com/cloudberrydb/cloudberrydb/pull/500) 中做出了他们的首次贡献。 -- @[reshke](https://github.com/reshke) 在 [#534](https://github.com/cloudberrydb/cloudberrydb/pull/534) 中做出了他们的首次贡献。 -- @[oppenheimer01](https://github.com/oppenheimer01) 在 [#320](https://github.com/cloudberrydb/cloudberrydb/pull/320) 中做出了他们的首次贡献。 -- @[ruhuang2001](https://github.com/ruhuang2001) 在 [#506](https://github.com/cloudberrydb/cloudberrydb/pull/506) 中做出了他们的首次贡献。 -- @[EcaleD](https://github.com/EcaleD) 在 [#574](https://github.com/cloudberrydb/cloudberrydb/pull/574) 中做出了他们的首次贡献。 -- @[Terry1504](https://github.com/Terry1504) 在 [#590](https://github.com/cloudberrydb/cloudberrydb/pull/590) 中做出了他们的首次贡献。 - -## 🧑🏻‍💻 贡献者列表 - -感谢所有贡献者使此版本发布成为可能:@[Aegeaner](https://github.com/Aegeaner), @[EcaleD](https://github.com/EcaleD), @[Light-City](https://github.com/Light-City), @[RMTT](https://github.com/RMTT), @[SmartKeyerror](https://github.com/SmartKeyerror), @[Tao-T](https://github.com/Tao-T), @[Terry1504](https://github.com/Terry1504), @[Zhangbaowen-Hashdata](https://github.com/Zhangbaowen-Hashdata), @[adam8157](https://github.com/adam8157), 
@[airfan1994](https://github.com/airfan1994), @[andr-sokolov](https://github.com/andr-sokolov), @[ashwinstar](https://github.com/ashwinstar), @[avamingli](https://github.com/avamingli), @[beeender](https://github.com/beeender), @[bmdoil](https://github.com/bmdoil), @[charliettxx](https://github.com/charliettxx), @[congxuebin](https://github.com/congxuebin), @[dgkimura](https://github.com/dgkimura), @[dh-cloud](https://github.com/dh-cloud), @[divyeshddv](https://github.com/divyeshddv), @[dreamedcheng](https://github.com/dreamedcheng), @[edespino](https://github.com/edespino), @[eespino](https://github.com/eespino), @[fairyfar](https://github.com/fairyfar), @[fanfuxiaoran](https://github.com/fanfuxiaoran), @[foreyes](https://github.com/foreyes), @[gfphoenix78](https://github.com/gfphoenix78), @[gongxun0928](https://github.com/gongxun0928), @[gpopt](https://github.com/gpopt), @[higuoxing](https://github.com/higuoxing), @[huansong](https://github.com/huansong), @[hyongtao-db](https://github.com/hyongtao-db), @[jchampio](https://github.com/jchampio), @[jiaqizho](https://github.com/jiaqizho), @[jimmyyih](https://github.com/jimmyyih), @[kainwen](https://github.com/kainwen), @[l-wang](https://github.com/l-wang), @[lss602726449](https://github.com/lss602726449), @[oppenheimer01](https://github.com/oppenheimer01), @[reshke](https://github.com/reshke), @[ruhuang2001](https://github.com/ruhuang2001), @[songdongxiaoa2](https://github.com/songdongxiaoa2), @[soumyadeep2007](https://github.com/soumyadeep2007), @[thedanhoffman](https://github.com/thedanhoffman), @[tuhaihe](https://github.com/tuhaihe), @[wangzw](https://github.com/wangzw), @[wenchaozhang-123](https://github.com/wenchaozhang-123), @[yanwr1](https://github.com/yanwr1), @[yaowangm](https://github.com/yaowangm), @[yjhjstz](https://github.com/yjhjstz), @[zhrt123](https://github.com/zhrt123), @[zxuejing](https://github.com/zxuejing) - -🧂🧪 快来试用[基于 Docker 的沙盒](https://github.com/cloudberrydb/bootcamp),该沙盒专为初次体验 Cloudberry 
Database 而打造,包含一系列材料,包括教程、示例代码和速成课程,帮助您快速上车 Cloudberry Database。 diff --git a/sidebars.ts b/sidebars.ts index 23505a625e..4e57115108 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -395,11 +395,9 @@ const sidebars: SidebarsConfig = { { type: "category", label: "Releases", - link: { - type: "doc", - id: "releases/index", - }, - items: ['releases/release-1.6.0','releases/release-1.5.4','releases/release-1.5.3','releases/release-1.5.2','releases/release-1.5.1', 'releases/release-1.5.0'], + items: [ + "releases/index" + ] }, ] }