diff --git a/.Rbuildignore b/.Rbuildignore index 906424c3..4cc5b621 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,6 +1,7 @@ +^rcppExports.cpp$ +^stanExports_* + ^\.travis\.yml$ -rccpExports.cpp -stanExports_* ^data-raw$ .Rhistory README.Rmd @@ -18,5 +19,4 @@ tests/testthat/test-rstan-conformity-results NEWS.md index.html cran-comments.md - ^CRAN-RELEASE$ diff --git a/.gitignore b/.gitignore index 3f54a4bc..24ea156a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,14 @@ -src/geostan.so -src/geostan.dll +^rcppExports.cpp$ +^stanExports_* !man/geostan-internal.Rd !man/figures/logo.png man/*.Rd .Rhistory -*~ -inst/doc -cran-comments.md \ No newline at end of file +*~$ +cran-comments.md + +src/geostan.so +src/geostan.dll +man/*.Rd +.Rhistory +cran-comments.md diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1db501b6..00000000 --- a/.travis.yml +++ /dev/null @@ -1,51 +0,0 @@ -language: r -sudo: false -r: devel -cache: packages - -latex: true - - -env: - matrix: - - CXX_OLEVEL=2 CXX=clang++ - -matrix: - include: - - os: linux - compiler: clang - addons: - apt: - sources: [ 'ubuntu-toolchain-r-test', 'llvm-toolchain-precise-5.0' ] - packages: - - llvm-5.0-dev - env: - - CXX_OLEVEL=2 CXX=clang++ - - -before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - sudo apt-get update -q - - mkdir -p ~/.R/ - - echo "CXX = `R CMD config CXX`" >> ~/.R/Makevars - - echo "CXXFLAGS = `R CMD config CXXFLAGS` -pedantic -g0 -flto -stdlib=libc++" >> ~/.R/Makevars - - echo "LDFLAGS += -flto -stdlib=libc++" >> ~/.R/Makevars - - export CLANG_EXTRA_ARG="" - - if [[ $CXX = "clang++" ]] ; then export CLANG_EXTRA_ARG=" -Qunused-arguments -fcolor-diagnostics " ; fi - - sed -i.bak "s/ g++/ ${CXX}${CLANG_EXTRA_ARG}/" ~/.R/Makevars - - sed -i.bak "s/O[0-3]/O$CXX_OLEVEL/" ~/.R/Makevars - -script: - - | - travis_wait 42 R CMD build . 
- travis_wait 59 R CMD check geostan*tar.gz - -after_script: - - tar -ztvf geostan_*.tar.gz - - echo ${NOT_CRAN} - -after_success: - - travis_wait 40 tar -C .. -xf $PKG_TARBALL - -after_failure: -- cat geostan.Rcheck/00* diff --git a/DESCRIPTION b/DESCRIPTION index cb026ce9..64d03f41 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,10 +15,12 @@ Description: For Bayesian inference with spatial data, provides exploratory anal License: GPL (>= 3) Encoding: UTF-8 LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.1 Biarch: true Depends: R (>= 3.4.0) -Imports: +Imports: spdep (>= 1.1-8), sf, ggplot2 (>= 3.0.0), @@ -33,20 +35,14 @@ Imports: utils, Matrix (>= 1.3), Rcpp (>= 0.12.0), - RcppParallel, + RcppParallel (>= 5.0.1), rstan (>= 2.18.1), - rstantools (>= 2.0.0) + rstantools (>= 2.1.1) LinkingTo: BH (>= 1.66.0), Rcpp (>= 0.12.0), RcppEigen (>= 0.3.3.3.0), + RcppParallel (>= 5.0.1), rstan (>= 2.18.1), - StanHeaders (>= 2.18.0), - RcppParallel -Suggests: - testthat, - knitr, - rmarkdown + StanHeaders (>= 2.18.0) SystemRequirements: GNU make -RoxygenNote: 7.1.1 -VignetteBuilder: knitr diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index 175443ce..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,595 +0,0 @@ -GNU General Public License -========================== - -_Version 3, 29 June 2007_ -_Copyright © 2007 Free Software Foundation, Inc. <>_ - -Everyone is permitted to copy and distribute verbatim copies of this license -document, but changing it is not allowed. - -## Preamble - -The GNU General Public License is a free, copyleft license for software and other -kinds of works. - -The licenses for most software and other practical works are designed to take away -your freedom to share and change the works. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change all versions of a -program--to make sure it remains free software for all its users. 
We, the Free -Software Foundation, use the GNU General Public License for most of our software; it -applies also to any other work released this way by its authors. You can apply it to -your programs, too. - -When we speak of free software, we are referring to freedom, not price. Our General -Public Licenses are designed to make sure that you have the freedom to distribute -copies of free software (and charge for them if you wish), that you receive source -code or can get it if you want it, that you can change the software or use pieces of -it in new free programs, and that you know you can do these things. - -To protect your rights, we need to prevent others from denying you these rights or -asking you to surrender the rights. Therefore, you have certain responsibilities if -you distribute copies of the software, or if you modify it: responsibilities to -respect the freedom of others. - -For example, if you distribute copies of such a program, whether gratis or for a fee, -you must pass on to the recipients the same freedoms that you received. You must make -sure that they, too, receive or can get the source code. And you must show them these -terms so they know their rights. - -Developers that use the GNU GPL protect your rights with two steps: **(1)** assert -copyright on the software, and **(2)** offer you this License giving you legal permission -to copy, distribute and/or modify it. - -For the developers' and authors' protection, the GPL clearly explains that there is -no warranty for this free software. For both users' and authors' sake, the GPL -requires that modified versions be marked as changed, so that their problems will not -be attributed erroneously to authors of previous versions. - -Some devices are designed to deny users access to install or run modified versions of -the software inside them, although the manufacturer can do so. This is fundamentally -incompatible with the aim of protecting users' freedom to change the software. 
The -systematic pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we have designed -this version of the GPL to prohibit the practice for those products. If such problems -arise substantially in other domains, we stand ready to extend this provision to -those domains in future versions of the GPL, as needed to protect the freedom of -users. - -Finally, every program is threatened constantly by software patents. States should -not allow patents to restrict development and use of software on general-purpose -computers, but in those that do, we wish to avoid the special danger that patents -applied to a free program could make it effectively proprietary. To prevent this, the -GPL assures that patents cannot be used to render the program non-free. - -The precise terms and conditions for copying, distribution and modification follow. - -## TERMS AND CONDITIONS - -### 0. Definitions - -“This License” refers to version 3 of the GNU General Public License. - -“Copyright” also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - -“The Program” refers to any copyrightable work licensed under this -License. Each licensee is addressed as “you”. “Licensees” and -“recipients” may be individuals or organizations. - -To “modify” a work means to copy from or adapt all or part of the work in -a fashion requiring copyright permission, other than the making of an exact copy. The -resulting work is called a “modified version” of the earlier work or a -work “based on” the earlier work. - -A “covered work” means either the unmodified Program or a work based on -the Program. - -To “propagate” a work means to do anything with it that, without -permission, would make you directly or secondarily liable for infringement under -applicable copyright law, except executing it on a computer or modifying a private -copy. 
Propagation includes copying, distribution (with or without modification), -making available to the public, and in some countries other activities as well. - -To “convey” a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through a computer -network, with no transfer of a copy, is not conveying. - -An interactive user interface displays “Appropriate Legal Notices” to the -extent that it includes a convenient and prominently visible feature that **(1)** -displays an appropriate copyright notice, and **(2)** tells the user that there is no -warranty for the work (except to the extent that warranties are provided), that -licensees may convey the work under this License, and how to view a copy of this -License. If the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - -### 1. Source Code - -The “source code” for a work means the preferred form of the work for -making modifications to it. “Object code” means any non-source form of a -work. - -A “Standard Interface” means an interface that either is an official -standard defined by a recognized standards body, or, in the case of interfaces -specified for a particular programming language, one that is widely used among -developers working in that language. - -The “System Libraries” of an executable work include anything, other than -the work as a whole, that **(a)** is included in the normal form of packaging a Major -Component, but which is not part of that Major Component, and **(b)** serves only to -enable use of the work with that Major Component, or to implement a Standard -Interface for which an implementation is available to the public in source code form. 
-A “Major Component”, in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system (if any) on which -the executable work runs, or a compiler used to produce the work, or an object code -interpreter used to run it. - -The “Corresponding Source” for a work in object code form means all the -source code needed to generate, install, and (for an executable work) run the object -code and to modify the work, including scripts to control those activities. However, -it does not include the work's System Libraries, or general-purpose tools or -generally available free programs which are used unmodified in performing those -activities but which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for the work, and -the source code for shared libraries and dynamically linked subprograms that the work -is specifically designed to require, such as by intimate data communication or -control flow between those subprograms and other parts of the work. - -The Corresponding Source need not include anything that users can regenerate -automatically from other parts of the Corresponding Source. - -The Corresponding Source for a work in source code form is that same work. - -### 2. Basic Permissions - -All rights granted under this License are granted for the term of copyright on the -Program, and are irrevocable provided the stated conditions are met. This License -explicitly affirms your unlimited permission to run the unmodified Program. The -output from running a covered work is covered by this License only if the output, -given its content, constitutes a covered work. This License acknowledges your rights -of fair use or other equivalent, as provided by copyright law. - -You may make, run and propagate covered works that you do not convey, without -conditions so long as your license otherwise remains in force. 
You may convey covered -works to others for the sole purpose of having them make modifications exclusively -for you, or provide you with facilities for running those works, provided that you -comply with the terms of this License in conveying all material for which you do not -control copyright. Those thus making or running the covered works for you must do so -exclusively on your behalf, under your direction and control, on terms that prohibit -them from making any copies of your copyrighted material outside their relationship -with you. - -Conveying under any other circumstances is permitted solely under the conditions -stated below. Sublicensing is not allowed; section 10 makes it unnecessary. - -### 3. Protecting Users' Legal Rights From Anti-Circumvention Law - -No covered work shall be deemed part of an effective technological measure under any -applicable law fulfilling obligations under article 11 of the WIPO copyright treaty -adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention -of such measures. - -When you convey a covered work, you waive any legal power to forbid circumvention of -technological measures to the extent such circumvention is effected by exercising -rights under this License with respect to the covered work, and you disclaim any -intention to limit operation or modification of the work as a means of enforcing, -against the work's users, your or third parties' legal rights to forbid circumvention -of technological measures. - -### 4. 
Conveying Verbatim Copies - -You may convey verbatim copies of the Program's source code as you receive it, in any -medium, provided that you conspicuously and appropriately publish on each copy an -appropriate copyright notice; keep intact all notices stating that this License and -any non-permissive terms added in accord with section 7 apply to the code; keep -intact all notices of the absence of any warranty; and give all recipients a copy of -this License along with the Program. - -You may charge any price or no price for each copy that you convey, and you may offer -support or warranty protection for a fee. - -### 5. Conveying Modified Source Versions - -You may convey a work based on the Program, or the modifications to produce it from -the Program, in the form of source code under the terms of section 4, provided that -you also meet all of these conditions: - -* **a)** The work must carry prominent notices stating that you modified it, and giving a -relevant date. -* **b)** The work must carry prominent notices stating that it is released under this -License and any conditions added under section 7. This requirement modifies the -requirement in section 4 to “keep intact all notices”. -* **c)** You must license the entire work, as a whole, under this License to anyone who -comes into possession of a copy. This License will therefore apply, along with any -applicable section 7 additional terms, to the whole of the work, and all its parts, -regardless of how they are packaged. This License gives no permission to license the -work in any other way, but it does not invalidate such permission if you have -separately received it. -* **d)** If the work has interactive user interfaces, each must display Appropriate Legal -Notices; however, if the Program has interactive interfaces that do not display -Appropriate Legal Notices, your work need not make them do so. 
- -A compilation of a covered work with other separate and independent works, which are -not by their nature extensions of the covered work, and which are not combined with -it such as to form a larger program, in or on a volume of a storage or distribution -medium, is called an “aggregate” if the compilation and its resulting -copyright are not used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work in an aggregate -does not cause this License to apply to the other parts of the aggregate. - -### 6. Conveying Non-Source Forms - -You may convey a covered work in object code form under the terms of sections 4 and -5, provided that you also convey the machine-readable Corresponding Source under the -terms of this License, in one of these ways: - -* **a)** Convey the object code in, or embodied in, a physical product (including a -physical distribution medium), accompanied by the Corresponding Source fixed on a -durable physical medium customarily used for software interchange. -* **b)** Convey the object code in, or embodied in, a physical product (including a -physical distribution medium), accompanied by a written offer, valid for at least -three years and valid for as long as you offer spare parts or customer support for -that product model, to give anyone who possesses the object code either **(1)** a copy of -the Corresponding Source for all the software in the product that is covered by this -License, on a durable physical medium customarily used for software interchange, for -a price no more than your reasonable cost of physically performing this conveying of -source, or **(2)** access to copy the Corresponding Source from a network server at no -charge. -* **c)** Convey individual copies of the object code with a copy of the written offer to -provide the Corresponding Source. 
This alternative is allowed only occasionally and -noncommercially, and only if you received the object code with such an offer, in -accord with subsection 6b. -* **d)** Convey the object code by offering access from a designated place (gratis or for -a charge), and offer equivalent access to the Corresponding Source in the same way -through the same place at no further charge. You need not require recipients to copy -the Corresponding Source along with the object code. If the place to copy the object -code is a network server, the Corresponding Source may be on a different server -(operated by you or a third party) that supports equivalent copying facilities, -provided you maintain clear directions next to the object code saying where to find -the Corresponding Source. Regardless of what server hosts the Corresponding Source, -you remain obligated to ensure that it is available for as long as needed to satisfy -these requirements. -* **e)** Convey the object code using peer-to-peer transmission, provided you inform -other peers where the object code and Corresponding Source of the work are being -offered to the general public at no charge under subsection 6d. - -A separable portion of the object code, whose source code is excluded from the -Corresponding Source as a System Library, need not be included in conveying the -object code work. - -A “User Product” is either **(1)** a “consumer product”, which -means any tangible personal property which is normally used for personal, family, or -household purposes, or **(2)** anything designed or sold for incorporation into a -dwelling. In determining whether a product is a consumer product, doubtful cases -shall be resolved in favor of coverage. 
For a particular product received by a -particular user, “normally used” refers to a typical or common use of -that class of product, regardless of the status of the particular user or of the way -in which the particular user actually uses, or expects or is expected to use, the -product. A product is a consumer product regardless of whether the product has -substantial commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - -“Installation Information” for a User Product means any methods, -procedures, authorization keys, or other information required to install and execute -modified versions of a covered work in that User Product from a modified version of -its Corresponding Source. The information must suffice to ensure that the continued -functioning of the modified object code is in no case prevented or interfered with -solely because modification has been made. - -If you convey an object code work under this section in, or with, or specifically for -use in, a User Product, and the conveying occurs as part of a transaction in which -the right of possession and use of the User Product is transferred to the recipient -in perpetuity or for a fixed term (regardless of how the transaction is -characterized), the Corresponding Source conveyed under this section must be -accompanied by the Installation Information. But this requirement does not apply if -neither you nor any third party retains the ability to install modified object code -on the User Product (for example, the work has been installed in ROM). - -The requirement to provide Installation Information does not include a requirement to -continue to provide support service, warranty, or updates for a work that has been -modified or installed by the recipient, or for the User Product in which it has been -modified or installed. 
Access to a network may be denied when the modification itself -materially and adversely affects the operation of the network or violates the rules -and protocols for communication across the network. - -Corresponding Source conveyed, and Installation Information provided, in accord with -this section must be in a format that is publicly documented (and with an -implementation available to the public in source code form), and must require no -special password or key for unpacking, reading or copying. - -### 7. Additional Terms - -“Additional permissions” are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. Additional -permissions that are applicable to the entire Program shall be treated as though they -were included in this License, to the extent that they are valid under applicable -law. If additional permissions apply only to part of the Program, that part may be -used separately under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - -When you convey a copy of a covered work, you may at your option remove any -additional permissions from that copy, or from any part of it. (Additional -permissions may be written to require their own removal in certain cases when you -modify the work.) You may place additional permissions on material, added by you to a -covered work, for which you have or can give appropriate copyright permission. 
- -Notwithstanding any other provision of this License, for material you add to a -covered work, you may (if authorized by the copyright holders of that material) -supplement the terms of this License with terms: - -* **a)** Disclaiming warranty or limiting liability differently from the terms of -sections 15 and 16 of this License; or -* **b)** Requiring preservation of specified reasonable legal notices or author -attributions in that material or in the Appropriate Legal Notices displayed by works -containing it; or -* **c)** Prohibiting misrepresentation of the origin of that material, or requiring that -modified versions of such material be marked in reasonable ways as different from the -original version; or -* **d)** Limiting the use for publicity purposes of names of licensors or authors of the -material; or -* **e)** Declining to grant rights under trademark law for use of some trade names, -trademarks, or service marks; or -* **f)** Requiring indemnification of licensors and authors of that material by anyone -who conveys the material (or modified versions of it) with contractual assumptions of -liability to the recipient, for any liability that these contractual assumptions -directly impose on those licensors and authors. - -All other non-permissive additional terms are considered “further -restrictions” within the meaning of section 10. If the Program as you received -it, or any part of it, contains a notice stating that it is governed by this License -along with a term that is a further restriction, you may remove that term. If a -license document contains a further restriction but permits relicensing or conveying -under this License, you may add to a covered work material governed by the terms of -that license document, provided that the further restriction does not survive such -relicensing or conveying. 
- -If you add terms to a covered work in accord with this section, you must place, in -the relevant source files, a statement of the additional terms that apply to those -files, or a notice indicating where to find the applicable terms. - -Additional terms, permissive or non-permissive, may be stated in the form of a -separately written license, or stated as exceptions; the above requirements apply -either way. - -### 8. Termination - -You may not propagate or modify a covered work except as expressly provided under -this License. Any attempt otherwise to propagate or modify it is void, and will -automatically terminate your rights under this License (including any patent licenses -granted under the third paragraph of section 11). - -However, if you cease all violation of this License, then your license from a -particular copyright holder is reinstated **(a)** provisionally, unless and until the -copyright holder explicitly and finally terminates your license, and **(b)** permanently, -if the copyright holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - -Moreover, your license from a particular copyright holder is reinstated permanently -if the copyright holder notifies you of the violation by some reasonable means, this -is the first time you have received notice of violation of this License (for any -work) from that copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - -Termination of your rights under this section does not terminate the licenses of -parties who have received copies or rights from you under this License. If your -rights have been terminated and not permanently reinstated, you do not qualify to -receive new licenses for the same material under section 10. - -### 9. Acceptance Not Required for Having Copies - -You are not required to accept this License in order to receive or run a copy of the -Program. 
Ancillary propagation of a covered work occurring solely as a consequence of -using peer-to-peer transmission to receive a copy likewise does not require -acceptance. However, nothing other than this License grants you permission to -propagate or modify any covered work. These actions infringe copyright if you do not -accept this License. Therefore, by modifying or propagating a covered work, you -indicate your acceptance of this License to do so. - -### 10. Automatic Licensing of Downstream Recipients - -Each time you convey a covered work, the recipient automatically receives a license -from the original licensors, to run, modify and propagate that work, subject to this -License. You are not responsible for enforcing compliance by third parties with this -License. - -An “entity transaction” is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an organization, or -merging organizations. If propagation of a covered work results from an entity -transaction, each party to that transaction who receives a copy of the work also -receives whatever licenses to the work the party's predecessor in interest had or -could give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if the predecessor -has it or can get it with reasonable efforts. - -You may not impose any further restrictions on the exercise of the rights granted or -affirmed under this License. For example, you may not impose a license fee, royalty, -or other charge for exercise of rights granted under this License, and you may not -initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging -that any patent claim is infringed by making, using, selling, offering for sale, or -importing the Program or any portion of it. - -### 11. 
Patents - -A “contributor” is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The work thus -licensed is called the contributor's “contributor version”. - -A contributor's “essential patent claims” are all patent claims owned or -controlled by the contributor, whether already acquired or hereafter acquired, that -would be infringed by some manner, permitted by this License, of making, using, or -selling its contributor version, but do not include claims that would be infringed -only as a consequence of further modification of the contributor version. For -purposes of this definition, “control” includes the right to grant patent -sublicenses in a manner consistent with the requirements of this License. - -Each contributor grants you a non-exclusive, worldwide, royalty-free patent license -under the contributor's essential patent claims, to make, use, sell, offer for sale, -import and otherwise run, modify and propagate the contents of its contributor -version. - -In the following three paragraphs, a “patent license” is any express -agreement or commitment, however denominated, not to enforce a patent (such as an -express permission to practice a patent or covenant not to sue for patent -infringement). To “grant” such a patent license to a party means to make -such an agreement or commitment not to enforce a patent against the party. 
- -If you convey a covered work, knowingly relying on a patent license, and the -Corresponding Source of the work is not available for anyone to copy, free of charge -and under the terms of this License, through a publicly available network server or -other readily accessible means, then you must either **(1)** cause the Corresponding -Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the -patent license for this particular work, or **(3)** arrange, in a manner consistent with -the requirements of this License, to extend the patent license to downstream -recipients. “Knowingly relying” means you have actual knowledge that, but -for the patent license, your conveying the covered work in a country, or your -recipient's use of the covered work in a country, would infringe one or more -identifiable patents in that country that you have reason to believe are valid. - -If, pursuant to or in connection with a single transaction or arrangement, you -convey, or propagate by procuring conveyance of, a covered work, and grant a patent -license to some of the parties receiving the covered work authorizing them to use, -propagate, modify or convey a specific copy of the covered work, then the patent -license you grant is automatically extended to all recipients of the covered work and -works based on it. - -A patent license is “discriminatory” if it does not include within the -scope of its coverage, prohibits the exercise of, or is conditioned on the -non-exercise of one or more of the rights that are specifically granted under this -License. 
You may not convey a covered work if you are a party to an arrangement with -a third party that is in the business of distributing software, under which you make -payment to the third party based on the extent of your activity of conveying the -work, and under which the third party grants, to any of the parties who would receive -the covered work from you, a discriminatory patent license **(a)** in connection with -copies of the covered work conveyed by you (or copies made from those copies), or **(b)** -primarily for and in connection with specific products or compilations that contain -the covered work, unless you entered into that arrangement, or that patent license -was granted, prior to 28 March 2007. - -Nothing in this License shall be construed as excluding or limiting any implied -license or other defenses to infringement that may otherwise be available to you -under applicable patent law. - -### 12. No Surrender of Others' Freedom - -If conditions are imposed on you (whether by court order, agreement or otherwise) -that contradict the conditions of this License, they do not excuse you from the -conditions of this License. If you cannot convey a covered work so as to satisfy -simultaneously your obligations under this License and any other pertinent -obligations, then as a consequence you may not convey it at all. For example, if you -agree to terms that obligate you to collect a royalty for further conveying from -those to whom you convey the Program, the only way you could satisfy both those terms -and this License would be to refrain entirely from conveying the Program. - -### 13. Use with the GNU Affero General Public License - -Notwithstanding any other provision of this License, you have permission to link or -combine any covered work with a work licensed under version 3 of the GNU Affero -General Public License into a single combined work, and to convey the resulting work. 
-The terms of this License will continue to apply to the part which is the covered -work, but the special requirements of the GNU Affero General Public License, section -13, concerning interaction through a network will apply to the combination as such. - -### 14. Revised Versions of this License - -The Free Software Foundation may publish revised and/or new versions of the GNU -General Public License from time to time. Such new versions will be similar in spirit -to the present version, but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Program specifies that -a certain numbered version of the GNU General Public License “or any later -version” applies to it, you have the option of following the terms and -conditions either of that numbered version or of any later version published by the -Free Software Foundation. If the Program does not specify a version number of the GNU -General Public License, you may choose any version ever published by the Free -Software Foundation. - -If the Program specifies that a proxy can decide which future versions of the GNU -General Public License can be used, that proxy's public statement of acceptance of a -version permanently authorizes you to choose that version for the Program. - -Later license versions may give you additional or different permissions. However, no -additional obligations are imposed on any author or copyright holder as a result of -your choosing to follow a later version. - -### 15. Disclaimer of Warranty - -THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER -EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS TO THE -QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE -DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - -### 16. Limitation of Liability - -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY -COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS -PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, -INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE -OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE -WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - -### 17. Interpretation of Sections 15 and 16 - -If the disclaimer of warranty and limitation of liability provided above cannot be -given local legal effect according to their terms, reviewing courts shall apply local -law that most closely approximates an absolute waiver of all civil liability in -connection with the Program, unless a warranty or assumption of liability accompanies -a copy of the Program in return for a fee. - -_END OF TERMS AND CONDITIONS_ - -## How to Apply These Terms to Your New Programs - -If you develop a new program, and you want it to be of the greatest possible use to -the public, the best way to achieve this is to make it free software which everyone -can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest to attach them -to the start of each source file to most effectively state the exclusion of warranty; -and each file should have at least the “copyright” line and a pointer to -where the full notice is found. 
- - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - -If the program does terminal interaction, make it output a short notice like this -when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type 'show c' for details. - -The hypothetical commands `show w` and `show c` should show the appropriate parts of -the General Public License. Of course, your program's commands might be different; -for a GUI interface, you would use an “about box”. - -You should also get your employer (if you work as a programmer) or school, if any, to -sign a “copyright disclaimer” for the program, if necessary. For more -information on this, and how to apply and follow the GNU GPL, see -<>. - -The GNU General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may consider it -more useful to permit linking proprietary applications with the library. If this is -what you want to do, use the GNU Lesser General Public License instead of this -License. But first, please read -<>. 
diff --git a/NEWS.md b/NEWS.md index 7dbbf7db..8ac21852 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ geostan now supports Poisson models with censored count data, a common problem in public health research where small area disease and mortality counts are censored below a threshold value. Model for censored outcome data can now be implemented using the `censor_point` argument found in all of the model fitting functions (stan_glm, stan_car, stan_esf, stan_icar). -## measurement error models improved +## Measurement error models improved The measurement error models have been updated in three important respects: diff --git a/R/convenience-functions.R b/R/convenience-functions.R index b270d4e3..9cd032fd 100644 --- a/R/convenience-functions.R +++ b/R/convenience-functions.R @@ -328,7 +328,8 @@ lisa <- function(x, w, type = TRUE) { #' @param shape An object of class \code{sf} or another spatial object coercible to \code{sf} with \code{sf::st_as_sf} such as \code{SpatialPolygonsDataFrame}. #' @param name The name to use on the plot labels; default to "y" or, if \code{y} is a \code{geostan_fit} object, to "Residuals". #' @param plot If \code{FALSE}, return a list of \code{gg} plots. -#' @param mc Character string indicating how to plot the residual Moran coefficient: if `mc = "scatter"`, then \code{\link[geostan]{moran_plot}} will be used with the marginal residuals; if `mc = "hist"`, then a histogram of Moran coefficient values will be returned, where each plotted value represents the degree of residual autocorrelation in a draw from the join posterior distribution of model parameters. 
+#' +#' @param mc_style Character string indicating how to plot the residual Moran coefficient (only used if `y` is a fitted model): if `mc_style = "scatter"`, then \code{\link[geostan]{moran_plot}} will be used with the marginal residuals; if `mc_style = "hist"`, then a histogram of Moran coefficient values will be returned, where each plotted value represents the degree of residual autocorrelation in a draw from the joint posterior distribution of model parameters. #' #' @param style Style of connectivity matrix; if `w` is not provided, `style` is passed to \code{\link[geostan]{shape2mat}} and defaults to "W" for row-standardized. #' @param w An optional spatial connectivity matrix; if not provided, one will be created using \code{\link[geostan]{shape2mat}}. @@ -368,7 +369,7 @@ sp_diag <- function(y, shape, name = "y", plot = TRUE, - mc = c("scatter", "hist"), + mc_style = c("scatter", "hist"), style = c("W", "B"), w = shape2mat(shape, match.arg(style)), binwidth = function(x) 0.5 * sd(x), @@ -393,14 +394,14 @@ sp_diag.geostan_fit <- function(y, shape, name = "Residual", plot = TRUE, - mc = c("scatter", "hist"), + mc_style = c("scatter", "hist"), style = c("W", "B"), w = shape2mat(shape, match.arg(style)), binwidth = function(x) 0.5 * stats::sd(x), rates = TRUE, size = 0.15, ...) 
{ - mc <- match.arg(mc) + mc_style <- match.arg(mc_style, c("scatter", "hist")) if (!inherits(shape, "sf")) shape <- sf::st_as_sf(shape) outcome <- y$data[,1] fits <- fitted(y, summary = TRUE, rates = rates) @@ -432,13 +433,9 @@ sp_diag.geostan_fit <- function(y, label = signs::signs) + theme_void() # residual autocorrelation - R <- residuals(y, summary = FALSE) R.mc <- apply(R, 1, mc, w = w) - if (length(unique(R.mc)) == 1) { - g.mc <- moran_plot(R[1,], w, xlab = name) - } - if (mc == "scatter") { + if (mc_style == "scatter") { g.mc <- moran_plot(marginal_residual, w, xlab = name) } else { R.mc.mu <- mean(R.mc) @@ -453,6 +450,9 @@ sp_diag.geostan_fit <- function(y, x = "Residual MC", subtitle = paste0("MC (mean) = ", round(R.mc.mu, 2))) } + if (length(unique(R.mc)) == 1) { + g.mc <- moran_plot(R[1,], w, xlab = name) + } if (plot) { return( gridExtra::grid.arrange(ovf, g.mc, map.y, ncol = 3) ) } else { @@ -512,7 +512,7 @@ sp_diag.numeric <- function(y, #' @param probs Lower and upper quantiles of the credible interval to plot. #' @param plot If \code{FALSE}, return a list of \code{ggplot}s and a \code{data.frame} with the raw data values alongside a posterior summary of the modeled variable. #' -#' @param mc Character string indicating how to plot the Moran coefficient for the delta values: if `mc = "scatter"`, then \code{\link[geostan]{moran_plot}} will be used with the marginal residuals; if `mc = "hist"`, then a histogram of Moran coefficient values will be returned, where each plotted value represents the degree of residual autocorrelation in a draw from the join posterior distribution of delta values. 
+#' @param mc_style Character string indicating how to plot the Moran coefficient for the delta values: if `mc_style = "scatter"`, then \code{\link[geostan]{moran_plot}} will be used with the marginal residuals; if `mc_style = "hist"`, then a histogram of Moran coefficient values will be returned, where each plotted value represents the degree of residual autocorrelation in a draw from the joint posterior distribution of delta values. #' #' @param size Size of points and lines, passed to \code{geom_pointrange}. #' @param index Integer value; use this if you wish to identify observations with the largest `n=index` absolute Delta values; data on the top `n=index` observations ordered by absolute Delta value will be printed to the console and the plots will be labeled with the indices of the identified observations. @@ -570,7 +570,7 @@ me_diag <- function(fit, shape, probs = c(0.025, 0.975), plot = TRUE, - mc = c("scatter", "hist"), + mc_style = c("scatter", "hist"), size = 0.25, index = 0, style = c("W", "B"), @@ -580,7 +580,7 @@ me_diag <- function(fit, stopifnot(length(varname) == 1) if (!varname %in% colnames(fit$data)) stop("varname is not found in colnames(fit$data). 
Provide the name of the variable as it appears in the model formula") if (!inherits(shape, "sf")) shape <- sf::st_as_sf(shape) - mc <- match.arg(mc) + mc_style <- match.arg(mc_style, c("scatter", "hist")) x.raw <- as.numeric(fit$data[,varname]) probs = sort(probs) width = paste0(100 * (probs[2] - probs[1]), "%") @@ -624,11 +624,11 @@ me_diag <- function(fit, theme_classic() delta.mat <- t(apply(x.samples, 1, .resid, y = x.raw)) df$Delta <- apply(delta.mat, 2, mean) - if (mc == "scatter") { + D.mc <- apply(delta.mat, 1, mc, w = w) + D.mc.mu <- mean(D.mc) + if (mc_style == "scatter") { g.mc <- moran_plot(df$Delta, w, xlab = bquote(hat(Delta))) } else { - D.mc <- apply(delta.mat, 1, mc, w = w) - D.mc.mu <- mean(D.mc) g.mc <- ggplot() + geom_histogram(aes(D.mc), binwidth = binwidth(as.numeric(D.mc)), diff --git a/README.Rmd b/README.Rmd index 892d836c..d995cceb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -19,29 +19,38 @@ knitr::opts_chunk$set( -# geostan +## geostan: Bayesian spatial analysis The **geostan** R package supports a complete spatial analysis workflow with hierarchical Bayesian models (HBMs) for areal -data and a variety of functions for visualizing spatial data and model results. +data, including a variety of functions for visualizing spatial data and model results. + +The package is designed primarily to support public health research with spatial data; +see the [**surveil**](https://connordonegan.github.io/surveil) R package for time series analysis of public health surveillance data. + +**geostan** is an interface to [**Stan**](https://mc-stan.org), a state-of-the-art platform for Bayesian inference. ### Disease mapping and spatial regression -Model small-area incidence rates with mortality or disease data recorded across areal units like counties or census tracts. +Model small-area incidence rates with mortality or disease data recorded across areal units like states, counties, or census tracts. 
### Observational uncertainty -Incorporate information on data reliability into any **geostan** model. Built specifically for American Community Survey (ACS) data. +Incorporate information on data reliability, such as standard errors of American Community Survey estimates, into any **geostan** model. + +### Censored observations + +Vital statistics and disease surveillance systems like CDC Wonder censor case counts that fall below a threshold number; **geostan** can provide probability distributions for disease or mortality risk across all areas, censored or not. ### Spatial analysis tools -Tools for visualizing and measuring spatial autocorrelation and map patterns, for exploratory analysis and model diagnostics. +Tools for visualizing and measuring spatial autocorrelation and map patterns, for exploratory analysis and model diagnostics. Visual diagnostics also support the evaluation of survey data quality and observational error models. ### The RStan ecosystem Compatible with a suite of high-quality R packages for Bayesian inference and model evaluation. -### Custom Stan models +### Custom spatial models Tools for building custom spatial models in [Stan](https://mc-stan.org/). @@ -54,26 +63,3 @@ if (!require(drat)) install.packages("drat") drat::addRepo("connordonegan") install.packages("geostan") ``` - -## Resources - -For demonstration analyses and some additional discussion, see the vignettes on the package [website](https://connordonegan.github.io/geostan/) and the package help pages (e.g., run `?stan_car` in R). - -## Citation - - * Donegan, Connor (2021). geostan: Bayesian Spatial Analysis. R package Version 0.1.1 https://connordonegan.github.io/geostan/ - - * Donegan, Connor, Yongwan Chun, and Daniel A. Griffith. Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. International Journal of Environmental Research and Public Health 18.13 (2021): 6856. 
DOI: 10.3390/ijerph18136856 - - -**geostan** is an interface to **Stan**: - - * Carpenter B., Gelman A., Hoffman M. D., Lee D., Goodrich B., Betancourt M., Brubaker M., Guo J., Li P., and Riddell A. (2017). Stan: A probabilistic programming language. Journal of Statistical Software. 76(1). DOI: 10.18637/jss.v076.i01 - -## Credit - -The **geostan** package was built with the help of **rstantools**: - - * Gabry, Jonah, Ben Goodrich, and Martin Lysy (2021). rstantools: Tools for Developing R Packages Interfacing with 'Stan'. R package version 2.1.1 https://mc-stan.org/rstantools/index.html - - diff --git a/README.html b/README.html index 16f72f33..c972fc14 100644 --- a/README.html +++ b/README.html @@ -602,41 +602,29 @@ - + -

geostan

-

The geostan R package supports a complete spatial analysis workflow with hierarchical Bayesian models (HBMs) for areal data and a variety of functions for visualizing spatial data and model results.

+

geostan: Bayesian spatial analysis

+

The geostan R package supports a complete spatial analysis workflow with hierarchical Bayesian models (HBMs) for areal data, including a variety of functions for visualizing spatial data and model results.

+

The package is designed primarily to support public health research with spatial data; see the surveil R package for time series analysis of public health surveillance data.

+

geostan is an interface to Stan, a state-of-the-art platform for Bayesian inference.

Disease mapping and spatial regression

-

Model small-area incidence rates with mortality or disease data recorded across areal units like counties or census tracts.

+

Model small-area incidence rates with mortality or disease data recorded across areal units like states, counties, or census tracts.

Observational uncertainty

-

Incorporate information on data reliability into any geostan model. Built specifically for American Community Survey (ACS) data.

+

Incorporate information on data reliability, such as standard errors of American Community Survey estimates, into any geostan model.

+

Censored observations

+

Vital statistics and disease surveillance systems like CDC Wonder censor case counts that fall below a threshold number; geostan can provide probability distributions for disease or mortality risk across all areas, censored or not.

Spatial analysis tools

-

Tools for visualizing and measuring spatial autocorrelation and map patterns, for exploratory analysis and model diagnostics.

+

Tools for visualizing and measuring spatial autocorrelation and map patterns, for exploratory analysis and model diagnostics. Visual diagnostics also support the evaluation of survey data quality and observational error models.

The RStan ecosystem

Compatible with a suite of high-quality R packages for Bayesian inference and model evaluation.

-

Custom Stan models

+

Custom spatial models

Tools for building custom spatial models in Stan.

Installation

Install geostan using:

-

Resources

-

For demonstration analyses and some additional discussion, see the vignettes on the package website and the package help pages (e.g., run ?stan_car in R).

-

Citation

-
    -
  • Donegan, Connor (2021). geostan: Bayesian Spatial Analysis. R package Version 0.1.1 https://connordonegan.github.io/geostan/

  • -
  • Donegan, Connor, Yongwan Chun, and Daniel A. Griffith. Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. International Journal of Environmental Research and Public Health 18.13 (2021): 6856. DOI: 10.3390/ijerph18136856

  • -
-

geostan is an interface to Stan:

-
    -
  • Carpenter B., Gelman A., Hoffman M. D., Lee D., Goodrich B., Betancourt M., Brubaker M., Guo J., Li P., and Riddell A. (2017). Stan: A probabilistic programming language. Journal of Statistical Software. 76(1). DOI: 10.18637/jss.v076.i01
  • -
-

Credit

-

The geostan package was built with the help of rstantools:

- diff --git a/README.md b/README.md index 31ac77c7..54690511 100644 --- a/README.md +++ b/README.md @@ -3,33 +3,50 @@ -# geostan +## geostan: Bayesian spatial analysis The **geostan** R package supports a complete spatial analysis workflow -with hierarchical Bayesian models (HBMs) for areal data and a variety of -functions for visualizing spatial data and model results. +with hierarchical Bayesian models (HBMs) for areal data, including a +variety of functions for visualizing spatial data and model results. + +The package is designed primarily to support public health research with +spatial data; see the +[**surveil**](https://connordonegan.github.io/surveil) R package for +time series analysis of public health surveillance data. + +**geostan** is an interface to [**Stan**](https://mc-stan.org), a +state-of-the-art platform for Bayesian inference. ### Disease mapping and spatial regression Model small-area incidence rates with mortality or disease data recorded -across areal units like counties or census tracts. +across areal units like states, counties, or census tracts. ### Observational uncertainty -Incorporate information on data reliability into any **geostan** model. -Built specifically for American Community Survey (ACS) data. +Incorporate information on data reliability, such as standard errors of +American Community Survey estimates, into any **geostan** model. + +### Censored observations + +Vital statistics and disease surveillance systems like CDC Wonder censor +case counts that fall below a threshold number; **geostan** can provide +probability distributions for disease or mortality risk across all +areas, censored or not. ### Spatial analysis tools Tools for visualizing and measuring spatial autocorrelation and map -patterns, for exploratory analysis and model diagnostics. +patterns, for exploratory analysis and model diagnostics. Visual +diagnostics also support the evaluation of survey data quality and +observational error models. 
### The RStan ecosystem Compatible with a suite of high-quality R packages for Bayesian inference and model evaluation. -### Custom Stan models +### Custom spatial models Tools for building custom spatial models in [Stan](https://mc-stan.org/). @@ -43,36 +60,3 @@ if (!require(drat)) install.packages("drat") drat::addRepo("connordonegan") install.packages("geostan") ``` - -## Resources - -For demonstration analyses and some additional discussion, see the -vignettes on the package -[website](https://connordonegan.github.io/geostan/) and the package help -pages (e.g., run `?stan_car` in R). - -## Citation - - - Donegan, Connor (2021). geostan: Bayesian Spatial Analysis. R - package Version 0.1.1 - - - Donegan, Connor, Yongwan Chun, and Daniel A. Griffith. Modeling - community health with areal data: Bayesian inference with survey - standard errors and spatial structure. International Journal of - Environmental Research and Public Health 18.13 (2021): 6856. DOI: - 10.3390/ijerph18136856 - -**geostan** is an interface to **Stan**: - - - Carpenter B., Gelman A., Hoffman M. D., Lee D., Goodrich B., - Betancourt M., Brubaker M., Guo J., Li P., and Riddell A. (2017). - Stan: A probabilistic programming language. Journal of Statistical - Software. 76(1). DOI: 10.18637/jss.v076.i01 - -## Credit - -The **geostan** package was built with the help of **rstantools**: - - - Gabry, Jonah, Ben Goodrich, and Martin Lysy (2021). rstantools: - Tools for Developing R Packages Interfacing with ‘Stan’. 
R package - version 2.1.1 diff --git a/_pkgdown.yml b/_pkgdown.yml index 5ef059f4..15b25f92 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,18 +1,22 @@ template: params: - bootswatch: flatly + bootswatch: sandstone ganalytics: UA-116873024-1 destination: docs url: https://connordonegan.github.io/geostan/ home: title: Bayesian Spatial Analysis +articles: +- title: "Package vignettes" + contents: + - measuring-sa + - spatial-me-models reference: - title: "Package overview" - contents: - geostan-package - title: "Spatial analysis" - desc: > - Functions for measuring and visualizing spatial autocorrelation and dispersion, including model diagnostics + desc: Functions for measuring and visualizing spatial autocorrelation and dispersion, including model diagnostics - contents: - aple - lisa diff --git a/configure b/configure index 1c047986..0304fc54 100755 --- a/configure +++ b/configure @@ -1,4 +1,5 @@ +#! /bin/sh + # Generated by rstantools. Do not edit by hand. -#! /bin/sh "${R_HOME}/bin/Rscript" -e "rstantools::rstan_config()" diff --git a/configure.win b/configure.win index 94d77bdc..5e2dceb8 100755 --- a/configure.win +++ b/configure.win @@ -1,4 +1,5 @@ +#! /bin/sh + # Generated by rstantools. Do not edit by hand. -#! /bin/sh "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "rstantools::rstan_config()" diff --git a/docs/404.html b/docs/404.html index e40e0ba7..9f59cfe5 100644 --- a/docs/404.html +++ b/docs/404.html @@ -12,7 +12,7 @@ - + @@ -22,7 +22,7 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - - -
- -
-
- - -
- -

Version 3, 29 June 2007
Copyright © 2007 Free Software Foundation, Inc. <http://fsf.org/>

-

Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

-
-

-Preamble

-

The GNU General Public License is a free, copyleft license for software and other kinds of works.

-

The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program–to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.

-

When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.

-

To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others.

-

For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.

-

Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it.

-

For the developers’ and authors’ protection, the GPL clearly explains that there is no warranty for this free software. For both users’ and authors’ sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions.

-

Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users’ freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users.

-

Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free.

-

The precise terms and conditions for copying, distribution and modification follow.

-
-
-

-TERMS AND CONDITIONS

-
-

-0. Definitions

-

“This License” refers to version 3 of the GNU General Public License.

-

“Copyright” also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.

-

“The Program” refers to any copyrightable work licensed under this License. Each licensee is addressed as “you”. “Licensees” and “recipients” may be individuals or organizations.

-

To “modify” a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a “modified version” of the earlier work or a work “based on” the earlier work.

-

A “covered work” means either the unmodified Program or a work based on the Program.

-

To “propagate” a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.

-

To “convey” a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.

-

An interactive user interface displays “Appropriate Legal Notices” to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.

-
-
-

-1. Source Code

-

The “source code” for a work means the preferred form of the work for making modifications to it. “Object code” means any non-source form of a work.

-

A “Standard Interface” means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.

-

The “System Libraries” of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A “Major Component”, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.

-

The “Corresponding Source” for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work’s System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work.

-

The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.

-

The Corresponding Source for a work in source code form is that same work.

-
-
-

-2. Basic Permissions

-

All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.

-

You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.

-

Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary.

-
- -
-

-4. Conveying Verbatim Copies

-

You may convey verbatim copies of the Program’s source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.

-

You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.

-
-
-

-5. Conveying Modified Source Versions

-

You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:

-
    -
  • -a) The work must carry prominent notices stating that you modified it, and giving a relevant date.
  • -
  • -b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to “keep intact all notices”.
  • -
  • -c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it.
  • -
  • -d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so.
  • -
-

A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an “aggregate” if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation’s users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.

-
-
-

-6. Conveying Non-Source Forms

-

You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:

-
    -
  • -a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange.
  • -
  • -b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge.
  • -
  • -c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b.
  • -
  • -d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements.
  • -
  • -e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d.
  • -
-

A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.

-

A “User Product” is either (1) a “consumer product”, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, “normally used” refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.

-

“Installation Information” for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made.

-

If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM).

-

The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network.

-

Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying.

-
-
-

-7. Additional Terms

-

“Additional permissions” are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions.

-

When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission.

-

Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:

-
    -
  • -a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or
  • -
  • -b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or
  • -
  • -c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or
  • -
  • -d) Limiting the use for publicity purposes of names of licensors or authors of the material; or
  • -
  • -e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or
  • -
  • -f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors.
  • -
-

All other non-permissive additional terms are considered “further restrictions” within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.

-

If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.

-

Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.

-
-
-

-8. Termination

-

You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).

-

However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.

-

Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.

-

Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.

-
-
-

-9. Acceptance Not Required for Having Copies

-

You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.

-
-
-

-10. Automatic Licensing of Downstream Recipients

-

Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License.

-

An “entity transaction” is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party’s predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.

-

You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.

-
-
-

-11. Patents

-

A “contributor” is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor’s “contributor version”.

-

A contributor’s “essential patent claims” are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, “control” includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.

-

Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor’s essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.

-

In the following three paragraphs, a “patent license” is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To “grant” such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.

-

If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. “Knowingly relying” means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient’s use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.

-

If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.

-

A patent license is “discriminatory” if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.

-

Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.

-
-
-

-12. No Surrender of Others’ Freedom

-

If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.

-
-
-

-13. Use with the GNU Affero General Public License

-

Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such.

-
-
-

-14. Revised Versions of this License

-

The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.

-

Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation.

-

If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy’s public statement of acceptance of a version permanently authorizes you to choose that version for the Program.

-

Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version.

-
-
-

-15. Disclaimer of Warranty

-

THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

-
-
-

-16. Limitation of Liability

-

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

-
-
-

-17. Interpretation of Sections 15 and 16

-

If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.

-

END OF TERMS AND CONDITIONS

-
-
-
-

-How to Apply These Terms to Your New Programs

-

If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

-

To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the “copyright” line and a pointer to where the full notice is found.

- -

Also add information on how to contact you by electronic and paper mail.

-

If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode:

- -

The hypothetical commands show w and show c should show the appropriate parts of the General Public License. Of course, your program’s commands might be different; for a GUI interface, you would use an “about box”.

-

You should also get your employer (if you work as a programmer) or school, if any, to sign a “copyright disclaimer” for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>.

-

The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>.

-
-
- -
- - - -
- - - -
- - -
-

Site built with pkgdown 1.6.1.9001.

-
- -
-
- - - - - - - - - - diff --git a/docs/apple-touch-icon-120x120.png b/docs/apple-touch-icon-120x120.png index eda63b82..e5a1d485 100644 Binary files a/docs/apple-touch-icon-120x120.png and b/docs/apple-touch-icon-120x120.png differ diff --git a/docs/apple-touch-icon-152x152.png b/docs/apple-touch-icon-152x152.png index c2462d00..5884dccd 100644 Binary files a/docs/apple-touch-icon-152x152.png and b/docs/apple-touch-icon-152x152.png differ diff --git a/docs/apple-touch-icon-180x180.png b/docs/apple-touch-icon-180x180.png index c8f9c77b..a976e5b5 100644 Binary files a/docs/apple-touch-icon-180x180.png and b/docs/apple-touch-icon-180x180.png differ diff --git a/docs/apple-touch-icon-60x60.png b/docs/apple-touch-icon-60x60.png index 609b29d9..815f9ae4 100644 Binary files a/docs/apple-touch-icon-60x60.png and b/docs/apple-touch-icon-60x60.png differ diff --git a/docs/apple-touch-icon-76x76.png b/docs/apple-touch-icon-76x76.png index baf34b05..54f002d8 100644 Binary files a/docs/apple-touch-icon-76x76.png and b/docs/apple-touch-icon-76x76.png differ diff --git a/docs/apple-touch-icon.png b/docs/apple-touch-icon.png index c8f9c77b..a976e5b5 100644 Binary files a/docs/apple-touch-icon.png and b/docs/apple-touch-icon.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 489ab07e..fc7251f3 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,92 +1,18 @@ - - - - - - - -Articles • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Articles • geostan - - - - - - - - - - - - - - +
-
- -
- - -
- +
- - - + diff --git a/docs/articles/measuring-sa.html b/docs/articles/measuring-sa.html index 8e4ba716..0b39b906 100644 --- a/docs/articles/measuring-sa.html +++ b/docs/articles/measuring-sa.html @@ -12,7 +12,7 @@ - + @@ -46,7 +46,7 @@ geostan - 0.1.1 + 0.1.2 @@ -55,20 +55,8 @@
  • Reference
  • -
  • Changelog @@ -99,7 +87,7 @@

    Connor Donegan

    September 13, 2021

    - Source: vignettes/measuring-sa.Rmd + Source: vignettes/measuring-sa.Rmd @@ -107,18 +95,18 @@

    September 13, 2021

    This vignette walks through exploratory spatial analysis functionality in the geostan package, which includes methods for measuring and visualizing spatial autocorrelation. The last section introduces a set of diagnostic plots for spatial models.

    -
    -

    -Getting started

    +
    +

    Getting started +

    From the R console, load geostan and the georgia data set.

     library(geostan)
     data("georgia")

    georgia is a simple features (sf) object with estimates of county population characteristics from the American Community Survey (ACS) for the five year period spanning 2014-2018. The corresponding standard errors are also included. The column college contains ACS estimates for the percent of the population age 25 and older that has obtained a college degree or higher; the standard errors of the survey estimates are in the column named college.se.

    -
    -

    -Spatial diagnostic summary

    +
    +

    Spatial diagnostic summary +

    If we pass these estimates and the simple features object to the sp_diag function, it returns a histogram, Moran scatter plot, and map of the estimates:

     sp_diag(georgia$college, georgia, name = "College (%)")
    @@ -133,9 +121,9 @@

    weighted.mean(georgia$college, w = georgia$population) #> [1] 30.66856

    -
    -

    -The Moran scatter plot

    +
    +

    The Moran scatter plot +

    We can create the Moran plot ourselves using the moran_plot function and a spatial connectivity matrix. The shape2mat function takes a spatial object (simple features or spatial polygons) and creates a sparse matrix representation of the neighborhood structure: counties are considered ‘neighbors’ if their borders touch each other.1 To reproduce the Moran plot given by sp_diag, we need to provide a row-standardized spatial weights matrix. We do this by setting the second argument, style, to “W”.

     C <- shape2mat(georgia, style = "W")
    @@ -153,9 +141,9 @@ 

    While positive and negative neighboring values still cancel out as previously, using a binary matrix means that counties with more neighbors contribute more to the MC.

    The quadrants of the Moran plot are helpful for classifying observations. The first (top right) quadrant represents counties with above-average values that are also surrounded by above-average values; the third (bottom left) quadrant contains low values surrounded by low values. Points in these quadrants contribute positively to the MC, and they represent positive spatial autocorrelation. The second (top left) and fourth (bottom right) quadrants represent negative spatial autocorrelation since they contain spatial outliers—high (or low) values surrounded by dissimilar values.

    -
    -

    -Local Indicators of Spatial Association

    +
    +

    Local Indicators of Spatial Association +

    The lisa function calculates “local indicators of spatial association” (Anselin 1995). LISA values are closely related to the Moran plot. If we were to provide standardized values (z-scores) on the Moran plot, the LISA value would be equal to the product of each z-score and its spatially lagged value. The lisa function returns the LISA values and indicates the quadrant of the Moran plot in which each point is found:

     Li <- lisa(georgia$college, C)
    @@ -172,9 +160,9 @@ 

    c(mc(georgia$college, C), mean(Li$Li)) #> [1] 0.4220000 0.4189617

    -
    -

    -Effective sample size

    +
    +

    Effective sample size +

    We can also consider what these spatial patterns mean in terms of the information content of our data; that is, the impact that spatial autocorrelation might have on the amount of evidence that can be garnered from this data in an analysis. This is often described as effective sample size (ESS).

    The n_eff function provides an approximate measure of ESS for spatially autocorrelated data. Based on the simultaneous autoregressive (SAR) model (Griffith 2005), it requires a value of the SA parameter, \(\rho\), from the SAR model and the number of observations in our data set. We can get a rough measure of ESS for our ICE data using the following code:

    @@ -186,9 +174,9 @@ 

    #> 159.00000 0.71800 21.69292

    This tells us that, given the degree of SA in the ICE estimates, our nominal sample size of 159 observations has about the same information content as 22 independent observations. This should provide some idea as to why it is so perilous to use conventional (non-spatial) statistical methods with spatial data. The odds of observing a strong correlation between any arbitrary pair of spatially patterned variables can be far greater than conventional methods report.

    -
    -

    -Model diagnostics

    +
    +

    Model diagnostics +

    The sp_diag function can also be used to evaluate spatial models. One of the purposes of the function is to identify spatial patterns in the model residuals, because spatial autocorrelation violates a core assumption (independence) of conventional statistical models and because spatial patterns can provide valuable information that we should pay attention to.

    To demonstrate, we first fit a (non-spatial) Poisson model to the Georgia male mortality data. The following code fits a log-linear Poisson model, and it models mortality rates for each county separately (that’s provided by the “random effects” argument: re ~ GEOID).

    @@ -212,16 +200,18 @@ 

    #> 1 10 0 3 #> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. #> Running the chains for more iterations may help. See -#> http://mc-stan.org/misc/warnings.html#bulk-ess -print(fit) +#> https://mc-stan.org/misc/warnings.html#bulk-ess

    +

    For a summary of model results:

    +
    +print(fit)
     #> Spatial Model Results 
     #> Formula: deaths.male ~ offset(log(pop.at.risk.male))
     #> Partial pooling (varying intercept): ~GEOID
     #> Spatial method (outcome):  Exchangeable 
     #> Likelihood function:  poisson 
     #> Link function:  log 
    -#> Residual Moran Coefficient:  0.02171375 
    -#> WAIC:  1320.08 
    +#> Residual Moran Coefficient:  0.02154225 
    +#> WAIC:  1320.37 
     #> Observations:  159 
     #> Data models (ME): none
     #> Inference for Stan model: foundation.
    @@ -229,33 +219,25 @@ 

    #> post-warmup draws per chain=1000, total post-warmup draws=4000. #> #> mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat -#> intercept -4.180 0.001 0.021 -4.222 -4.194 -4.180 -4.166 -4.140 354 1.007 -#> alpha_tau 0.248 0.000 0.016 0.218 0.237 0.247 0.258 0.281 4755 0.999 +#> intercept -4.178 0.001 0.021 -4.219 -4.191 -4.177 -4.164 -4.138 263 1.019 +#> alpha_tau 0.248 0.000 0.016 0.220 0.237 0.247 0.258 0.280 3679 1.000 #> -#> Samples were drawn using NUTS(diag_e) at Thu Nov 25 11:11:44 2021. +#> Samples were drawn using NUTS(diag_e) at Sun Dec 26 18:28:19 2021. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split chains (at #> convergence, Rhat=1).

    -

    Because geostan uses Bayesian inference and a Markov chain Monte Carlo (MCMC) algorithm from the Stan modeling language to draw samples from the posterior distribution of parameters, the fit object contains not just summaries of results (as printed above) but also full probability distributions for each parameter. We can plot the posterior distribution of any model parameter; below is the probability distribution for the intercept, which is the mean log-county mortality rate. We can see it is centered on \(-4.183\), which is a mortality rate of \(e^{-4.183} = 153\) per 10,000.

    -

    When necessary, Stan will print important warning messages, such as “Bulk Effective Samples Size (ESS) is too low.” Looking at the printed results above, we can see that we kept a total of 4,000 MCMC samples for inference. If we then look at the “n_eff” (i.e., ESS) column in the table of results, we see that the effective sample size is smaller that the nominal sample size of 4,000 (due to serial autocorrelation in the MCMC samples). If the MCMC samples are an approximation of the exact posterior distribution, then the more samples we draw the closer our approximation will be. The Monte Carlo standard error of the estimates (“se_mean”) tells us how close we are. To evaluate all model parameters (which you should always do), you can use the following function calls: rstan::stan_ess(fit$stanfit), rstan::stan_mcse(fit$stanfit), and rstan::stan_rhat(fit$stanfit) (and see the corresponding help pages, ?rstan::stan_rhat.)

    -
    -plot(fit, pars = "intercept")
    -#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    -

    -

    With MCMC samples, we can easily extrapolate our inferences from the posterior distribution of parameters to any function of those parameters (MacKay 2003). This means we can obtain a posterior distribution for every county mortality rate, and for the difference between the modeled mortality rates (fitted values) and the crude mortality rates (i.e., the residuals: \(\text{observed} - \text{modeled}\)), and, further, for any function of those residuals. These are generally referred to as ‘quantities of interest.’

    -

    Quantities of interest can be useful for model criticism as well as for making inferences from a model. For example, we can measure the degree of spatial autocorrelation in the MCMC samples of residuals, resulting in a probability distribution for the residual autocorrelation. Or, if we wanted to measure health inequality across counties as a function of the county mortality rates, we could calculate that inequality measure for each MCMC sample of fitted values to obtain a probability distribution for the degree of inequality.

    -

    Now provide the fitted model, fit, and the spatial data, georgia, to the sp_diag function to see a set of spatial model diagnostics:

    +

    The printed summary of results shows that the posterior probability distribution for the intercept, which in this case represents the mean log-mortality rate, is centered on \(-4.183\), which is a mortality rate of \(e^{-4.183} = 153\) per 10,000. The 2.5% and 97.5% columns provide the bounds on the 95% credible interval (CI) for each parameter; the CI for the intercept is [-4.22, -4.14].2

    +

    Provide the fitted model, fit, and the spatial data, georgia, to the sp_diag function to see a set of spatial model diagnostics:

    -sp_diag(fit, georgia)
    -#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    -

    -

    The point-interval plot on the left shows the raw mortality rates (the raw outcome data) on the x-axis, the fitted values on the y-axis, and a ‘perfect fit’ (slope = 1, intercept = 0) line for reference. We can see that a number of the fitted values have posterior means that deviate from the observations; but this “shrinkage” towards the mean is not necessarily a problem. In fact, it is often desirable insofar as it indicates that these are counties for which our data provide very little evidence as to what the risk of death is (i.e., the population is very small). (For a good introductory discussion of information pooling and many other topics as well, see McElreath (2016)).

    -

    The middle panel represents spatial autocorrelation in the residuals as measured by the Moran coefficient (i.e., autocorrelation in the joint probability distribution of residuals), and the map shows the mean residual for each county (marginal posterior means). In this case, the MC histogram shows that there is a small amount of residual autocorrelation, while the map indicates that this derives mainly from a north-south/metropolitan-rural trend. The trend in the residuals helps us see that shrinking towards the mean mortality rate is less than ideal in this case because we can see that county mortality rates are higher in the southern half of the state than in the greater Atlanta metropolitan area.

    -

    We could probably do better than shrinking towards the mean by using one of geostan’s spatial models (see the examples in ?stan_car) or by adding one or more (substantively meaningful) covariates.

    +sp_diag(fit, georgia)
    +

    The point-interval plot on the left shows the raw mortality rates (the raw outcome data) on the x-axis, the fitted values on the y-axis, and a ‘perfect fit’ (slope = 1, intercept = 0) line for reference. We can see that a number of the fitted values have posterior means that deviate from the observations; but this “shrinkage” towards the mean is not necessarily a problem. In fact, it is often desirable insofar as it indicates that these are counties for which our data provide very little evidence as to what the risk of death is (i.e., the population is very small). (For an introductory discussion of information pooling and other topics as well, see McElreath (2016)).

    +

    The middle panel is a Moran scatter plot of the model residuals, and the map shows the mean residual for each county. The residuals have been taken at their marginal posterior means. However, there is more than one way to measure residual autocorrelation. For an alternative visualization that uses the entire posterior distribution of parameters and provides an estimate of the residual Moran coefficient that will match the printed model results above (MC = 0.022), try sp_diag(fit, georgia, mc_style = "hist").

    +

    In this case, there is a very small amount of residual autocorrelation, and the map indicates that this derives from a slight north-south/metropolitan-rural trend. The trend in the residuals helps us see that shrinking towards the mean mortality rate is less than ideal in this case because we can see that county mortality rates are higher in the southern half of the state than in the greater Atlanta metropolitan area.

    +

    We could extend this model by using one of geostan’s spatial models (see the examples in ?stan_car) or by adding one or more (substantively meaningful) covariates.

    -
    -

    -References

    +
    +

    References +

    Anselin, Luc. 1995. “Local Indicators of Spatial Association—LISA.” Geographical Analysis 27 (2): 93–115.

    @@ -266,9 +248,6 @@

    Griffith, Daniel A. 2005. “Effective Geographic Sample Size in the Presence of Spatial Autocorrelation.” Annals of the Association of American Geographers 95 (4): 740–60.

    -
    -

    MacKay, David J. 2003. Information Theory, Inference, and Learning Algorithms. Cambridge University Press.

    -

    McElreath, Richard. 2016. Statistical Rethinking: A Bayesian Course with Examples in R and Stan. CRC Press.

    @@ -278,6 +257,7 @@


    1. For the most part, users do not need to know anything about sparse matrix objects to work with them. Objects from the Matrix package can typically be treated like objects of class “matrix”. Sometimes, however, you may need to make an explicit call to the Matrix package to access its methods. For example, colSums(C) will produce an error, but Matrix::colSums(C) will work as expected.

    2. +
    3. Stan will print important warning messages when Markov chain Monte Carlo (MCMC) diagnostics indicate any cause for concern, such as “Bulk Effective Samples Size (ESS) is too low.” Looking at the printed results, we can see that we kept a total of 4,000 MCMC samples for inference. If we then look at the “n_eff” (i.e., ESS) column in the table of results, we see that the effective sample size is smaller than the nominal sample size of 4,000 (this is almost always the case, due to serial autocorrelation in the MCMC samples). To see diagnostics for all model parameters at once, you can use the following function calls: rstan::stan_ess(fit$stanfit), rstan::stan_mcse(fit$stanfit), and rstan::stan_rhat(fit$stanfit) (and see the corresponding help pages, ?rstan::stan_rhat).

    @@ -299,7 +279,7 @@

    -

    Site built with pkgdown 1.6.1.9001.

    +

    Site built with pkgdown 2.0.1.

    diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-10-1.png deleted file mode 100644 index 7110acd5..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-10-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-11-1.png deleted file mode 100644 index 8822b330..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-11-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-12-1.png index 54ae9da8..537ffbc0 100644 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-12-1.png and b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-13-1.png index b2c39706..b31113e9 100644 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-13-1.png and b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-14-1.png deleted file mode 100644 index ee0d5e36..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-14-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-15-1.png deleted file mode 100644 index 1abb113a..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-15-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-16-1.png 
b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-16-1.png deleted file mode 100644 index fd875bc4..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-16-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-3-1.png deleted file mode 100644 index 6bc1ad55..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-3-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-6-1.png deleted file mode 100644 index cfbf3c4b..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-6-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-8-1.png deleted file mode 100644 index 9d5da490..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-8-1.png and /dev/null differ diff --git a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-9-1.png b/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-9-1.png deleted file mode 100644 index 948fcb7c..00000000 Binary files a/docs/articles/measuring-sa_files/figure-html/unnamed-chunk-9-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models.html b/docs/articles/spatial-me-models.html index 67b1d03e..addf3906 100644 --- a/docs/articles/spatial-me-models.html +++ b/docs/articles/spatial-me-models.html @@ -12,7 +12,7 @@ - + @@ -46,7 +46,7 @@ geostan - 0.1.1 + 0.1.2

    @@ -55,20 +55,8 @@
  • Reference
  • -
  • Changelog @@ -99,7 +87,7 @@

    Connor Donegan

    September 13, 2021

    - Source: vignettes/spatial-me-models.Rmd + Source: vignettes/spatial-me-models.Rmd @@ -108,9 +96,9 @@

    September 13, 2021

    This vignette introduces users to the spatial measurement error (ME) models implemented in the geostan package (Donegan, Chun, and Griffith 2021; Donegan 2021). These models are particularly appropriate for working with American Community Survey (ACS) data and other large, government-backed surveys.

    A premise of this methodology is that the survey includes a systematic spatial sampling design (i.e., the sampling procedure was stratified by areal unit, whether they be block groups, counties, or states).

    -
    -

    -Getting started

    +
    +

    Getting started +

    From the R console, load the geostan and ggplot2 packages.

     library(geostan)
    @@ -119,9 +107,9 @@ 

    data(georgia)

    The line data(georgia) loads the georgia data set from the geostan package into your working environment. You can learn more about the data by entering ?georgia into the R console.

    -
    -

    -ICE data

    +
    +

    ICE data +

    This vignette will make use of the index of concentration at the extremes (ICE) (Massey 2001). The ICE is the difference between the proportion of the population residing in high-income households and the proportion in low-income households: \[\text{ICE} = \text{Proportion Rich} - \text{Proportion Poor,}\] where “rich” and “poor” are defined as the top and bottom quintiles of the US household income distribution (\(\geq \$120,000\) and \(< \$20,000\)), respectively. It ranges from -1, for an entirely impoverished population, to 1, for an entirely wealthy population.

    In this vignette, we will examine the ICE standard errors and build a probability model for the actual ICE values. The purpose of the vignette is to provide a guide to critically evaluating both your data and the ME model.

    Examining the standard errors directly is informative, but not quite enlightening1:

    @@ -131,10 +119,10 @@

    This shows that there are strong spatial patterns in the reliability of the estimates. For continuous measures like the ICE, it is helpful to scale the standard errors by the scale of the data. Using the median absolute deviation (MAD) is a good option:

     c(sd.ice <- sd(georgia$ICE))
    -
    ## [1] 0.126393
    +
    ## [1] 0.126393
     c(mad.ice <- mad(georgia$ICE))
    -
    ## [1] 0.09241624
    +
    ## [1] 0.09241624
     scaled_se <- georgia$ICE.se / mad.ice
     ggplot() +
    @@ -145,9 +133,9 @@ 

    Now we can see that a number of these estimates are not particularly reliable.

    -
    -

    -Modeling errors of observation

    +
    +

    Modeling errors of observation +

    The unknown errors, \(\delta_i\), are defined as the difference between the survey estimate, \(z_i\), of some variable, and that variable’s actual value over the same time period, \(x_i\): \[\delta_i = z_i - x_i.\] For present purposes, we will take for granted the high quality of the Census Bureau’s systematic spatial sampling design (on spatial sampling, see Chun and Griffith 2013), and thus, we do not expect there to be any spatial pattern to the errors, \(\delta_i\).2

    Using Bayes’ theorem and the information at our disposal, we can create a probability distribution for these errors. Since \(\delta_i\) is a simple function of \(z_i\) and \(x_i\), we need to reason about \[p(\boldsymbol x | \boldsymbol z, \mathcal M),\] where \(\mathcal M\) represents our relevant background knowledge. \(\mathcal M\) includes the standard errors, \(\boldsymbol s\), as well as the premise that this data was collected using a valid spatial sampling design.

    By Bayes’ theorem: \[\begin{equation} @@ -167,9 +155,8 @@

    \end{equation}\]

    The default prior for \(\rho\) is uniform across its entire support (determined by the extreme eigenvalues of \(C\)).

    -
    -

    -ME models in geostan +
    +

    ME models in geostan

    These ME models can be implemented using any of the geostan model fitting functions (stan_glm, stan_car, stan_esf, and stan_icar). These functions have a formula interface, so that the basic user experience is similar to using base::glm. For example, if we were to fit a linear model to the log-mortality rates, we could start with the following code:

    @@ -180,7 +167,7 @@ 

    # use binary weights matrix for prep_car_data C <- shape2mat(georgia, style = "B") cp <- prep_car_data(C, style = "WCAR")

    -
    ## Range of permissible rho values:  -1.661134 1
    +
    ## Range of permissible rho values:  -1.661134 1
     ME <- prep_me_data(
       se = data.frame(ICE = georgia$ICE.se),
    @@ -199,97 +186,97 @@ 

    To sample from our spatial ME model alone, we pass our list of ME data to stan_glm and use prior_only = TRUE:

     fit <- stan_glm(log(rate.male) ~ ICE, data = georgia, ME = ME, prior_only = TRUE)
    -
    ## 
    -## *Setting prior parameters for intercept
    -
    ## Distribution: normal
    -
    ##   location scale
    -## 1     -4.2     5
    -
    ## 
    -## *Setting prior parameters for beta
    -## Distribution: normal
    -
    ##   location scale
    -## 1        0     5
    -
    ## 
    -## *Setting prior parameters for sigma
    -
    ## Distribution: student_t
    -
    ##   df location scale
    -## 1 10        0     3
    -## 
    -## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 1).
    -## Chain 1: 
    -## Chain 1: Gradient evaluation took 0.000119 seconds
    -## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.19 seconds.
    -## Chain 1: Adjust your expectations accordingly!
    -## Chain 1: 
    -## Chain 1: 
    -## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
    -## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    -## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    -## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
    -## Chain 1: 
    -## Chain 1:  Elapsed Time: 1.51844 seconds (Warm-up)
    -## Chain 1:                1.16891 seconds (Sampling)
    -## Chain 1:                2.68735 seconds (Total)
    -## Chain 1: 
    -## 
    -## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 2).
    -## Chain 2: 
    -## Chain 2: Gradient evaluation took 9.6e-05 seconds
    -## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.96 seconds.
    -## Chain 2: Adjust your expectations accordingly!
    -## Chain 2: 
    -## Chain 2: 
    -## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
    -## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    -## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    -## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
    -## Chain 2: 
    -## Chain 2:  Elapsed Time: 1.36982 seconds (Warm-up)
    -## Chain 2:                1.17117 seconds (Sampling)
    -## Chain 2:                2.54099 seconds (Total)
    -## Chain 2: 
    -## 
    -## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 3).
    -## Chain 3: 
    -## Chain 3: Gradient evaluation took 9.6e-05 seconds
    -## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.96 seconds.
    -## Chain 3: Adjust your expectations accordingly!
    -## Chain 3: 
    -## Chain 3: 
    -## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
    -## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    -## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    -## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
    -## Chain 3: 
    -## Chain 3:  Elapsed Time: 1.32642 seconds (Warm-up)
    -## Chain 3:                1.17922 seconds (Sampling)
    -## Chain 3:                2.50564 seconds (Total)
    -## Chain 3: 
    -## 
    -## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 4).
    -## Chain 4: 
    -## Chain 4: Gradient evaluation took 0.000155 seconds
    -## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.55 seconds.
    -## Chain 4: Adjust your expectations accordingly!
    -## Chain 4: 
    -## Chain 4: 
    -## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
    -## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    -## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    -## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
    -## Chain 4: 
    -## Chain 4:  Elapsed Time: 1.69196 seconds (Warm-up)
    -## Chain 4:                1.1706 seconds (Sampling)
    -## Chain 4:                2.86256 seconds (Total)
    -## Chain 4:
    +
    ## 
    +## *Setting prior parameters for intercept
    +
    ## Distribution: normal
    +
    ##   location scale
    +## 1     -4.2     5
    +
    ## 
    +## *Setting prior parameters for beta
    +## Distribution: normal
    +
    ##   location scale
    +## 1        0     5
    +
    ## 
    +## *Setting prior parameters for sigma
    +
    ## Distribution: student_t
    +
    ##   df location scale
    +## 1 10        0     3
    +## 
    +## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 1).
    +## Chain 1: 
    +## Chain 1: Gradient evaluation took 0.000124 seconds
    +## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.24 seconds.
    +## Chain 1: Adjust your expectations accordingly!
    +## Chain 1: 
    +## Chain 1: 
    +## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
    +## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    +## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    +## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
    +## Chain 1: 
    +## Chain 1:  Elapsed Time: 1.44052 seconds (Warm-up)
    +## Chain 1:                1.18725 seconds (Sampling)
    +## Chain 1:                2.62777 seconds (Total)
    +## Chain 1: 
    +## 
    +## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 2).
    +## Chain 2: 
    +## Chain 2: Gradient evaluation took 9.5e-05 seconds
    +## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.95 seconds.
    +## Chain 2: Adjust your expectations accordingly!
    +## Chain 2: 
    +## Chain 2: 
    +## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
    +## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    +## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    +## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
    +## Chain 2: 
    +## Chain 2:  Elapsed Time: 1.48348 seconds (Warm-up)
    +## Chain 2:                1.19727 seconds (Sampling)
    +## Chain 2:                2.68074 seconds (Total)
    +## Chain 2: 
    +## 
    +## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 3).
    +## Chain 3: 
    +## Chain 3: Gradient evaluation took 9.2e-05 seconds
    +## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.92 seconds.
    +## Chain 3: Adjust your expectations accordingly!
    +## Chain 3: 
    +## Chain 3: 
    +## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
    +## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    +## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    +## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
    +## Chain 3: 
    +## Chain 3:  Elapsed Time: 1.47298 seconds (Warm-up)
    +## Chain 3:                1.21636 seconds (Sampling)
    +## Chain 3:                2.68934 seconds (Total)
    +## Chain 3: 
    +## 
    +## SAMPLING FOR MODEL 'foundation' NOW (CHAIN 4).
    +## Chain 4: 
    +## Chain 4: Gradient evaluation took 0.000231 seconds
    +## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 2.31 seconds.
    +## Chain 4: Adjust your expectations accordingly!
    +## Chain 4: 
    +## Chain 4: 
    +## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
    +## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
    +## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
    +## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
    +## Chain 4: 
    +## Chain 4:  Elapsed Time: 1.61231 seconds (Warm-up)
    +## Chain 4:                1.27704 seconds (Sampling)
    +## Chain 4:                2.88935 seconds (Total)
    +## Chain 4:

    Note that prior_only = TRUE will prevent stan_glm from considering the likelihood of the outcome, log(rate.male); to facilitate a valid workflow, the entire ME model is treated as part of the “prior” by prior_only. This allows us to understand the properties of the ME model itself, considered independently from any outcome variable.

    -
    -

    -Evaluating spatial ME models

    -
    -

    -ME diagnostic plots

    +
    +

    Evaluating spatial ME models +

    +
    +

    ME diagnostic plots +

    geostan provides a set of diagnostics for its ME models, accessible through the me_diag function. The purpose of the diagnostics is partly to evaluate the quality of the data, and partly to interrogate the adequacy of the model.

    Provide me_diag with the fitted model, the name of the variable, and the underlying spatial object:

    @@ -299,120 +286,120 @@ 

    We get three plots:

    • A point-interval plot showing the ACS estimates on the horizontal axis against a summary of the posterior distribution on the vertical axis. This provides an indication of 1) the amount of uncertainty present in each \(x_i\), and 2) the degree to which the mean of the posterior probability distribution for \(x_i\) may differ from the raw survey estimates (\(\delta_i\)).

    • -
    • A histogram of Moran coefficients calculated for each MCMC sample. The mean of the samples is printed at the top. Zero spatial autocorrelation is indicated by a small negative value (unlike the correlation coefficient, the midpoint of the MC is \(-1/(n-1)\) (Chun and Griffith 2013)).

    • +
    • A Moran scatter plot of the \(\delta_i\) values. Zero spatial autocorrelation is indicated by a small negative value (unlike the correlation coefficient, the midpoint of the MC is \(-1/(n-1)\) (Chun and Griffith 2013)). (Alternatively, autocorrelation can be visualized with a histogram of Moran coefficients that are calculated for each MCMC sample; see the mc_style argument.)

    • A map of the posterior mean for each \(\delta_i\) value.

    From the point-interval plot, we can see that a few of the counties with low ICE estimates have posterior distributions that have shifted slightly towards the mean. However, notice that the model still places substantial probability on values of the ICE that are more extreme than the raw ACS estimates.

    Large \(|\delta_i|\) values can provide a warning that your data may be of low quality; strong social or spatial patterns in \(\delta_i\), on the other hand, should prompt you to ask further questions about the adequacy of the model.

    -
    -

    -Looking closer

    +
    +

    Looking closer +

    To look more closely at the model results, we can have me_diag return the index value for the observations with the \(k\) largest \(\delta_i\) values:

     me_diag(fit, 'ICE', georgia, index = 5)
    -
    ## Identifying the top 5 observations as ordered by their Delta values (Delta = posterior mean of x - raw x value):
    -
    ##                 x.raw       x.mu      x.lwr      x.upr       Delta
    -## x_ICE[91]  -0.4016787 -0.3672502 -0.4494132 -0.2869052 -0.03442849
    -## x_ICE[105] -0.3412494 -0.3086339 -0.3879247 -0.2266870 -0.03261545
    -## x_ICE[90]  -0.2798395 -0.2551138 -0.3261983 -0.1825201 -0.02472574
    -## x_ICE[39]  -0.3236583 -0.3001743 -0.3857347 -0.2152648 -0.02348394
    -## x_ICE[143] -0.2395249 -0.2208236 -0.2875857 -0.1541964 -0.01870130
    +
    ## Identifying the top 5 observations as ordered by their Delta values (Delta = posterior mean of x - raw x value):
    +
    ##                 x.raw       x.mu      x.lwr      x.upr       Delta
    +## x_ICE[91]  -0.4016787 -0.3676754 -0.4521576 -0.2829429 -0.03400329
    +## x_ICE[105] -0.3412494 -0.3076809 -0.3874305 -0.2267651 -0.03356850
    +## x_ICE[90]  -0.2798395 -0.2549004 -0.3264494 -0.1825796 -0.02493916
    +## x_ICE[39]  -0.3236583 -0.2999810 -0.3866111 -0.2177720 -0.02367731
    +## x_ICE[18]   0.2446345  0.2262715  0.1893956  0.2618470  0.01836309

    Or, we can have me_diag return results as raw data (it will also return a list of ggplots):

     delta <- me_diag(fit, 'ICE', georgia, plot = FALSE)$delta_data
     head(delta)
    -
    ##                x.raw        x.mu       x.lwr        x.upr         Delta
    -## x_ICE[1] -0.24576780 -0.23612506 -0.28913059 -0.183239098 -9.642736e-03
    -## x_ICE[2] -0.27540984 -0.26696761 -0.32023503 -0.213111868 -8.442230e-03
    -## x_ICE[3] -0.01856313 -0.01808818 -0.03336573 -0.002751199 -4.749502e-04
    -## x_ICE[4]  0.05059098  0.05058478  0.04279186  0.058327462  6.199239e-06
    -## x_ICE[5]  0.14701110  0.14447502  0.12399190  0.164765673  2.536082e-03
    -## x_ICE[6]  0.16649112  0.16625252  0.15808074  0.174572864  2.385989e-04
    +
    ##                x.raw        x.mu       x.lwr        x.upr         Delta
    +## x_ICE[1] -0.24576780 -0.23589251 -0.28813617 -0.182849316 -9.875294e-03
    +## x_ICE[2] -0.27540984 -0.26761243 -0.32000737 -0.214591646 -7.797403e-03
    +## x_ICE[3] -0.01856313 -0.01820960 -0.03402685 -0.002301792 -3.535360e-04
    +## x_ICE[4]  0.05059098  0.05053656  0.04271002  0.058499775  5.442485e-05
    +## x_ICE[5]  0.14701110  0.14440346  0.12348029  0.164953400  2.607641e-03
    +## x_ICE[6]  0.16649112  0.16618272  0.15780119  0.174602363  3.083992e-04

    We can follow up on this information by examining demographic information on the counties with the largest \(\delta_i\):

     georgia[c(91, 105, 90, 39), c("NAME", "population", "white", "black", "hisp", "ai", "ICE", "ICE.se", "college", "college.se")]
    -
    ## Simple feature collection with 4 features and 10 fields
    -## Geometry type: MULTIPOLYGON
    -## Dimension:     XY
    -## Bounding box:  xmin: -85.06359 ymin: 30.58092 xmax: -82.41898 ymax: 33.46918
    -## Geodetic CRS:  NAD83
    -##        NAME population    white    black      hisp         ai        ICE
    -## 91   Clinch       6743 65.35667 27.61382  5.116417 0.05932078 -0.4016787
    -## 105 Wheeler       7939 56.06500 42.10858  1.272201 0.45345761 -0.3412494
    -## 90  Hancock       8535 24.22964 72.29057  1.921500 0.00000000 -0.2798395
    -## 39  Stewart       6042 24.62761 54.10460 17.212843 0.14895730 -0.3236583
    -##         ICE.se college college.se                       geometry
    -## 91  0.04806692    11.1   2.127660 MULTIPOLYGON (((-82.97125 3...
    -## 105 0.04801373    12.8   2.492401 MULTIPOLYGON (((-82.92786 3...
    -## 90  0.04151013     8.8   1.641337 MULTIPOLYGON (((-83.25346 3...
    -## 39  0.04715834    11.2   1.945289 MULTIPOLYGON (((-85.05141 3...
    +
    ## Simple feature collection with 4 features and 10 fields
    +## Geometry type: MULTIPOLYGON
    +## Dimension:     XY
    +## Bounding box:  xmin: -85.06359 ymin: 30.58092 xmax: -82.41898 ymax: 33.46918
    +## Geodetic CRS:  NAD83
    +##        NAME population    white    black      hisp         ai        ICE
    +## 91   Clinch       6743 65.35667 27.61382  5.116417 0.05932078 -0.4016787
    +## 105 Wheeler       7939 56.06500 42.10858  1.272201 0.45345761 -0.3412494
    +## 90  Hancock       8535 24.22964 72.29057  1.921500 0.00000000 -0.2798395
    +## 39  Stewart       6042 24.62761 54.10460 17.212843 0.14895730 -0.3236583
    +##         ICE.se college college.se                       geometry
    +## 91  0.04806692    11.1   2.127660 MULTIPOLYGON (((-82.97125 3...
    +## 105 0.04801373    12.8   2.492401 MULTIPOLYGON (((-82.92786 3...
    +## 90  0.04151013     8.8   1.641337 MULTIPOLYGON (((-83.25346 3...
    +## 39  0.04715834    11.2   1.945289 MULTIPOLYGON (((-85.05141 3...

    The somewhat large \(\delta_i\) values for these counties result from a combination of fairly unreliable estimates (large standard errors) and the counties being local outliers. We can see that these are low population areas that also have low income, few college grads, and fairly high percent Black populations.

    Notice that the ACS estimate for the ICE in Clinch County is \(-0.40\) (SE = 0.048), implying an approximate 95% interval ranging from -0.5 to -0.3 (quite a wide range). Here is our probability distribution for the ICE in Clinch County:

     print(fit$stanfit, pars = "x_ICE[91]")
    -
    ## Inference for Stan model: foundation.
    -## 4 chains, each with iter=2000; warmup=1000; thin=1; 
    -## post-warmup draws per chain=1000, total post-warmup draws=4000.
    -## 
    -##            mean se_mean   sd  2.5%  25%   50%   75% 97.5% n_eff Rhat
    -## x_ICE[91] -0.37       0 0.04 -0.45 -0.4 -0.37 -0.34 -0.29  9205    1
    -## 
    -## Samples were drawn using NUTS(diag_e) at Fri Nov 26 19:44:07 2021.
    -## For each parameter, n_eff is a crude measure of effective sample size,
    -## and Rhat is the potential scale reduction factor on split chains (at 
    -## convergence, Rhat=1).
    +
    ## Inference for Stan model: foundation.
    +## 4 chains, each with iter=2000; warmup=1000; thin=1; 
    +## post-warmup draws per chain=1000, total post-warmup draws=4000.
    +## 
    +##            mean se_mean   sd  2.5%  25%   50%   75% 97.5% n_eff Rhat
    +## x_ICE[91] -0.37       0 0.04 -0.45 -0.4 -0.37 -0.34 -0.28  8046    1
    +## 
    +## Samples were drawn using NUTS(diag_e) at Sun Dec 26 18:38:35 2021.
    +## For each parameter, n_eff is a crude measure of effective sample size,
    +## and Rhat is the potential scale reduction factor on split chains (at 
    +## convergence, Rhat=1).
     # or visualize with:
     # plot(fit, pars = "x_ICE[91]")
     # plot(fit$stanfit, pars = "x_ICE[91]")
    -

    Given the socioeconomic and demographic information on Clinch County, one could argue that the model is being overly conservative by shrinking towards the mean value. However, by examining full posterior distributions, we see in this case that the results are not particularly vulnerable to this concer due to the fact that the model is still positing that the raw estimate and more extreme values are quite plausible. Of course, this was already apparent from examination of the me_diag plots.

    +

    Given the socioeconomic and demographic information on Clinch County, one could argue that the model is being overly conservative by shrinking towards the mean value. However, by examining full posterior distributions, we see in this case that the results are not particularly vulnerable to this concern due to the fact that the model is still positing that the raw estimate and more extreme values are quite plausible. Of course, this was already apparent from examination of the me_diag plots.

    -
    -

    -Working with MCMC samples from ME models

    +
    +

    Working with MCMC samples from ME models +

    geostan consists of pre-compiled Stan models, and users can always access the Markov chain Monte Carlo (MCMC) samples returned by Stan. When extracted as a matrix of samples (as below), each row represents a draw from the joint probability distribution for all model parameters, and each column consists of samples from the marginal distribution of each parameter.

    The ME models return samples for every \(x_i\) as well as the model parameters \(\mu\) (“mu_x_true”), \(\rho\) (“car_rho_x_true”), and \(\tau\) (“sigma_x_true”). We can access these using as.matrix (or as.array or as.data.frame).

     mu.x <- as.matrix(fit, pars = "mu_x_true")
     dim(mu.x)
    -
    ## [1] 4000    1
    +
    ## [1] 4000    1
     mean(mu.x)
    -
    ## [1] -0.1313992
    +
    ## [1] -0.1260957

    We can visualize these using plot or print a summary:

     print(fit$stanfit, pars = c("mu_x_true", "car_rho_x_true", "sigma_x_true"))
    -
    ## Inference for Stan model: foundation.
    -## 4 chains, each with iter=2000; warmup=1000; thin=1; 
    -## post-warmup draws per chain=1000, total post-warmup draws=4000.
    -## 
    -##                    mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
    -## mu_x_true[1]      -0.13       0 0.07 -0.28 -0.16 -0.13 -0.10  0.02  1927    1
    -## car_rho_x_true[1]  0.97       0 0.02  0.91  0.96  0.98  0.99  1.00  3644    1
    -## sigma_x_true[1]    0.20       0 0.01  0.18  0.19  0.20  0.21  0.23  9023    1
    -## 
    -## Samples were drawn using NUTS(diag_e) at Fri Nov 26 19:44:07 2021.
    -## For each parameter, n_eff is a crude measure of effective sample size,
    -## and Rhat is the potential scale reduction factor on split chains (at 
    -## convergence, Rhat=1).
    +
    ## Inference for Stan model: foundation.
    +## 4 chains, each with iter=2000; warmup=1000; thin=1; 
    +## post-warmup draws per chain=1000, total post-warmup draws=4000.
    +## 
    +##                    mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
    +## mu_x_true[1]      -0.13       0 0.08 -0.27 -0.16 -0.13 -0.09  0.04  1438    1
    +## car_rho_x_true[1]  0.97       0 0.02  0.92  0.96  0.98  0.99  1.00  3367    1
    +## sigma_x_true[1]    0.20       0 0.01  0.18  0.19  0.20  0.21  0.23  7188    1
    +## 
    +## Samples were drawn using NUTS(diag_e) at Sun Dec 26 18:38:35 2021.
    +## For each parameter, n_eff is a crude measure of effective sample size,
    +## and Rhat is the potential scale reduction factor on split chains (at 
    +## convergence, Rhat=1).

    To extract samples from the joint probability distribution for \(\boldsymbol x\), use the generic parameter name “x_true”:

     x <- as.matrix(fit, pars = "x_true")
     dim(x)
    -
    ## [1] 4000  159
    +
    ## [1] 4000  159

    If we wanted to calculate the mean of each of these marginal distributions, we could use apply with MARGIN = 2 to summarize by column:

     x.mu <- apply(x, 2, mean)
     head(x.mu)
    -
    ##    x_ICE[1]    x_ICE[2]    x_ICE[3]    x_ICE[4]    x_ICE[5]    x_ICE[6] 
    -## -0.23612506 -0.26696761 -0.01808818  0.05058478  0.14447502  0.16625252
    +
    ##    x_ICE[1]    x_ICE[2]    x_ICE[3]    x_ICE[4]    x_ICE[5]    x_ICE[6] 
    +## -0.23589251 -0.26761243 -0.01820960  0.05053656  0.14440346  0.16618272
    -
    -

    -Non-spatial ME models

    +
    +

    Non-spatial ME models +

    If the ME list doesn’t have a slot with car_parts, geostan will automatically use a non-spatial Student’s t model instead of the CAR model:

     ME_nsp <- prep_me_data(
    @@ -421,11 +408,11 @@ 

    ) fit_nsp <- stan_glm(log(rate.male) ~ ICE, data = georgia, ME = ME_nsp, prior_only = TRUE)

    -
    -

    -Spatial regression with a noisy covariate

    +
    +

    Spatial regression with a noisy covariate +

    Incorporating these ME models into any other geostan model is as simple as removing the prior_only argument (or setting it to FALSE). The ME model will automatically be incorporated into the Bayesian regression analysis, such that all of the regression parameters are modeled jointly with the ME model. This means that our observational uncertainty for the ICE will be propagated throughout the regression analysis.

    -

    At this point, we can introduce a more appropriate model for the mortality data. We will use a Poisson likelihood for the counts of deaths, provide the log-population at risk as an offset term, and pool information across counties using a non-spatial Gaussian model for the log-rates, \(\boldsymbol \phi\): \[y_i \sim Pois(e^{log(P_i) + \phi_i}) \\ \boldsymbol \phi \sim Gauss(\alpha + \boldsymbol x \beta, I \tau^2),\] where \(\alpha\) is the mean log-mortality rate and \(\beta\) is the regression coefficient on the modeled ICE, \(\boldsymbol x\). \(\tau^2\) is the variance of the log-mortality rates around the fitted regression line, \(\alpha + X\beta\). This model for \(\boldsymbol \phi\) is equivalent to a CAR model with \(\rho=0\) and \(M=I\tau^2\).

    +

    At this point, we can introduce a more appropriate model for the mortality data. We will use a Poisson likelihood for the counts of deaths, provide the log-population at risk as an offset term, and pool information across counties using a non-spatial Gaussian model for the log-rates, \(\boldsymbol \phi\): \[y_i \sim Pois(e^{log(P_i) + \phi_i}) \\ \boldsymbol \phi \sim Gauss(\alpha + \boldsymbol x \beta, I \tau^2),\] where \(\alpha\) is the mean log-mortality rate and \(\beta\) is the regression coefficient on the modeled ICE, \(\boldsymbol x\). \(\tau^2\) is the variance of the log-mortality rates around the fitted regression line, \(\alpha + X\beta\). This model for \(\boldsymbol \phi\) is equivalent to a CAR model with \(\rho=0\) and \(M=I\tau^2\) (a constant variance).

     fit_2 <- stan_glm(deaths.male ~ offset(log(pop.at.risk.male)) + ICE, 
                       re = ~ NAME,   
    @@ -433,52 +420,52 @@ 

    ME = ME, family = poisson(), refresh = 0)

    -
    ## 
    -## *Setting prior parameters for intercept
    -
    ## Distribution: normal
    -
    ##   location scale
    -## 1     -4.2     5
    -
    ## 
    -## *Setting prior parameters for beta
    -## Distribution: normal
    -
    ##   location scale
    -## 1        0     5
    -
    ## 
    -## *Setting prior parameters for alpha_tau
    -
    ## Distribution: student_t
    -
    ##   df location scale
    -## 1 10        0     3
    +
    ## 
    +## *Setting prior parameters for intercept
    +
    ## Distribution: normal
    +
    ##   location scale
    +## 1     -4.2     5
    +
    ## 
    +## *Setting prior parameters for beta
    +## Distribution: normal
    +
    ##   location scale
    +## 1        0     5
    +
    ## 
    +## *Setting prior parameters for alpha_tau
    +
    ## Distribution: student_t
    +
    ##   df location scale
    +## 1 10        0     3

    Printing the model results will show the model specification, the mean Moran coefficient of the residuals, a summary of the posterior distributions of select model parameters, and some MCMC diagnostics from Stan.

     print(fit_2)
    -
    ## Spatial Model Results 
    -## Formula: deaths.male ~ offset(log(pop.at.risk.male)) + ICE
    -## Partial pooling (varying intercept): ~NAME
    -## Spatial method (outcome):  Exchangeable 
    -## Likelihood function:  poisson 
    -## Link function:  log 
    -## Residual Moran Coefficient:  0.035289 
    -## WAIC:  1319.11 
    -## Observations:  159 
    -## Data models (ME): ICE
    -##  Data model (ME prior): CAR (auto Gaussian)
    -## Inference for Stan model: foundation.
    -## 4 chains, each with iter=2000; warmup=1000; thin=1; 
    -## post-warmup draws per chain=1000, total post-warmup draws=4000.
    -## 
    -##             mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff  Rhat
    -## intercept -4.384   0.001 0.017 -4.416 -4.396 -4.384 -4.373 -4.349   520 1.010
    -## ICE       -1.708   0.004 0.101 -1.898 -1.777 -1.712 -1.641 -1.500   681 1.006
    -## alpha_tau  0.124   0.000 0.011  0.103  0.116  0.123  0.131  0.148  1277 1.003
    -## 
    -## Samples were drawn using NUTS(diag_e) at Fri Nov 26 19:44:31 2021.
    -## For each parameter, n_eff is a crude measure of effective sample size,
    -## and Rhat is the potential scale reduction factor on split chains (at 
    -## convergence, Rhat=1).
    +
    ## Spatial Model Results 
    +## Formula: deaths.male ~ offset(log(pop.at.risk.male)) + ICE
    +## Partial pooling (varying intercept): ~NAME
    +## Spatial method (outcome):  Exchangeable 
    +## Likelihood function:  poisson 
    +## Link function:  log 
    +## Residual Moran Coefficient:  0.03703425 
    +## WAIC:  1318.01 
    +## Observations:  159 
    +## Data models (ME): ICE
    +##  Data model (ME prior): CAR (auto Gaussian)
    +## Inference for Stan model: foundation.
    +## 4 chains, each with iter=2000; warmup=1000; thin=1; 
    +## post-warmup draws per chain=1000, total post-warmup draws=4000.
    +## 
    +##             mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff  Rhat
    +## intercept -4.384   0.001 0.017 -4.417 -4.395 -4.383 -4.373 -4.351   893 1.002
    +## ICE       -1.709   0.003 0.104 -1.907 -1.780 -1.709 -1.643 -1.501  1067 1.004
    +## alpha_tau  0.123   0.000 0.011  0.103  0.116  0.123  0.130  0.147  2035 1.002
    +## 
    +## Samples were drawn using NUTS(diag_e) at Sun Dec 26 18:39:01 2021.
    +## For each parameter, n_eff is a crude measure of effective sample size,
    +## and Rhat is the potential scale reduction factor on split chains (at 
    +## convergence, Rhat=1).
    -
    -

    -Joint probabilities

    +
    +

    Joint probabilities +

    As stated above, our model is a joint probability distribution for all of the unknown parameters. This means that the probability distribution for \(\boldsymbol x\) that we obtained previously will be updated in consideration of the new information (that is, considering the regression model and the outcome data).

    In the case of our example model, the bivariate regression relationship implies a multivariate normal probability model for \(\boldsymbol x\) and \(\boldsymbol \phi\) with correlation \(\rho_{x \phi}\).

    An important implication of this fact is that the previously stated criteria for evaluating ME models (e.g., regarding spatial autocorrelation) no longer apply, because the results presented here (fit_2) are conditioned on quite different information, not just the survey data. This is why it is important to critically evaluate the ME model itself, with prior_only = TRUE, before moving to a subsequent stage of analysis.

    @@ -488,7 +475,6 @@

    geom_point(aes(ICE, log(rate.male), col = ICE.se), shape = 6, lwd = 2) + -# geom_label(label = row.names(georgia), aes(ICE, log(rate.male), col = ICE.se)) + labs(x = "ICE Estimate", y = "Crude log mortality") + scale_colour_gradient(low = "white", high = "darkred", name = "SE(ICE)") + theme(panel.background = element_rect(fill = 'gray20'), @@ -517,9 +503,9 @@

    )

    -
    -

    -References

    +
    +

    References +

    Chun, Yongwan, and Daniel A Griffith. 2013. Spatial Statistics and Geostatistics: Theory and Applications for Geographic Information Science and Technology. Sage.

    @@ -567,7 +553,7 @@

    -

    Site built with pkgdown 1.6.1.9001.

    +

    Site built with pkgdown 2.0.1.

    diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-10-1.png deleted file mode 100644 index 0fadbd3a..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-10-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-11-1.png deleted file mode 100644 index 4005c8c0..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-11-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-12-1.png deleted file mode 100644 index e00dcff0..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-12-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-13-1.png deleted file mode 100644 index 32fb8173..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-13-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-14-1.png deleted file mode 100644 index 5df93013..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-14-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-15-1.png deleted file mode 100644 index 47971c6e..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-15-1.png and /dev/null differ diff --git 
a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-16-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-16-1.png deleted file mode 100644 index 3adc1f91..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-16-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-17-1.png deleted file mode 100644 index 6e880c2a..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-17-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-18-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-18-1.png deleted file mode 100644 index 5dc2a5b2..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-18-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-19-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-19-1.png deleted file mode 100644 index 3a032471..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-19-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-21-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-21-1.png index 219e74ce..fe9ad123 100644 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-21-1.png and b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-21-1.png differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-22-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-22-1.png deleted file mode 100644 index dfdff687..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-22-1.png and /dev/null differ diff --git 
a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-23-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-23-1.png deleted file mode 100644 index c5699d3c..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-23-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-24-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-24-1.png deleted file mode 100644 index f0dabbcd..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-24-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-4-1.png deleted file mode 100644 index e33c4ac6..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-4-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-5-1.png deleted file mode 100644 index cdf131f1..00000000 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-5-1.png and /dev/null differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-8-1.png index 4d8040f1..3d13e9cc 100644 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-8-1.png and b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-9-1.png b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-9-1.png index fbadbadc..e9a05eb1 100644 Binary files a/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-9-1.png and b/docs/articles/spatial-me-models_files/figure-html/unnamed-chunk-9-1.png differ diff --git 
a/docs/articles/spatial-me-models_files/header-attrs-2.10/header-attrs.js b/docs/articles/spatial-me-models_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/spatial-me-models_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/authors.html b/docs/authors.html index 0fe3e89e..8b38f84f 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,92 +1,18 @@ - - - - - - - -Citation and Authors • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    -
    -
    - +
    - - - + diff --git a/docs/favicon-16x16.png b/docs/favicon-16x16.png index dee38c26..51de331e 100644 Binary files a/docs/favicon-16x16.png and b/docs/favicon-16x16.png differ diff --git a/docs/favicon-32x32.png b/docs/favicon-32x32.png index 020c52dd..12772ca4 100644 Binary files a/docs/favicon-32x32.png and b/docs/favicon-32x32.png differ diff --git a/docs/favicon.ico b/docs/favicon.ico index 0cd405b7..f9f83d51 100644 Binary files a/docs/favicon.ico and b/docs/favicon.ico differ diff --git a/docs/index.html b/docs/index.html index ccd696de..4e39f427 100644 --- a/docs/index.html +++ b/docs/index.html @@ -12,7 +12,7 @@ - + @@ -46,7 +46,7 @@ geostan - 0.1.1 + 0.1.2
    @@ -55,20 +55,8 @@
  • Reference
  • -
  • Changelog @@ -76,7 +64,7 @@
  • - - -
    +
    -
    -

    -geostan 0.1.2

    -
    -

    -measurement error models improved

    +
    + +
    +

    Models for censored disease and mortality data

    +

    geostan now supports Poisson models with censored count data, a common problem in public health research where small area disease and mortality counts are censored below a threshold value. Models for censored outcome data can now be implemented using the censor_point argument found in all of the model fitting functions (stan_glm, stan_car, stan_esf, stan_icar).

    +
    +
    +

    Measurement error models improved

    The measurement error models have been updated in three important respects:

    -
    1. There is now a prep_me_data function which must be used to create the list of data for the ME models. See ?prep_me_data.
    2. +
      1. There is now a prep_me_data function which must be used to create the list of data for the ME models. See ?prep_me_data.
      2. For covariates that are proportions or rates, the ME models now have an option for using a logit transformation on the variable. Again, see ?prep_me_data for usage.
      3. Previously, when using stan_car, ME models automatically employed the CAR model as a prior for the modeled covariates. That has changed, so that the default behavior for the ME models is the same across all stan_* models (CAR, GLM, ESF, ICAR).
      4. -

      The second change addresses a limitation of the CAR prior models for the ME models. These are particularly important for variables that are highly skewed, such as the poverty rate. To determine whether a transformation should be considered, it can be helpful to evaluate results of the ME model (with the untransformed covariate) using the me_diag function. The logit transform is done on the ‘latent’ (modeled) variable, not the raw covariate. This transformation cannot be applied to the raw data by the user because that would require the standard errors of covariate estimates (e.g., ACS standard errors) to be adjusted for the transformation.

      +

    The second change listed above is particularly useful for variables that are highly skewed, such as the poverty rate. To determine whether a transformation should be considered, it can be helpful to evaluate results of the ME model (with the untransformed covariate) using the me_diag function. The logit transform is done on the ‘latent’ (modeled) variable, not the raw covariate. This transformation cannot be applied to the raw data by the user because that would require the standard errors of covariate estimates (e.g., ACS standard errors) to be adjusted for the transformation.

    -
    -

    -Models for censored disease and mortality data

    -

    geostan now supports Poisson models with censored count data, a common problem in public health research where small area disease and mortality counts are censored below a threshold value. Model for censored outcome data can now be implemented using the censor_point argument found in all of the model fitting functions (stan_glm, stan_car, stan_esf, stan_icar).

    +
    +

    A predict method for marginal effects

    +

    A predict method has been introduced for fitted geostan models; this is designed for calculating marginal effects of predictors. Fitted values of the model are still returned using fitted and the posterior predictive distribution is still accessible via posterior_predict.

    -
    -

    -Centering covariates with measurement error models

    -

    The centerx argument previously centered the covariates on their mean value using the scale function; centering covariates can improve sampling efficiency, sometimes rather drastically, and has interpretive value since it allows the intercept to be interpreted as the mean value of the outcome. Now, the centerx argument has been updated to handle measurement error models for covariates. The centering happens inside the Stan model so that the means of the modeled covariates (latent variables) are used instead of the raw data mean.

    +
    +

    Centering covariates with measurement error models

    +

    The centerx argument has been updated to handle measurement error models for covariates. The centering happens inside the Stan model so that the means of the modeled covariates (latent variables) are used instead of the raw data mean. The centerx argument previously centered the covariates on their mean value using the scale function in R; centering covariates can improve sampling efficiency, sometimes rather drastically, and has interpretive value since it allows the intercept to be interpreted as the mean value of the outcome.

    -
    -

    -geostan 0.1.1

    +
    +

    The stan files for the CAR model have been combined with the ‘foundation.stan’ file, which compresses the file size considerably. The vignette on spatial autocorrelation has also been updated to include model diagnostics, and a new example has been added to the stan_car documentation.

    -
    -

    -geostan 0.1.0

    +
    +

    geostan’s first release.

    +
    -
    - +
    - - - + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index e788b18a..80ea5b83 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -56,8 +56,10 @@ img.icon { float: right; } -img { +/* Ensure in-page images don't run outside their container */ +.contents img { max-width: 100%; + height: auto; } /* Fix bug in bootstrap (only seen in firefox) */ @@ -78,11 +80,10 @@ dd { /* Section anchors ---------------------------------*/ a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; + display: none; + margin-left: 5px; + width: 20px; + height: 20px; background-image: url(./link.svg); background-repeat: no-repeat; @@ -90,17 +91,15 @@ a.anchor { background-position: center center; } -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; } - /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { @@ -264,9 +263,11 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -pre, pre code { +pre, code, pre code { background-color: #f8f8f8; color: #333; +} +pre, pre code { white-space: pre-wrap; word-break: break-all; overflow-wrap: break-word; @@ -276,14 +277,12 @@ pre { border: 1px solid #eee; } -pre .img { +pre .img, pre .r-plt { margin: 5px 0; } -pre .img img { +pre .img img, pre .r-plt img { background-color: #fff; - display: block; - height: auto; } code a, pre a { @@ -359,3 +358,27 @@ mark { content: ""; } } + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin 
{ + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 956ef70a..6f0eee40 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -91,7 +91,7 @@ // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index a061ad2b..3b36c6ba 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,10 +1,10 @@ pandoc: '2.5' -pkgdown: 1.6.1.9001 -pkgdown_sha: 43e88ecfda6b625beaca50eb1bd7c69c0e956955 +pkgdown: 2.0.1 +pkgdown_sha: ~ articles: measuring-sa: measuring-sa.html spatial-me-models: spatial-me-models.html -last_built: 2021-11-30T02:24Z +last_built: 2021-12-27T01:04Z urls: reference: https://connordonegan.github.io/geostan/reference article: https://connordonegan.github.io/geostan/articles diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png deleted file mode 100644 index 17a35806..00000000 Binary files a/docs/reference/Rplot001.png and /dev/null differ diff --git a/docs/reference/Rplot002.png b/docs/reference/Rplot002.png deleted file mode 100644 index ed8c0ddf..00000000 Binary files a/docs/reference/Rplot002.png and /dev/null differ diff --git a/docs/reference/Rplot003.png b/docs/reference/Rplot003.png deleted file mode 100644 index 98f17e0e..00000000 Binary files a/docs/reference/Rplot003.png and /dev/null differ diff --git a/docs/reference/Rplot004.png b/docs/reference/Rplot004.png deleted file mode 100644 index 06c90a16..00000000 Binary files a/docs/reference/Rplot004.png and /dev/null differ diff --git a/docs/reference/Rplot005.png b/docs/reference/Rplot005.png deleted file mode 100644 index 8d0f029a..00000000 Binary files a/docs/reference/Rplot005.png and /dev/null differ diff --git 
a/docs/reference/Rplot006.png b/docs/reference/Rplot006.png deleted file mode 100644 index edd10831..00000000 Binary files a/docs/reference/Rplot006.png and /dev/null differ diff --git a/docs/reference/Rplot007.png b/docs/reference/Rplot007.png deleted file mode 100644 index ec9484f4..00000000 Binary files a/docs/reference/Rplot007.png and /dev/null differ diff --git a/docs/reference/Rplot008.png b/docs/reference/Rplot008.png deleted file mode 100644 index 3aae3a4c..00000000 Binary files a/docs/reference/Rplot008.png and /dev/null differ diff --git a/docs/reference/aple.html b/docs/reference/aple.html index d83ba0ee..7d4c13d1 100644 --- a/docs/reference/aple.html +++ b/docs/reference/aple.html @@ -1,93 +1,18 @@ - - - - - - - -Spatial autocorrelation estimator — aple • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Spatial autocorrelation estimator — aple • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,74 +61,68 @@

    Spatial autocorrelation estimator

    The approximate-profile likelihood estimator for the spatial autocorrelation parameter from a simultaneous autoregressive (SAR) model (Li et al. 2007). Note, the APLE approximation is quite unreliable when the number of observations is large.

    -
    aple(x, w, digits = 3)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    Numeric vector of values, length n. This will be standardized internally with scale(x).

    w

    An n x n row-standardized spatial connectivity matrix. See shape2mat.

    digits

    Number of digits to round results to.

    - -

    Source

    +
    +
    aple(x, w, digits = 3)
    +
    +
    +

    Source

    Li, Hongfei and Calder, Catherine A. and Cressie, Noel (2007). Beyond Moran's I: testing for spatial dependence based on the spatial autoregressive model. Geographical Analysis 39(4): 357-375.

    -

    Value

    - +
    +
    +

    Arguments

    +
    x
    +

    Numeric vector of values, length n. This will be standardized internally with scale(x).

    +
    w
    +

    An n x n row-standardized spatial connectivity matrix. See shape2mat.

    +
    digits
    +

    Number of digits to round results to.

    +
    +
    +

    Value

    the APLE estimate, a numeric value.

    -

    Details

    - +
    +
    +

    Details

    To check reliability, the APLE can be compared to an estimate of the spatial autocorrelation parameter from an intercept-only SAR model.

    -

    See also

    - - +
    +
    +

    See also

    + +
    -

    Examples

    -
    
    -library(sf)
    -data(georgia)
    -w <- shape2mat(georgia, "W")
    -x <- georgia$ICE
    -aple(x, w)
    +    
    +

    Examples

    +
    
    +library(sf)
    +data(georgia)
    +w <- shape2mat(georgia, "W")
    +x <- georgia$ICE
    +aple(x, w)
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/append_priors.html b/docs/reference/append_priors.html deleted file mode 100644 index d8235688..00000000 --- a/docs/reference/append_priors.html +++ /dev/null @@ -1,184 +0,0 @@ - - - - - - - - -Format priors for Stan data list — append_priors • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Format priors for Stan data list

    -
    - -
    append_priors(standata, priors_made)
    - - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/auto_gaussian.html b/docs/reference/auto_gaussian.html index 636456fe..5b145cf1 100644 --- a/docs/reference/auto_gaussian.html +++ b/docs/reference/auto_gaussian.html @@ -1,93 +1,18 @@ - - - - - - - -auto Gaussian family for CAR models — auto_gaussian • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -auto Gaussian family for CAR models — auto_gaussian • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,54 +61,53 @@

    auto Gaussian family for CAR models

    create a family object for the auto-Gaussian CAR specification

    -
    auto_gaussian()
    - - -

    Value

    +
    +
    auto_gaussian()
    +
    +
    +

    Value

    An object of class family

    -

    See also

    - - +
    +
    +

    See also

    + +
    -

    Examples

    -
    
    -# \donttest{
    -cp = prep_car_data(shape2mat(georgia))
    -fit <- stan_car(log(rate.male) ~ 1,
    -                data = georgia,
    -                car_parts = cp,
    -                family = auto_gaussian())
    -# }
    +    
    +

    Examples

    +
    
    +# \donttest{
    +cp = prep_car_data(shape2mat(georgia))
    +fit <- stan_car(log(rate.male) ~ 1,
    +                data = georgia,
    +                car_parts = cp,
    +                family = auto_gaussian())
    +# }
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/count_neighbors.html b/docs/reference/count_neighbors.html deleted file mode 100644 index 6a9204a1..00000000 --- a/docs/reference/count_neighbors.html +++ /dev/null @@ -1,177 +0,0 @@ - - - - - - - - -Used internally to count neights per element of an nb list; as in, `Ni <- unlist(lapply(nb, count_neighbors))` — count_neighbors • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Used internally to count neighbors per element of an nb list; as in, `Ni <- unlist(lapply(nb, count_neighbors))`

    -
    - -
    count_neighbors(z)
    - -

    Arguments

    - - - - - - -
    z

    an element from spdep's nb object

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/edges.html b/docs/reference/edges.html index f8e6f3e9..313bc02f 100644 --- a/docs/reference/edges.html +++ b/docs/reference/edges.html @@ -1,93 +1,18 @@ - - - - - - - -Edge list — edges • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Edge list — edges • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,93 +61,65 @@

    Edge list

    Creates a list of connected nodes following the graph representation of a spatial connectivity matrix.

    -
    edges(C, unique_pairs_only = TRUE)
    - -

    Arguments

    - - - - - - - - - - -
    C

    A connectivity matrix where connection between two nodes is indicated by non-zero entries.

    unique_pairs_only

    By default, only unique pairs of nodes (i, j) will be included in the output.

    - -

    Value

    - -

    Returns a data.frame with three columns. The first two columns (node1 and node2) contain the indices of connected pairs of nodes; only unique pairs of nodes are included (unless `unique_pairs_only = FALSE`). The third column (weight) contains the corresponding matrix element, C[node1, node2].

    -

    Details

    - -

    This is used internally for stan_icar and it is also helpful for creating the scaling factor for BYM2 models fit with stan_icar.

    -

    See also

    - - - -

    Examples

    -
    
    -data(sentencing)
    -C <- shape2mat(sentencing)
    -nbs <- edges(C)
    -head(nbs)
    -#>   node1 node2 weight
    -#> 1     1     3      1
    -#> 2     1     7      1
    -#> 3     1     8      1
    -#> 4     1    20      1
    -#> 5     1    24      1
    -#> 6     1    28      1
    -
    -## similar to:
    -head(Matrix::summary(C))
    -#> 47 x 47 sparse Matrix of class "ngCMatrix", with 208 entries 
    -#>    i j
    -#> 1  3 1
    -#> 2  7 1
    -#> 3  8 1
    -#> 4 20 1
    -#> 5 24 1
    -#> 6 28 1
    -head(Matrix::summary(shape2mat(georgia, "W")))
    -#> 159 x 159 sparse Matrix of class "dgCMatrix", with 860 entries 
    -#>     i j         x
    -#> 1  23 1 0.1666667
    -#> 2  58 1 0.1428571
    -#> 3  59 1 0.1666667
    -#> 4 131 1 0.2000000
    -#> 5 148 1 0.1428571
    -#> 6 159 1 0.1250000
    -
    +    
    +
    edges(C, unique_pairs_only = TRUE)
    +
    + +
    +

    Arguments

    +
    C
    +

    A connectivity matrix where connection between two nodes is indicated by non-zero entries.

    +
    unique_pairs_only
    +

    By default, only unique pairs of nodes (i, j) will be included in the output.

    +
    +
    +

    Value

    +

    Returns a data.frame with three columns. The first two columns (node1 and node2) contain the indices of connected pairs of nodes; only unique pairs of nodes are included (unless unique_pairs_only = FALSE). The third column (weight) contains the corresponding matrix element, C[node1, node2].

    +
    +
    +

    Details

    +

    This is used internally for stan_icar and it is also helpful for creating the scaling factor for BYM2 models fit with stan_icar.

    +
    + + +
    +

    Examples

    +
    
    +data(sentencing)
    +C <- shape2mat(sentencing)
    +nbs <- edges(C)
    +head(nbs)
    +
    +## similar to:
    +head(Matrix::summary(C))
    +head(Matrix::summary(shape2mat(georgia, "W")))
    +
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/exp_pars.html b/docs/reference/exp_pars.html deleted file mode 100644 index d720d0a2..00000000 --- a/docs/reference/exp_pars.html +++ /dev/null @@ -1,235 +0,0 @@ - - - - - - - - -Expected dimensions of an eigenvector spatial filter — exp_pars • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Provides an informed guess for the number of eigenvectors required to remove spatial autocorrelation from a regression. This is used internally for stan_esf; the result can be used to set the prior scale parameter for the global shrinkage parameter in the regularized horseshoe prior. A smaller value of `p0` leads to a more sparse specification.

    -
    - -
    exp_pars(formula, data, C)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    formula

    Model formula.

    data

    The data used to fit the model; must be coercible to a dataframe for use in model.matrix.

    C

    An N x N binary connectivity matrix.

    - -

    Source

    - -

    Chun, Y., D. A. Griffith, M. Lee and P. Sinha (2016). Eigenvector selection with stepwise regression techniques to construct eigenvector spatial filters. *Journal of Geographical Systems*, 18(1), 67-85. doi: 10.1007/s10109-015-0225-3 -.

    -

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. *Spatial Statistics*. doi: 10.1016/j.spasta.2020.100450 -.

    -

    Piironen, J and A. Vehtari (2017). Sparsity information and regularization in the horseshoe and other shrinkage priors. In *Electronic Journal of Statistics*, 11(2):5018-5051.

    -

    Value

    - -

    Returns a numeric value representing the expected number of eigenvectors required to estimate a spatial filter (i.e. number of non-zero or 'large' coefficients).

    -

    Details

    - -

    Following Chun et al. (2016), the expected number of eigenvectors required to remove residual spatial autocorrelation from a model - is an increasing function of the degree of spatial autocorrelation in the outcome variable and the number of links in the connectivity matrix.

    -

    See also

    - - - -

    Examples

    -
    
    -C <- shape2mat(georgia, "B")
    -c(p0 = exp_pars(log(rate.male) ~ college, georgia, C))
    -#> p0 
    -#>  2 
    -if (FALSE) {
    - fit <- stan_esf(log(rate.male) ~ college, data = georgia, p0 = p0, iter = 1e3)
    -}
    -
    -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.9001.

    -
    - -
    -
    - - - - - - - - - - diff --git a/docs/reference/expected_mc.html b/docs/reference/expected_mc.html index 363bcf2f..1b120521 100644 --- a/docs/reference/expected_mc.html +++ b/docs/reference/expected_mc.html @@ -1,93 +1,18 @@ - - - - - - - -Expected value of the residual Moran coefficient. — expected_mc • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Expected value of the residual Moran coefficient. — expected_mc • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,63 +61,57 @@

    Expected value of the residual Moran coefficient.

    Expected value for the Moran coefficient of model residuals under the null hypothesis of no spatial autocorrelation.

    -
    expected_mc(X, C)
    - -

    Arguments

    - - - - - - - - - - -
    X

    model matrix, including column of ones.

    C

    Connectivity matrix.

    - -

    Source

    +
    +
    expected_mc(X, C)
    +
    +
    +

    Source

    Chun, Yongwan and Griffith, Daniel A. (2013). Spatial statistics and geostatistics. Sage, p. 18.

    -

    Value

    - +
    +
    +

    Arguments

    +
    X
    +

    model matrix, including column of ones.

    +
    C
    +

    Connectivity matrix.

    +
    +
    +

    Value

    Returns a numeric value.

    +
    -

    Examples

    -
    
    -data(georgia)
    -C <- shape2mat(georgia)
    -X <- model.matrix(~ ICE + college, georgia)
    -expected_mc(X, C)
    +    
    +

    Examples

    +
    
    +data(georgia)
    +C <- shape2mat(georgia)
    +X <- model.matrix(~ ICE + college, georgia)
    +expected_mc(X, C)
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/figures/README-example-1.png b/docs/reference/figures/README-example-1.png deleted file mode 100644 index 2be0dcf9..00000000 Binary files a/docs/reference/figures/README-example-1.png and /dev/null differ diff --git a/docs/reference/figures/README-example-2.png b/docs/reference/figures/README-example-2.png deleted file mode 100644 index 60ee4edc..00000000 Binary files a/docs/reference/figures/README-example-2.png and /dev/null differ diff --git a/docs/reference/figures/README-pressure-1.png b/docs/reference/figures/README-pressure-1.png deleted file mode 100644 index f98d4a61..00000000 Binary files a/docs/reference/figures/README-pressure-1.png and /dev/null differ diff --git a/docs/reference/figures/README-unnamed-chunk-4-1.png b/docs/reference/figures/README-unnamed-chunk-4-1.png deleted file mode 100644 index 87b0dedd..00000000 Binary files a/docs/reference/figures/README-unnamed-chunk-4-1.png and /dev/null differ diff --git a/docs/reference/figures/logo.png b/docs/reference/figures/logo.png index f84f3e97..eb042645 100644 Binary files a/docs/reference/figures/logo.png and b/docs/reference/figures/logo.png differ diff --git a/docs/reference/georgia-1.png b/docs/reference/georgia-1.png deleted file mode 100644 index 0ff911ee..00000000 Binary files a/docs/reference/georgia-1.png and /dev/null differ diff --git a/docs/reference/georgia.html b/docs/reference/georgia.html index e99bff11..f6b843ce 100644 --- a/docs/reference/georgia.html +++ b/docs/reference/georgia.html @@ -1,93 +1,18 @@ - - - - - - - -Georgia all-cause, sex-specific mortality, ages 55-64, years 2014-2018 — georgia • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Georgia all-cause, sex-specific mortality, ages 55-64, years 2014-2018 — georgia • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,84 +61,132 @@

    Georgia all-cause, sex-specific mortality, ages 55-64, years 2014-2018

    A simple features (sf) object for Georgia counties with sex- and age-specific deaths and populations at risk (2014-2018), plus select estimates (with standard errors) of county characteristics. Standard errors of the ICE were calculated using the Census Bureau's variance replicate tables.

    -
    georgia
    - - -

    Format

    - -

    A simple features object with county geometries and the following columns:

    -
    GEOID

    Six digit combined state and county FIPS code

    -
    NAME

    County name

    -
    ALAND

    Land area

    -
    AWATER

    Water area

    -
    population

    Census Bureau 2018 county population estimate

    -
    white

    Percent White, ACS 2018 five-year estimate

    -
    black

    Percent Black, ACS 2018 five-year estimate

    -
    hisp

    Percent Hispanic/Latino, ACS 2018 five-year estimate

    -
    ai

    Percent American Indian, ACS 2018 five-year estimate

    -
    deaths.male

    Male deaths, 55-64 yo, 2014-2018

    -
    pop.at.risk.male

    Population estimate, males, 55-64 yo, years 2014-2018 (total), ACS 2018 five-year estimate

    -
    pop.at.risk.male.se

    Standard error of the pop.at.risk.male estimate

    -
    deaths.female

    Female deaths, 55-64 yo, 2014-2018

    -
    pop.at.risk.female

    Population estimate, females, 55-64 yo, years 2014-2018 (total), ACS 2018 five-year estimate

    -
    pop.at.risk.female.se

    Standard error of the pop.at.risk.female estimate

    -
    ICE

    Index of Concentration at the Extremes

    -
    ICE.se

    Standard error of the ICE estimate, calculated using variance replicate tables

    -
    income

    Median household income, ACS 2018 five-year estimate

    -
    income.se

    Standard error of the income estimate

    -
    college

    Percent of the population age 25 or older that has a bachelor's degree or higher, ACS 2018 five-year estimate

    -
    college.se

    Standard error of the college estimate

    -
    insurance

    Percent of the population with health insurance coverage, ACS 2018 five-year estimate

    -
    insurance.se

    Standard error of the insurance estimate

    -
    rate.male

    Raw (crude) age-specific male mortality rate, 2014-2018

    -
    rate.female

    Raw (crude) age-specific female mortality rate, 2014-2018

    -
    geometry

    simple features geometry for county boundaries

    - -
    - -

    Source

    - -

    Centers for Disease Control and Prevention, National Center for Health Statistics. Underlying Cause of Death 1999-2018 on CDC Wonder Online Database. 2020. Available online: http://wonder.cdc.gov (accessed on 19 October 2020).

    -

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). ``Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure.'' *Int. J. Env. Res. and Public Health* 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    -

    Kyle Walker and Matt Herman (2020). tidycensus: Load US Census Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data Frames. R package version 0.11. https://CRAN.R-project.org/package=tidycensus

    -

    US Census Bureau. Variance Replicate Tables, 2018. Available online: https://www.census.gov/programs-surveys/acs/data/variance-tables.2018.html (accessed on 19 October 2020).

    - -

    Examples

    -
    if (FALSE) {
    -data(georgia)
    -head(georgia)
    -
    -library(sf)
    -plot(georgia[,'rate.female'])
    -}
    +    
    +
    georgia
    +
    + +
    +

    Format

    +

    A simple features object with county geometries and the following columns:

    GEOID
    +

    Six digit combined state and county FIPS code

    + +
    NAME
    +

    County name

    + +
    ALAND
    +

    Land area

    + +
    AWATER
    +

    Water area

    + +
    population
    +

    Census Bureau 2018 county population estimate

    + +
    white
    +

    Percent White, ACS 2018 five-year estimate

    + +
    black
    +

    Percent Black, ACS 2018 five-year estimate

    + +
    hisp
    +

    Percent Hispanic/Latino, ACS 2018 five-year estimate

    + +
    ai
    +

    Percent American Indian, ACS 2018 five-year estimate

    + +
    deaths.male
    +

    Male deaths, 55-64 yo, 2014-2018

    + +
    pop.at.risk.male
    +

    Population estimate, males, 55-64 yo, years 2014-2018 (total), ACS 2018 five-year estimate

    + +
    pop.at.risk.male.se
    +

    Standard error of the pop.at.risk.male estimate

    + +
    deaths.female
    +

    Female deaths, 55-64 yo, 2014-2018

    + +
    pop.at.risk.female
    +

    Population estimate, females, 55-64 yo, years 2014-2018 (total), ACS 2018 five-year estimate

    + +
    pop.at.risk.female.se
    +

    Standard error of the pop.at.risk.female estimate

    + +
    ICE
    +

    Index of Concentration at the Extremes

    + +
    ICE.se
    +

    Standard error of the ICE estimate, calculated using variance replicate tables

    + +
    income
    +

    Median household income, ACS 2018 five-year estimate

    + +
    income.se
    +

    Standard error of the income estimate

    + +
    college
    +

    Percent of the population age 25 or older that has a bachelor's degree or higher, ACS 2018 five-year estimate

    + +
    college.se
    +

    Standard error of the college estimate

    + +
    insurance
    +

    Percent of the population with health insurance coverage, ACS 2018 five-year estimate

    + +
    insurance.se
    +

    Standard error of the insurance estimate

    + +
    rate.male
    +

    Raw (crude) age-specific male mortality rate, 2014-2018

    + +
    rate.female
    +

    Raw (crude) age-specific female mortality rate, 2014-2018

    + +
    geometry
    +

    simple features geometry for county boundaries

    + + +
    +
    +

    Source

    +

    Centers for Disease Control and Prevention, National Center for Health Statistics. Underlying Cause of Death 1999-2018 on CDC Wonder Online Database. 2020. Available online: http://wonder.cdc.gov (accessed on 19 October 2020).

    +

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). "Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure." Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856. Data and code: https://github.com/ConnorDonegan/survey-HBM.

    +

    Kyle Walker and Matt Herman (2020). tidycensus: Load US Census Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data Frames. R package version 0.11. https://CRAN.R-project.org/package=tidycensus

    +

    US Census Bureau. Variance Replicate Tables, 2018. Available online: https://www.census.gov/programs-surveys/acs/data/variance-tables.2018.html (accessed on 19 October 2020).

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +data(georgia)
    +head(georgia)
    +
    +library(sf)
    +plot(georgia[,'rate.female'])
    +}
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/geostan-package.html b/docs/reference/geostan-package.html index 684ac423..f2a0e338 100644 --- a/docs/reference/geostan-package.html +++ b/docs/reference/geostan-package.html @@ -1,93 +1,18 @@ - - - - - - - -The geostan R package. — geostan-package • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -The geostan R package. — geostan-package • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -156,50 +62,44 @@

    The geostan R package.

    - -

    References

    - -

    Carpenter, B., Gelman, A., Hoffman, M.D., Lee, D., Goodrich, B., Betancourt, M., Brubaker, M., Guo, J., Li, P., Riddell, A., 2017. Stan: A probabilistic programming language. Journal of statistical software 76. doi: 10.18637/jss.v076.i01 +

    +

    References

    +

    Carpenter, B., Gelman, A., Hoffman, M.D., Lee, D., Goodrich, B., Betancourt, M., Brubaker, M., Guo, J., Li, P., Riddell, A., 2017. Stan: A probabilistic programming language. Journal of statistical software 76. doi: 10.18637/jss.v076.i01 .

    -

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. *Spatial Statistics*. doi: 10.1016/j.spasta.2020.100450 - (open access: doi: 10.31219/osf.io/fah3z +

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. Spatial Statistics. doi: 10.1016/j.spasta.2020.100450 + (open access: doi: 10.31219/osf.io/fah3z ).

    -

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. *Int. J. Env. Res. and Public Health* 18 (13): 6856. doi: 10.3390/ijerph18136856 -. Supplementary material: https://github.com/ConnorDonegan/survey-HBM.

    -

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. *OSF Preprints*. doi: 10.31219/osf.io/3ey65 +

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. doi: 10.3390/ijerph18136856 +. Supplementary material: https://github.com/ConnorDonegan/survey-HBM.

    +

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 .

    -

    Gabry, J., Goodrich, B. and Lysy, M. (2020). rstantools: Tools for developers of R packages interfacing with Stan. R package version 2.1.1 https://mc-stan.org/rstantools/.

    -

    Morris, M., Wheeler-Martin, K., Simpson, D., Mooney, S. J., Gelman, A., & DiMaggio, C. (2019). Bayesian hierarchical spatial models: Implementing the Besag York Mollié model in stan. Spatial and spatio-temporal epidemiology, 31, 100301. doi: 10.1016/j.sste.2019.100301 +

    Gabry, J., Goodrich, B. and Lysy, M. (2020). rstantools: Tools for developers of R packages interfacing with Stan. R package version 2.1.1 https://mc-stan.org/rstantools/.

    +

    Morris, M., Wheeler-Martin, K., Simpson, D., Mooney, S. J., Gelman, A., & DiMaggio, C. (2019). Bayesian hierarchical spatial models: Implementing the Besag York Mollié model in stan. Spatial and spatio-temporal epidemiology, 31, 100301. doi: 10.1016/j.sste.2019.100301 .

    -

    Stan Development Team (2019). RStan: the R interface to Stan. R package version 2.19.2. https://mc-stan.org

    +

    Stan Development Team (2019). RStan: the R interface to Stan. R package version 2.19.2. https://mc-stan.org

    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/geostan_fit-1.png b/docs/reference/geostan_fit-1.png deleted file mode 100644 index 84fa0a58..00000000 Binary files a/docs/reference/geostan_fit-1.png and /dev/null differ diff --git a/docs/reference/geostan_fit-2.png b/docs/reference/geostan_fit-2.png deleted file mode 100644 index 122d7d8f..00000000 Binary files a/docs/reference/geostan_fit-2.png and /dev/null differ diff --git a/docs/reference/geostan_fit-3.png b/docs/reference/geostan_fit-3.png deleted file mode 100644 index 7de8e67b..00000000 Binary files a/docs/reference/geostan_fit-3.png and /dev/null differ diff --git a/docs/reference/geostan_fit.html b/docs/reference/geostan_fit.html index a2463179..2bbd5d68 100644 --- a/docs/reference/geostan_fit.html +++ b/docs/reference/geostan_fit.html @@ -1,93 +1,18 @@ - - - - - - - -geostan_fit methods — print.geostan_fit • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -geostan_fit methods — print.geostan_fit • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,178 +61,211 @@

    geostan_fit methods

    Methods for fitted geostan models: extract residuals, fitted values, posterior predictive distribution or spatial component from a spatial regression model; extract samples from the posterior distribution; print regression results; plot posterior distributions.

    -
    # S3 method for geostan_fit
    -print(
    -  x,
    -  probs = c(0.025, 0.25, 0.5, 0.75, 0.975),
    -  digits = 3,
    -  pars = NULL,
    -  ...
    -)
    -
    -# S3 method for geostan_fit
    -plot(x, pars, plotfun = "hist", fill = "steelblue4", ...)
    -
    -# S3 method for geostan_fit
    -as.matrix(x, ...)
    -
    -# S3 method for geostan_fit
    -as.data.frame(x, ...)
    -
    -# S3 method for geostan_fit
    -as.array(x, ...)
    -
    -# S3 method for geostan_fit
    -residuals(object, summary = TRUE, rates = TRUE, detrend = TRUE, ...)
    -
    -# S3 method for geostan_fit
    -fitted(object, summary = TRUE, rates = TRUE, ...)
    -
    -spatial(object, summary = TRUE, ...)
    -
    -# S3 method for geostan_fit
    -spatial(object, summary = TRUE, ...)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    x

    A fitted model object of class geostan_fit.

    probs

    Argument passed to quantile; which quantiles to calculate and print.

    digits

    number of digits to print

    pars

    parameters to include; a character string (or vector) of parameter names.

    ...

    additional arguments.

    plotfun

    Argument passed to rstan::plot. Options include histograms ("hist"), MCMC traceplots ("trace"), and density plots ("dens"). Diagnostic plots are also available such as Rhat statistics ("rhat"), effective sample size ("ess"), and MCMC autocorrelation ("ac").

    fill

    fill color for histograms and density plots.

    object

    A fitted model object of class geostan_fit.

    summary

    Logical; should the values be summarized with the mean, standard deviation and quantiles (probs = c(.025, .2, .5, .8, .975)) for each observation? Otherwise a matrix containing samples from the posterior distribution at each observation is returned.

    rates

    For Poisson and Binomial models, should the fitted values be returned as rates, as opposed to raw counts? Defaults to TRUE.

    detrend

    For CAR models with Gaussian likelihood only (auto-gaussian); if detrend = TRUE, the implicit spatial trend will be removed from the residuals. The implicit spatial trend is Trend = rho * C %*% (Y - Mu) (see stan_car). I.e., resid = Y - (Mu + Trend).

    - -

    Value

    - -

    Methods residuals, fitted, spatial return a matrix containing all samples for each observation if summary = FALSE, else if summary = TRUE a data.frame containing a summary of the posterior distribution at each observation (of, respectively, residuals, fitted values, or the spatial trend).

    -

    plot returns a ggplot object that can be customized using the ggplot2 package.

    -

    as.matrix, as.data.frame, as.array return samples from the joint posterior distribution of parameters in the format corresponding to their names. The pars argument is used to return samples from only a subset of parameters.

    -

    See also

    - - - -

    Examples

    -
    # \donttest{
    -library(ggplot2)
    -library(sf)
    -data(sentencing)
    -
    -# spatial weights matrix with binary coding scheme
    -C <- shape2mat(sentencing, style = "B")
    -
    -# log-expected number of sentences
    -## expected counts are based on county racial composition and mean sentencing rates
    -log_e <- log(sentencing$expected_sents)
    -
    -# fit spatial Poisson model with unstructured 'random effects'
    -fit <- stan_glm(sents ~ offset(log_e),
    -                   re = ~ name,
    -                   family = poisson(),
    -                   data = sentencing,
    -                   C = C
    -)
    -
    -# print and plot results
    -print(fit)
    -plot(fit)
    -
    -# residuals
    -r = resid(fit)
    -
    -# fitted values
    -f = fitted(fit)
    -
    -# spatial diagnostics
    -sp_diag(fit, sentencing)
    -
    -# county `random effects' 
    -sp = spatial(fit)
    -
    -# posterior predictive distribution
    -yrep <- posterior_predict(fit, S = 100)
    -bayesplot::ppc_dens_overlay(sentencing$sents, yrep)
    -
    -# extract matrix of samples from posterior distribution of parameters
    -## alpha_re are the unstructured area random effects
    -S.matrix <- as.matrix(fit, pars = "alpha_re")
    -
    -# array of samples
    -S.array <- as.array(fit, pars = c("intercept", "alpha_re", "alpha_tau"))
    -S.monitor <- rstan::monitor(S.array, print = FALSE, warmup = 0)
    -head(S.monitor)
    -
    -# extract data.frame of posterior samples
    -S <- as.data.frame(fit, pars = "alpha_re")
    -# }
    +    
    +
    # S3 method for geostan_fit
    +print(
    +  x,
    +  probs = c(0.025, 0.25, 0.5, 0.75, 0.975),
    +  digits = 3,
    +  pars = NULL,
    +  ...
    +)
    +
    +# S3 method for geostan_fit
    +plot(x, pars, plotfun = "hist", fill = "steelblue4", ...)
    +
    +# S3 method for geostan_fit
    +as.matrix(x, ...)
    +
    +# S3 method for geostan_fit
    +as.data.frame(x, ...)
    +
    +# S3 method for geostan_fit
    +as.array(x, ...)
    +
    +# S3 method for geostan_fit
    +residuals(object, summary = TRUE, rates = TRUE, detrend = TRUE, ...)
    +
    +# S3 method for geostan_fit
    +fitted(object, summary = TRUE, rates = TRUE, ...)
    +
    +spatial(object, summary = TRUE, ...)
    +
    +# S3 method for geostan_fit
    +spatial(object, summary = TRUE, ...)
    +
    +# S3 method for geostan_fit
    +predict(
    +  object,
    +  newdata,
    +  alpha = mean(as.matrix(object, pars = "intercept")),
    +  center = object$x_center,
    +  summary = TRUE,
    +  type = c("link", "response"),
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    x
    +

    A fitted model object of class geostan_fit.

    +
    probs
    +

    Argument passed to quantile; which quantiles to calculate and print.

    +
    digits
    +

    number of digits to print

    +
    pars
    +

    parameters to include; a character string (or vector) of parameter names.

    +
    ...
    +

    additional arguments.

    +
    plotfun
    +

    Argument passed to rstan::plot. Options include histograms ("hist"), MCMC traceplots ("trace"), and density plots ("dens"). Diagnostic plots are also available such as Rhat statistics ("rhat"), effective sample size ("ess"), and MCMC autocorrelation ("ac").

    +
    fill
    +

    fill color for histograms and density plots.

    +
    object
    +

    A fitted model object of class geostan_fit.

    +
    summary
    +

    Logical; should the values be summarized with the mean, standard deviation and quantiles (probs = c(.025, .2, .5, .8, .975)) for each observation? Otherwise a matrix containing samples from the posterior distribution at each observation is returned.

    +
    rates
    +

    For Poisson and Binomial models, should the fitted values be returned as rates, as opposed to raw counts? Defaults to TRUE.

    +
    detrend
    +

    For CAR models with Gaussian likelihood only (auto-gaussian); if detrend = TRUE, the implicit spatial trend will be removed from the residuals. The implicit spatial trend is Trend = rho * C %*% (Y - Mu) (see stan_car). I.e., resid = Y - (Mu + Trend).

    +
    newdata
    +

    A data frame in which to look for variables with which to predict, presumably for the purpose of viewing marginal effects. Note that if the model formula includes an offset term, newdata must contain a column with the appropriate name for the offset, even though the values will be ignored (you may set all values to 1); you must use the alpha argument to include any additional terms. Note also that any spatially-lagged covariate terms will be ignored if they were provided using the slx argument. If covariates in the model were centered using the centerx argument, the predict.geostan_fit method will automatically center the predictors in newdata internally using the values stored in object$x_center. If newdata is missing, user arguments will be passed to the fitted.geostan_fit method to return the fitted values of the model.

    +
    alpha
    +

    A single numeric value or a numeric vector with length equal to nrow(newdata); alpha serves as the intercept in the linear predictor. The default is to use the posterior mean of the intercept. Even if type = "response", this needs to be provided on the scale of the linear predictor. See Details for additional information.

    +
    center
    +

    May be a vector of numeric values or a logical scalar to pass to scale. Defaults to using object$x_center. If the model was fit using centerx = TRUE, then covariates were centered and their mean values are stored in object$x_center and the predict method will use them to automatically center newdata; if the model was fit with centerx = FALSE, then object$x_center = FALSE and newdata will not be centered.

    +
    type
    +

    By default, results from predict are on the scale of the linear predictor (type = "link"). The alternative (type = "response") is on the scale of the response variable. For example, the default return values for a Poisson model are log-rates, and using type = "response" will return the rates (by exponentiating the log-rates).

    +
    +
    +

    Value

    +

    Methods residuals, fitted, predict, and spatial return a matrix containing all samples for each observation if summary = FALSE, else if summary = TRUE a data.frame containing a summary of the posterior distribution at each observation (of, respectively, residuals, fitted values, predicted values, or the spatial trend). The predict method will return a data frame with a summary of results together with user-provided newdata. +The predict method is designed for reviewing marginal effects of covariates. Thus, results do not include spatial trends or offset terms. To obtain the fitted values of the model (as opposed to predictions from new data), use the fitted method. For the posterior predictive distribution, see posterior_predict. +plot returns a ggplot object that can be customized using the ggplot2 package. +as.matrix, as.data.frame, as.array return samples from the joint posterior distribution of parameters in the format corresponding to their names. The pars argument is used to return samples from only a subset of parameters.

    +
    +
    +

    Details

    + +
    +

    predict.geostan_fit

    + + +

    The purpose of the predict method is to explore marginal effects of (combinations of) covariates. The method sets the intercept equal to its posterior mean (i.e., alpha = mean(as.matrix(object, pars = "intercept"))); the only source of uncertainty in the results is the posterior distribution of the coefficients, which can be obtained using Beta = as.matrix(object, pars = "beta"). The results returned by predict.geostan_fit are obtained by (a summary of):

    for (m in 1:M) preds[m,] = alpha + X * Beta[m,]
    + +

    where M is the number of MCMC samples in the model (M = nrow(Beta)) and preds is a matrix of predicted values.

    +

    Be aware that in non-linear models (including Poisson and Binomial models) marginal effects of each covariate are sensitive to the level of other covariates in the model. If the model includes any spatially-lagged covariates (introduced using the slx argument) or a spatial autocorrelation term, these terms will essentially be fixed at zero for the purposes of calculating marginal effects. To explore the impact of these (missing) terms, you can add their values to the linear predictor using the alpha argument.

    +
    + +
    + + +
    +

    Examples

    +
    # \donttest{
    +library(ggplot2)
    +library(sf)
    +data(sentencing)
    +
    +# spatial weights matrix with binary coding scheme
    +C <- shape2mat(sentencing, style = "B")
    +
    +# log-expected number of sentences
    +## expected counts are based on county racial composition and mean sentencing rates
    +log_e <- log(sentencing$expected_sents)
    +
    +# fit spatial Poisson model with unstructured 'random effects'
    +fit <- stan_glm(sents ~ offset(log_e),
    +                   re = ~ name,
    +                   family = poisson(),
    +                   data = sentencing,
    +                   C = C,
    +                   chains = 2, iter = 500) # for speed only
    +
    +# print and plot results
    +print(fit)
    +plot(fit)
    +
    +# residuals
    +r = resid(fit)
    +
    +# fitted values
    +f = fitted(fit)
    +
    +# spatial diagnostics
    +sp_diag(fit, sentencing)
    +
    +# county `random effects' 
    +sp = spatial(fit)
    +
    +# posterior predictive distribution
    +yrep <- posterior_predict(fit, S = 100)
    +bayesplot::ppc_dens_overlay(sentencing$sents, yrep)
    +
    +# extract matrix of samples from posterior distribution of parameters
    +## alpha_re are the unstructured area random effects
    +S.matrix <- as.matrix(fit, pars = "alpha_re")
    +
    +# array of samples
    +S.array <- as.array(fit, pars = c("intercept", "alpha_re", "alpha_tau"))
    +S.monitor <- rstan::monitor(S.array, print = FALSE, warmup = 0)
    +head(S.monitor)
    +
    +## marginal effects
    +data(georgia)
    +C <- shape2mat(georgia, style = "B")
    +cp <- prep_car_data(C)
    +georgia$income <- georgia$income/1e3
    +
    +fit <- stan_car(deaths.male ~ offset(log(pop.at.risk.male)) + log(income),
    +                slx = ~ log(income),
    +                centerx = TRUE,
    +                car_parts = cp,
    +                data = georgia,
    +                family = poisson(),
    +                chains = 2, iter = 500) # for speed only
    +
    +newdata <- data.frame(
    +    income = seq(min(georgia$income), max(georgia$income), by = 1),
    +    pop.at.risk.male = 1
    +)
    +
    +p <- predict(fit, newdata, type = "response")
    +plot(newdata$income, p$mean * 1e3,
    +     main = "Deaths per 1,000",
    +     ylab = NA,
    +     xlab = "Median county income ($1,000s)")
    +
    +# }
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/get_shp.html b/docs/reference/get_shp.html index 284596a8..3d003dee 100644 --- a/docs/reference/get_shp.html +++ b/docs/reference/get_shp.html @@ -1,93 +1,18 @@ - - - - - - - -Download shapefiles — get_shp • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Download shapefiles — get_shp • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,63 +61,56 @@

    Download shapefiles

    Given a url to a shapefile in a compressed .zip file, download the file and unzip it into a folder in your working directory.

    -
    get_shp(url, folder = "shape")
    - -

    Arguments

    - - - - - - - - - - -
    url

    url to download a shapefile.

    folder

    what to name the new folder in your working directory containing the shapefile

    - -

    Value

    +
    +
    get_shp(url, folder = "shape")
    +
    +
    +

    Arguments

    +
    url
    +

    url to download a shapefile.

    +
    folder
    +

    what to name the new folder in your working directory containing the shapefile

    +
    +
    +

    Value

    A folder in your working directory with the shapefile; filepaths are printed to the console.

    +
    -

    Examples

    -
    if (FALSE) {
    -library(sf)
    -url <- "https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_state_20m.zip"
    -folder <- tempdir()
    -print(folder)
    -get_shp(url, folder)
    -states <- sf::st_read(folder)
    -head(states)
    -}
    +    
    +

    Examples

    +
    if (FALSE) {
    +library(sf)
    +url <- "https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_state_20m.zip"
    +folder <- tempdir()
    +print(folder)
    +get_shp(url, folder)
    +states <- sf::st_read(folder)
    +head(states)
    +}
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/index.html b/docs/reference/index.html index 8b8326dc..1e2c3aee 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,25 +1,5 @@ - - - - - - -Function reference • geostan - - - - - - - - - - - - - -Function reference • geostan - - +
    @@ -45,48 +23,29 @@ geostan - 0.1.1 + 0.1.2
    - -
    - -
    - +
    +
    +
    @@ -96,276 +55,176 @@

    Reference

    - ---- - -
    -

    -Package overview

    + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    +

    Package overview

    +

    geostan-package

    The geostan R package.

    -

    -Spatial analysis

    -

    -

    Functions for measuring and visualizing spatial autocorrelation and dispersion, including model diagnostics

    +
    +

    Spatial analysis

    +

    Functions for measuring and visualizing spatial autocorrelation and dispersion, including model diagnostics

    +

    aple()

    Spatial autocorrelation estimator

    +

    lisa()

    Local Moran's I

    +

    mc()

    The Moran coefficient

    +

    me_diag()

    Data model diagnostics

    +

    moran_plot()

    Moran plot

    +

    n_eff()

    Effective sample size

    +

    sp_diag()

    Spatial data diagnostics

    +

    expected_mc()

    Expected value of the residual Moran coefficient.

    +

    row_standardize()

    Row-standardize a matrix; safe for zero row-sums.

    -

    -Models

    -

    -

    Model fitting functions and methods

    +
    +

    Models

    +

    Model fitting functions and methods

    +

    stan_car()

    Conditional autoregressive (CAR) models

    +

    stan_esf()

    Spatial filtering

    +

    stan_glm()

    Generalized linear models

    +

    stan_icar()

    Intrinsic autoregressive models

    -

    print(<geostan_fit>) plot(<geostan_fit>) as.matrix(<geostan_fit>) as.data.frame(<geostan_fit>) as.array(<geostan_fit>) residuals(<geostan_fit>) fitted(<geostan_fit>) spatial()

    +
    +

    print(<geostan_fit>) plot(<geostan_fit>) as.matrix(<geostan_fit>) as.data.frame(<geostan_fit>) as.array(<geostan_fit>) residuals(<geostan_fit>) fitted(<geostan_fit>) spatial() predict(<geostan_fit>)

    geostan_fit methods

    +

    sp_diag()

    Spatial data diagnostics

    +

    posterior_predict()

    Draw samples from the posterior predictive distribution

    +

    uniform() normal() student_t() gamma() hs()

    Prior distributions

    -

    -Convenience functions

    -

    -

    Tools for working with spatial data and geostan models

    +
    +

    Convenience functions

    +

    Tools for working with spatial data and geostan models

    +

    auto_gaussian()

    auto Gaussian family for CAR models

    +

    edges()

    Edge list

    +

    get_shp()

    Download shapefiles

    +

    make_EV()

    Extract eigenfunctions of a connectivity matrix for spatial filtering

    +

    prep_icar_data()

    Prepare data for ICAR models

    +

    prep_car_data()

    Prepare data for a Stan CAR model

    +

    prep_me_data()

    Prepare data for spatial measurement error models

    +

    se_log()

    Standard error of log(x)

    +

    shape2mat()

    Create spatial and space-time connectivity matrices

    +

    sim_sar()

    Simulate spatially autocorrelated data

    +

    uniform() normal() student_t() gamma() hs()

    Prior distributions

    +

    waic()

    WAIC

    -

    -Data

    +
    +

    Data

    +

    georgia

    Georgia all-cause, sex-specific mortality, ages 55-64, years 2014-2018

    +

    sentencing

    Florida state prison sentencing counts by county, 1905-1910

    - +
    +
    -

    -

    Site built with pkgdown 1.6.1.9001.

    +

    Site built with pkgdown 2.0.1.

    -
    -
    +
    - - + + diff --git a/docs/reference/lisa-1.png b/docs/reference/lisa-1.png deleted file mode 100644 index 375e01a2..00000000 Binary files a/docs/reference/lisa-1.png and /dev/null differ diff --git a/docs/reference/lisa.html b/docs/reference/lisa.html index 3b24a7fb..b52ac39e 100644 --- a/docs/reference/lisa.html +++ b/docs/reference/lisa.html @@ -1,93 +1,18 @@ - - - - - - - -Local Moran's I — lisa • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Local Moran's I — lisa • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,89 +61,76 @@

    Local Moran's I

    A local indicator of spatial association (lisa).

    -
    lisa(x, w, type = TRUE)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    Numeric vector of length `n`.

    w

    An `n x n` spatial connectivity matrix. See shape2mat. If w is not row standardized (all(Matrix::rowSums(w) == 1)), it will automatically be row-standardized.

    type

    Return the type of association also (High-High, Low-Low, High-Low, and Low-High)? Defaults to FALSE.

    - -

    Source

    +
    +
    lisa(x, w, type = TRUE)
    +
    +
    +

    Source

    Anselin, Luc. "Local indicators of spatial association—LISA." Geographical Analysis 27, no. 2 (1995): 93-115.

    -

    Value

    - +
    +
    +

    Arguments

    +
    x
    +

    Numeric vector of length n.

    +
    w
    +

    An n x n spatial connectivity matrix. See shape2mat. If w is not row-standardized (i.e., all(Matrix::rowSums(w) == 1) is not TRUE), it will automatically be row-standardized.

    +
    type
    +

    Return the type of association also (High-High, Low-Low, High-Low, and Low-High)? Defaults to TRUE.

    +
    +
    +

    Value

    If type = FALSE a numeric vector of lisa values for exploratory analysis of local spatial autocorrelation. If type = TRUE, a data.frame with columns Li (the lisa value) and type.

    -

    Details

    - +
    +
    +

    Details

    The values will be standardized with z = scale(x) first and w will be row-standardized if needed. The LISA values are the product of each z value with their respective mean surrounding value lagz = w %*% z; lisa = z * lagz. These are for exploratory analysis and model diagnostics. The function uses Equation 7 from Anselin (1995).

    An above-average value (i.e. positive z-value) with positive mean spatial lag indicates local positive spatial autocorrelation and is designated type "High-High"; a low value surrounded by high values indicates negative spatial autocorrelation and is designated type "Low-High", and so on.

    -

    See also

    - - - -

    Examples

    -
    
    -library(ggplot2)
    -library(sf)
    -
    -data(georgia)
    -w <- shape2mat(georgia, "W")
    -x <- georgia$ICE
    -li = lisa(x, w)
    -head(li)
    -#>           Li type
    -#> 1  0.2092411   LL
    -#> 2  0.5065799   LL
    -#> 3  1.2403225   HH
    -#> 4  1.9109174   HH
    -#> 5 -0.6925012   HL
    -#> 6  3.7233968   HH
    -if (FALSE) {
    -ggplot(georgia, aes(fill = li$Li)) +
    -  geom_sf() +
    -  scale_fill_gradient2()
    - }
    +    
    +
    +

    See also

    + +
    + +
    +

    Examples

    +
    
    +library(ggplot2)
    +library(sf)
    +
    +data(georgia)
    +w <- shape2mat(georgia, "W")
    +x <- georgia$ICE
    +li = lisa(x, w)
    +head(li)
    +if (FALSE) {
    +ggplot(georgia, aes(fill = li$Li)) +
    +  geom_sf() +
    +  scale_fill_gradient2()
    + }
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/make_EV-1.png b/docs/reference/make_EV-1.png deleted file mode 100644 index 68ff4e3b..00000000 Binary files a/docs/reference/make_EV-1.png and /dev/null differ diff --git a/docs/reference/make_EV-2.png b/docs/reference/make_EV-2.png deleted file mode 100644 index 026c0b21..00000000 Binary files a/docs/reference/make_EV-2.png and /dev/null differ diff --git a/docs/reference/make_EV.html b/docs/reference/make_EV.html index 7ee4ece1..29ee13ce 100644 --- a/docs/reference/make_EV.html +++ b/docs/reference/make_EV.html @@ -1,93 +1,18 @@ - - - - - - - -Extract eigenfunctions of a connectivity matrix for spatial filtering — make_EV • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract eigenfunctions of a connectivity matrix for spatial filtering — make_EV • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,137 +61,80 @@

    Extract eigenfunctions of a connectivity matrix for spatial filtering

    Extract eigenfunctions of a connectivity matrix for spatial filtering

    -
    make_EV(C, nsa = FALSE, threshold = 0.2, values = FALSE)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    C

    A binary spatial weights matrix. See shape2mat.

    nsa

    Logical. Default of nsa = FALSE excludes eigenvectors capturing negative spatial autocorrelation. -Setting nsa = TRUE will result in a candidate set of EVs that contains eigenvectors representing positive and negative SA.

    threshold

    Defaults to threshold=0.2 to exclude eigenvectors representing spatial autocorrelation levels that are less than threshold times the maximum possible Moran coefficient achievable for the given spatial connectivity matrix. If theshold = 0, all eigenvectors will be returned (however, the eigenvector of constants (with eigenvalue of zero) will be dropped automatically).

    values

    Should eigenvalues be returned also? Defaults to FALSE.

    - -

    Source

    +
    +
    make_EV(C, nsa = FALSE, threshold = 0.2, values = FALSE)
    +
    +
    +

    Source

    Daniel Griffith and Yongwan Chun. 2014. "Spatial Autocorrelation and Spatial Filtering." in M. M. Fischer and P. Nijkamp (eds.), Handbook of Regional Science. Springer.

    -

    Value

    - +
    +
    +

    Arguments

    +
    C
    +

    A binary spatial weights matrix. See shape2mat.

    +
    nsa
    +

    Logical. Default of nsa = FALSE excludes eigenvectors capturing negative spatial autocorrelation. +Setting nsa = TRUE will result in a candidate set of EVs that contains eigenvectors representing positive and negative SA.

    +
    threshold
    +

    Defaults to threshold=0.2 to exclude eigenvectors representing spatial autocorrelation levels that are less than threshold times the maximum possible Moran coefficient achievable for the given spatial connectivity matrix. If threshold = 0, all eigenvectors will be returned (however, the eigenvector of constants (with eigenvalue of zero) will be dropped automatically).

    +
    values
    +

    Should eigenvalues be returned also? Defaults to FALSE.

    +
    +
    +

    Value

    A data.frame of eigenvectors for spatial filtering. If values=TRUE then a named list is returned with elements eigenvectors and eigenvalues.

    -

    Details

    - +
    +
    +

    Details

    Returns a set of eigenvectors related to the Moran coefficient (MC), limited to those eigenvectors with |MC| > threshold if nsa = TRUE or MC > threshold if nsa = FALSE, optionally with corresponding eigenvalues.

    -

    See also

    - - +
    +
    +

    See also

    + +
    -

    Examples

    -
    
    -library(ggplot2)
    -library(sf)
    -data(georgia)
    -C <- shape2mat(georgia, style = "B")
    -EV <- make_EV(C)
    -head(EV)
    -#>            EV1          EV2         EV3         EV4         EV5         EV6
    -#> 1 -0.053973000  0.158354827 -0.02451495 -0.07999360 -0.05495670 -0.03895408
    -#> 2 -0.120997679 -0.124986886 -0.05059745  0.07318481  0.02784188  0.11857531
    -#> 3  0.157713405 -0.099951835  0.01356943 -0.14334711  0.05341177 -0.03711518
    -#> 4  0.120719899 -0.023687383  0.01020668  0.06400989  0.08722122 -0.02438761
    -#> 5 -0.009716376 -0.031433574  0.01549834 -0.02919731 -0.02628975  0.04268508
    -#> 6  0.104755936 -0.009807962 -0.07396067  0.13411989 -0.08817770 -0.03317589
    -#>           EV7         EV8          EV9        EV10         EV11        EV12
    -#> 1  0.04078584 -0.07913979 -0.073310182  0.05824286  0.165668091 -0.02214353
    -#> 2  0.07455461 -0.06418893  0.018371182 -0.04740389  0.114467522 -0.02966357
    -#> 3  0.14528770 -0.07684324 -0.001716571  0.02772449  0.035922602 -0.03243465
    -#> 4 -0.01379424 -0.15491366 -0.037879577  0.07212674  0.026300454 -0.06266601
    -#> 5 -0.05693693  0.04599274 -0.085951636 -0.01862466 -0.008244736  0.01315821
    -#> 6 -0.07211577 -0.01599899 -0.055539246  0.10582664  0.052396607  0.07686041
    -#>          EV13        EV14         EV15         EV16          EV17        EV18
    -#> 1  0.04195817  0.18103782 -0.084938278  0.003033966  0.0530510220  0.05870778
    -#> 2 -0.04068051 -0.03111605 -0.024452141  0.010425051 -0.0583459853  0.04185440
    -#> 3 -0.01524579 -0.10073778 -0.003287751 -0.110222305  0.0006761951 -0.10313888
    -#> 4  0.10693434  0.02403098  0.120988436 -0.077063185  0.0481654363 -0.02438014
    -#> 5  0.02205978  0.08043488  0.138648306 -0.034792946 -0.0102036565  0.03900379
    -#> 6  0.05031355 -0.10474824 -0.039351149  0.009688502 -0.1202163789  0.10220094
    -#>          EV19        EV20        EV21        EV22        EV23         EV24
    -#> 1 -0.09304319  0.08582136 -0.08203149  0.01545984 -0.04469590 -0.090910352
    -#> 2  0.05246949  0.04548325 -0.06193518 -0.02841151 -0.04594155 -0.006598782
    -#> 3 -0.10237014  0.07811491  0.06047413  0.01204504 -0.16456457  0.068679862
    -#> 4  0.06801384 -0.06532443  0.02101885 -0.13861840  0.11761550  0.041276236
    -#> 5  0.01777727 -0.04806478  0.13050490  0.08672938 -0.03545406  0.068555451
    -#> 6 -0.01371599  0.01246201  0.07763896  0.03758763  0.03783776 -0.104185845
    -#>           EV25        EV26        EV27        EV28         EV29        EV30
    -#> 1 -0.088405271  0.05030406 -0.01005692 -0.06573835 -0.024941839 -0.06591464
    -#> 2  0.023410439  0.08774088  0.01783850 -0.15346774  0.079661211 -0.08033607
    -#> 3 -0.058465515 -0.08765154 -0.00465578  0.08994140  0.038271976 -0.09031933
    -#> 4  0.002939444  0.01579144  0.11970054 -0.10072935  0.012892342  0.05374701
    -#> 5 -0.157902347  0.09327591 -0.08298279  0.05408252 -0.062174041 -0.04069685
    -#> 6  0.005710602 -0.04680732  0.01440013 -0.07482543 -0.006485797 -0.02026172
    -#>          EV31        EV32         EV33         EV34        EV35          EV36
    -#> 1  0.06598855  0.13310230 -0.001088752 -0.136952459  0.04277640  0.0667013728
    -#> 2  0.09038923  0.04758564 -0.203353242  0.046886471 -0.06338316  0.0084997995
    -#> 3 -0.02407610  0.05049433 -0.014104107 -0.042885282  0.06319263  0.0246407754
    -#> 4  0.11287042  0.01197097  0.114566809  0.005226204  0.03427603  0.0775606603
    -#> 5 -0.12699316  0.01330529 -0.023794841 -0.102840746 -0.05690616  0.1238410428
    -#> 6 -0.02987645 -0.06439507  0.035938065 -0.143761276 -0.10594230 -0.0004267883
    -#>          EV37        EV38         EV39         EV40        EV41        EV42
    -#> 1  0.13619033  0.02016976 -0.090284979  0.004669381 -0.02585823 -0.13204350
    -#> 2 -0.20680540 -0.14235190  0.087586221 -0.036857021  0.06712104  0.05734786
    -#> 3 -0.03353478 -0.07598022 -0.001852868 -0.051778871 -0.03636932  0.08016340
    -#> 4 -0.03888049 -0.10850519 -0.060287225  0.038069185  0.01381853 -0.18917293
    -#> 5 -0.20184257  0.01621507 -0.102312521  0.050768030  0.02702793  0.02987741
    -#> 6  0.05951001 -0.03161532  0.111008436  0.152597569 -0.04076408  0.10672196
    -
    -if (FALSE) {
    -ggplot(georgia) +
    -  geom_sf(aes(fill = EV[,1])) +
    -  scale_fill_gradient2()
    -
    -fit <- stan_esf(log(rate.male) ~ 1, data = georgia, EV = EV, C = C)
    -sp_diag(fit, georgia)
    -}
    +    
    +

    Examples

    +
    
    +library(ggplot2)
    +library(sf)
    +data(georgia)
    +C <- shape2mat(georgia, style = "B")
    +EV <- make_EV(C)
    +head(EV)
    +
    +if (FALSE) {
    +ggplot(georgia) +
    +  geom_sf(aes(fill = EV[,1])) +
    +  scale_fill_gradient2()
    +
    +fit <- stan_esf(log(rate.male) ~ 1, data = georgia, EV = EV, C = C)
    +sp_diag(fit, georgia)
    +}
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/mc.html b/docs/reference/mc.html index a548a9dc..ad12ebaf 100644 --- a/docs/reference/mc.html +++ b/docs/reference/mc.html @@ -1,93 +1,18 @@ - - - - - - - -The Moran coefficient — mc • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -The Moran coefficient — mc • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,80 +61,71 @@

    The Moran coefficient

    The Moran coefficient, a measure of spatial autocorrelation (also known as Global Moran's I)

    -
    mc(x, w, digits = 3, warn = TRUE)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    x

    Numeric vector of input values, length n.

    w

    An n x n spatial connectivity matrix. See shape2mat.

    digits

    Number of digits to round results to.

    warn

    If `FALSE`, no warning will be printed to inform you when observations with zero neighbors have been dropped.

    - -

    Source

    +
    +
    mc(x, w, digits = 3, warn = TRUE)
    +
    +
    +

    Source

    Chun, Yongwan, and Daniel A. Griffith. Spatial statistics and geostatistics: theory and applications for geographic information science and technology. Sage, 2013.

    Cliff, Andrew David, and J. Keith Ord. Spatial processes: models & applications. Taylor & Francis, 1981.

    -

    Value

    - +
    +
    +

    Arguments

    +
    x
    +

    Numeric vector of input values, length n.

    +
    w
    +

    An n x n spatial connectivity matrix. See shape2mat.

    +
    digits
    +

    Number of digits to round results to.

    +
    warn
    +

    If FALSE, no warning will be printed to inform you when observations with zero neighbors have been dropped.

    +
    +
    +

    Value

    The Moran coefficient, a numeric value.

    -

    Details

    - +
    +
    +

    Details

    If any observations with no neighbors are found (i.e. any(Matrix::rowSums(w) == 0)) they will be dropped automatically and a message will print stating how many were dropped.

    -

    See also

    - - - -

    Examples

    -
    
    -library(sf)
    -data(georgia)
    -w <- shape2mat(georgia, style = "W")
    -x <- georgia$ICE
    -mc(x, w)
    -#> [1] 0.519
    -
    +    
    +
    +

    See also

    + +
    + +
    +

    Examples

    +
    
    +library(sf)
    +data(georgia)
    +w <- shape2mat(georgia, style = "W")
    +x <- georgia$ICE
    +mc(x, w)
    +
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/me_diag-1.png b/docs/reference/me_diag-1.png deleted file mode 100644 index 088dc97b..00000000 Binary files a/docs/reference/me_diag-1.png and /dev/null differ diff --git a/docs/reference/me_diag-2.png b/docs/reference/me_diag-2.png deleted file mode 100644 index ea9ca3de..00000000 Binary files a/docs/reference/me_diag-2.png and /dev/null differ diff --git a/docs/reference/me_diag.html b/docs/reference/me_diag.html index 9b2785a0..071af21b 100644 --- a/docs/reference/me_diag.html +++ b/docs/reference/me_diag.html @@ -1,93 +1,18 @@ - - - - - - - -Data model diagnostics — me_diag • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Data model diagnostics — me_diag • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,130 +61,112 @@

    Data model diagnostics

    Visual diagnostics for spatial measurement error models.

    -
    me_diag(
    -  fit,
    -  varname,
    -  shape,
    -  probs = c(0.025, 0.975),
    -  plot = TRUE,
    -  size = 0.25,
    -  index = 0,
    -  style = c("W", "B"),
    -  w = shape2mat(shape, match.arg(style)),
    -  binwidth = function(x) 0.5 * sd(x)
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    fit

    A geostan_fit model object as returned from a call to one of the geostan::stan_* functions.

    varname

    Name of the modeled variable (a character string, as it appears in the model formula).

    shape

    An object of class sf or another spatial object coercible to sf with sf::st_as_sf.

    probs

    Lower and upper quantiles of the credible interval to plot.

    plot

    If FALSE, return a list of ggplots and a data.frame with the raw data values alongside a posterior summary of the modeled variable.

    size

    Size of points and lines, passed to geom_pointrange.

    index

    Integer value; use this if you wish to identify observations with the largest n=index absolute Delta values; data on the top n=index observations ordered by absolute Delta value will be printed to the console and the plots will be labeled with the indices of the identified observations.

    style

    Style of connectivity matrix; if w is not provided, style is passed to shape2mat and defaults to "W" for row-standardized.

    w

    An optional spatial connectivity matrix; if not provided, one will be created using shape2mat.

    binwidth

    A function with a single argument that will be passed to the binwidth argument in geom_histogram. The default is to set the width of bins to 0.5 * sd(x).

    - -

    Source

    - -

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). ``Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure.'' Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    -

    Value

    +
    +
    me_diag(
    +  fit,
    +  varname,
    +  shape,
    +  probs = c(0.025, 0.975),
    +  plot = TRUE,
    +  mc_style = c("scatter", "hist"),
    +  size = 0.25,
    +  index = 0,
    +  style = c("W", "B"),
    +  w = shape2mat(shape, match.arg(style)),
    +  binwidth = function(x) 0.5 * sd(x)
    +)
    +
    +
    +

    Source

    +

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). “Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure.” Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    +
    +
    +

    Arguments

    +
    fit
    +

    A geostan_fit model object as returned from a call to one of the geostan::stan_* functions.

    +
    varname
    +

    Name of the modeled variable (a character string, as it appears in the model formula).

    +
    shape
    +

    An object of class sf or another spatial object coercible to sf with sf::st_as_sf.

    +
    probs
    +

    Lower and upper quantiles of the credible interval to plot.

    +
    plot
    +

    If FALSE, return a list of ggplots and a data.frame with the raw data values alongside a posterior summary of the modeled variable.

    +
    mc_style
    +

    Character string indicating how to plot the Moran coefficient for the delta values: if mc_style = "scatter", then moran_plot will be used with the marginal residuals; if mc_style = "hist", then a histogram of Moran coefficient values will be returned, where each plotted value represents the degree of residual autocorrelation in a draw from the joint posterior distribution of delta values.

    +
    size
    +

    Size of points and lines, passed to geom_pointrange.

    +
    index
    +

    Integer value; use this if you wish to identify observations with the largest n=index absolute Delta values; data on the top n=index observations ordered by absolute Delta value will be printed to the console and the plots will be labeled with the indices of the identified observations.

    +
    style
    +

    Style of connectivity matrix; if w is not provided, style is passed to shape2mat and defaults to "W" for row-standardized.

    +
    w
    +

    An optional spatial connectivity matrix; if not provided, one will be created using shape2mat.

    +
    binwidth
    +

    A function with a single argument that will be passed to the binwidth argument in geom_histogram. The default is to set the width of bins to 0.5 * sd(x).

    +
    +
    +

    Value

    A grid of spatial diagnostic plots for measurement error models comparing the raw observations to the posterior distribution of the true values. Includes a point-interval plot, a histogram of Moran coefficient values for the posterior distribution of Delta values (Delta = z - x, where z are the survey estimates and x are the modeled true values), and a map of the posterior mean of the Delta values.

    -

    See also

    +
    +
    +

    See also

    + +
    - +
    +

    Examples

    +
    if (FALSE) {
    +library(sf)
    +data(georgia)
    +## binary adjacency matrix
    +A <- shape2mat(georgia, "B")
    +## prepare data for the CAR model, using WCAR specification
    +cp <- prep_car_data(A, type = "WCAR")
    +## provide list of data for the measurement error model
    +ME <- list(se = data.frame(ICE = georgia$ICE.se),
    +           spatial = TRUE,
    +           car_parts = cp
    +         )
    +## sample from the prior probability model only, including the ME model
    +fit <- stan_glm(log(rate.male) ~ ICE,
    +                ME = ME,
    +                C = C,
    +                data = georgia, 
    +                prior_only = TRUE,
    +                refresh = 0
    +                )
    +## see ME diagnostics
    +me_diag(fit, "ICE", georgia)
    +## see index values for the largest (absolute) delta values
    + ## (differences between raw estimate and the posterior mean)
    +me_diag(fit, "ICE", georgia, index = 3)
    +}
     
    -    

    Examples

    -
    if (FALSE) {
    -library(sf)
    -data(georgia)
    -## binary adjacency matrix
    -A <- shape2mat(georgia, "B")
    -## prepare data for the CAR model, using WCAR specification
    -cp <- prep_car_data(A, type = "WCAR")
    -## provide list of data for the measurement error model
    -ME <- list(se = data.frame(ICE = georgia$ICE.se),
    -           spatial = TRUE,
    -           car_parts = cp
    -         )
    -## sample from the prior probability model only, including the ME model
    -fit <- stan_glm(log(rate.male) ~ ICE,
    -                ME = ME,
    -                C = C,
    -                data = georgia, 
    -                prior_only = TRUE,
    -                refresh = 0
    -                )
    -## see ME diagnostics
    -me_diag(fit, "ICE", georgia)
    -## see index values for the largest (absolute) delta values
    - ## (differences between raw estimate and the posterior mean)
    -me_diag(fit, "ICE", georgia, index = 3)
    -}
    -
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/moran_plot-1.png b/docs/reference/moran_plot-1.png deleted file mode 100644 index 4a5710a9..00000000 Binary files a/docs/reference/moran_plot-1.png and /dev/null differ diff --git a/docs/reference/moran_plot.html b/docs/reference/moran_plot.html index 64e49488..f26e0859 100644 --- a/docs/reference/moran_plot.html +++ b/docs/reference/moran_plot.html @@ -1,93 +1,18 @@ - - - - - - - -Moran plot — moran_plot • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Moran plot — moran_plot • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,108 +61,90 @@

    Moran plot

    Plots a set of values against their spatially lagged values and gives the Moran coefficient as a measure of spatial autocorrelation.

    -
    moran_plot(
    -  x,
    -  w,
    -  xlab = "x (centered)",
    -  ylab = "Spatial Lag",
    -  pch = 20,
    -  col = "darkred",
    -  size = 2,
    -  alpha = 1,
    -  lwd = 0.5
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    x

    A numeric vector of length n.

    w

    An n x n spatial connectivity matrix.

    xlab

    Label for the x-axis.

    ylab

    Label for the y-axis.

    pch

    Symbol type.

    col

    Symbol color.

    size

    Symbol size.

    alpha

    Symbol transparency.

    lwd

    Width of the regression line.

    - -

    Source

    +
    +
    moran_plot(
    +  x,
    +  w,
    +  xlab = "x (centered)",
    +  ylab = "Spatial Lag",
    +  pch = 20,
    +  col = "darkred",
    +  size = 2,
    +  alpha = 1,
    +  lwd = 0.5
    +)
    +
    +
    +

    Source

    Anselin, Luc. "Local indicators of spatial association—LISA." Geographical analysis 27, no. 2 (1995): 93-115.

    -

    Value

    - +
    +
    +

    Arguments

    +
    x
    +

    A numeric vector of length n.

    +
    w
    +

    An n x n spatial connectivity matrix.

    +
    xlab
    +

    Label for the x-axis.

    +
    ylab
    +

    Label for the y-axis.

    +
    pch
    +

    Symbol type.

    +
    col
    +

    Symbol color.

    +
    size
    +

    Symbol size.

    +
    alpha
    +

    Symbol transparency.

    +
    lwd
    +

    Width of the regression line.

    +
    +
    +

    Value

    Returns a gg plot, a scatter plot with x on the horizontal and its spatially lagged values on the vertical axis (i.e. a Moran scatter plot).

    -

    Details

    - -

    For details on the symbol parameters see the documentation for geom_point.

    +
    +
    +

    Details

    +

    For details on the symbol parameters see the documentation for geom_point.

    If any observations with no neighbors are found (i.e. any(Matrix::rowSums(w) == 0)) they will be dropped automatically and a message will print stating how many were dropped.

    -

    See also

    - - +
    +
    +

    See also

    + +
    -

    Examples

    -
    if (FALSE) {
    -data(georgia)
    -x <- georgia$ICE
    -w <- shape2mat(georgia, "W")
    -moran_plot(x, w)
    -}
    +    
    +

    Examples

    +
    if (FALSE) {
    +data(georgia)
    +x <- georgia$ICE
    +w <- shape2mat(georgia, "W")
    +moran_plot(x, w)
    +}
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/n_eff-1.png b/docs/reference/n_eff-1.png deleted file mode 100644 index 51c5e44c..00000000 Binary files a/docs/reference/n_eff-1.png and /dev/null differ diff --git a/docs/reference/n_eff.html b/docs/reference/n_eff.html index 13cdf218..5ffe36ac 100644 --- a/docs/reference/n_eff.html +++ b/docs/reference/n_eff.html @@ -1,93 +1,18 @@ - - - - - - - -Effective sample size — n_eff • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Effective sample size — n_eff • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,77 +61,69 @@

    Effective sample size

    An approximate calculation for the effective sample size for spatially autocorrelated data. Only valid for approximately normally distributed data.

    -
    n_eff(n, rho)
    - -

    Arguments

    - - - - - - - - - - -
    n

    Number of observations.

    rho

    Spatial autocorrelation parameter from a simultaneous autoregressive model.

    - -

    Source

    +
    +
    n_eff(n, rho)
    +
    +
    +

    Source

    Griffith, Daniel A. (2005). Effective geographic sample size in the presence of spatial autocorrelation. Annals of the Association of American Geographers. Vol. 95(4): 740-760.

    -

    Value

    - +
    +
    +

    Arguments

    +
    n
    +

    Number of observations.

    +
    rho
    +

    Spatial autocorrelation parameter from a simultaneous autoregressive model.

    +
    +
    +

    Value

    Returns effective sample size n*, a numeric value.

    -

    Details

    - +
    +
    +

    Details

    Implements Equation 3 from Griffith (2005).

    -

    See also

    - - - -

    Examples

    -
    
    -n_eff(100, 0)
    -#> [1] 100
    -n_eff(100, 0.5)
    -#> [1] 30.25516
    -n_eff(100, 0.9)
    -#> [1] 4.796839
    -n_eff(100, 1)
    -#> [1] 1.003387
    -
    -if (FALSE) {
    -rho <- seq(0, 1, by = 0.01)
    -plot(rho, n_eff(100, rho), type = 'l')
    -}
    +    
    +
    +

    See also

    + +
    + +
    +

    Examples

    +
    
    +n_eff(100, 0)
    +n_eff(100, 0.5)
    +n_eff(100, 0.9)
    +n_eff(100, 1)
    +
    +if (FALSE) {
    +rho <- seq(0, 1, by = 0.01)
    +plot(rho, n_eff(100, rho), type = 'l')
    +}
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/ohio.html b/docs/reference/ohio.html deleted file mode 100644 index d23dc34a..00000000 --- a/docs/reference/ohio.html +++ /dev/null @@ -1,209 +0,0 @@ - - - - - - - - -Ohio Presidential election results and county characteristics — ohio • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    A simple features spatial dataset containing county attributes and Presidential election data for Ohio. - This is a processed version of the MIT "County Presidential Election Returns 2000-2016" data set augmented with data from the American Community Survey and Bureau of Labor Statistics.

    -
    - -
    ohio
    - - -

    Format

    - -

    A simple feature collection including the following attributes:

    -
    GEOID

    Six digit combined state and county FIPS code

    -
    county

    County name

    -
    gop_growth

    Change in the Republican vote share from historic (2000 - 2012) average vote share to 2016 vote share (i.e. (trump_2016/total_2016) - historic_gop). Data only includes Democratic and Republican votes (i.e. two-party vote share).

    -
    historic_gop

    Average Republican share of all major party votes, 2000 to 2012

    -
    trump_2016

    Number of votes for Donald Trump in 2016

    -
    total_2016

    Total number of Democratic and Republican votes in 2016

    -
    population

    ACS 2016 5 year population estimate

    -
    pop_density

    Population per square mile (population / ALAND)

    -
    college_educated

    ACS 2016 5 year estimate of the percent of the population 25 and older with a bachelors degree or higher

    -
    college_educated.se

    Standard error for college_educated

    -
    white_nonhispanic

    ACS 2016 5 year estimate of the percent of non-hispanic whites in the population

    -
    white_nonhispanic.se

    Standard error for white_nonhispanic

    -
    unemployment

    Local area unemployment estimate from the Bureau of Labor Statistics, 2016

    -
    unemployment.acs

    Civilian unemployment rate estimate from the American Community Survey, 2016

    -
    unemployment.acs.se

    Standard error for unemployment.acs

    -
    geometry

    County boundaries (polygons) in simple features format

    - -
    - -

    Source

    - -

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian Estimation of Spatial Filters with Moran’s Eigenvectors and Hierarchical Shrinkage Priors. Spatial Statistics. https://doi.org/10.1016/j.spasta.2020.100450

    -

    MIT Election Data and Science Lab, 2018, "County Presidential Election Returns 2000-2016", https://doi.org/10.7910/DVN/VOQCHQ, Harvard Dataverse, V1, UNF:6:ZaxsDvp6RsFitno8ZYlK7w== [fileUNF]

    -

    US Bureau of Labor Statistics, Local Force By County, 2016 Annual Averages. https://www.bls.gov/lau/#cntyaa

    -

    US Census Bureau, 2016. American Community Survey. Tables DP02, DP03, DP05 5-year estimates.

    -

    Kyle Walker, 2018. Tidycensus. https://walkerke.github.io/tidycensus/index.html

    -

    Details

    - -

    The Bureau of Labor Statistics unemployment estimates are based on the ACS estimates as well as additional information. The BLS estimates were used for the analysis in Donegan et al. (2020). The ACS estimates have standard errors (the BLS estimates do not).

    - -

    Examples

    -
    if (FALSE) { -library(sf) -data(ohio) -} -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/posterior_predict-1.png b/docs/reference/posterior_predict-1.png deleted file mode 100644 index 5678cac1..00000000 Binary files a/docs/reference/posterior_predict-1.png and /dev/null differ diff --git a/docs/reference/posterior_predict-2.png b/docs/reference/posterior_predict-2.png deleted file mode 100644 index c337447e..00000000 Binary files a/docs/reference/posterior_predict-2.png and /dev/null differ diff --git a/docs/reference/posterior_predict-3.png b/docs/reference/posterior_predict-3.png deleted file mode 100644 index 94af624e..00000000 Binary files a/docs/reference/posterior_predict-3.png and /dev/null differ diff --git a/docs/reference/posterior_predict.html b/docs/reference/posterior_predict.html index cddbdd98..16c78793 100644 --- a/docs/reference/posterior_predict.html +++ b/docs/reference/posterior_predict.html @@ -1,93 +1,18 @@ - - - - - - - -Draw samples from the posterior predictive distribution — posterior_predict • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Draw samples from the posterior predictive distribution — posterior_predict • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,87 +61,72 @@

    Draw samples from the posterior predictive distribution

    Draw samples from the posterior predictive distribution of a fitted geostan model.

    -
    posterior_predict(object, S, summary = FALSE, width = 0.95, car_parts, seed)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - -
    object

    A geostan_fit object.

    S

    Optional; number of samples to take from the posterior distribution. The default, and maximum, is the total number of samples stored in the model.

    summary

    Should the predictive distribution be summarized by its means and central quantile intervals? If summary = FALSE, an `S` x `N` matrix of samples will be returned. If summary = TRUE, then a `data.frame` with the means and `100*width` credible intervals is returned.

    width

    Only used if summary = TRUE, to set the quantiles for the credible intervals. Defaults to `width = 0.95`.

    car_parts

    Data for CAR model specification; only required for stan_car with `family = auto_gaussian()`.

    seed

    A single integer value to be used in a call to set.seed before taking samples from the posterior distribution.

    - -

    Value

    - -

    A matrix of size S x N containing samples from the posterior predictive distribution, where S is the number of samples drawn and N is the number of observations. If `summary = TRUE`, a `data.frame` with N rows and 3 columns is returned (with column names `mu`, `lwr`, and `upr`).

    - -

    Examples

    -
    if (FALSE) {
    - fit <- stan_glm(sents ~ offset(log(expected_sents)),
    -                 data = sentencing,
    -                 family = poisson(),
    -                 chains = 1)
    - yrep <- posterior_predict(fit, S = 100)
    - bayesplot::ppc_dens_overlay(y = sentencing$sents, yrep = yrep)
    -
    - fit2 <- stan_glm(sents ~ offset(log(expected_sents)),
    -                  re = ~ name,
    -                  data = sentencing,
    -                  family = poisson(),
    -                  chains = 1)
    - yrep <- posterior_predict(fit2, S = 100)
    - bayesplot::ppc_dens_overlay(y = sentencing$sents, yrep = yrep)
    - sp_diag(fit2, sentencing)
    - }
    +    
    +
    posterior_predict(object, S, summary = FALSE, width = 0.95, car_parts, seed)
    +
    + +
    +

    Arguments

    +
    object
    +

    A geostan_fit object.

    +
    S
    +

    Optional; number of samples to take from the posterior distribution. The default, and maximum, is the total number of samples stored in the model.

    +
    summary
    +

    Should the predictive distribution be summarized by its means and central quantile intervals? If summary = FALSE, an S x N matrix of samples will be returned. If summary = TRUE, then a data.frame with the means and 100*width credible intervals is returned.

    +
    width
    +

    Only used if summary = TRUE, to set the quantiles for the credible intervals. Defaults to width = 0.95.

    +
    car_parts
    +

    Data for CAR model specification; only required for stan_car with family = auto_gaussian().

    +
    seed
    +

    A single integer value to be used in a call to set.seed before taking samples from the posterior distribution.

    +
    +
    +

    Value

    +

    A matrix of size S x N containing samples from the posterior predictive distribution, where S is the number of samples drawn and N is the number of observations. If summary = TRUE, a data.frame with N rows and 3 columns is returned (with column names mu, lwr, and upr).

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    + fit <- stan_glm(sents ~ offset(log(expected_sents)),
    +                 data = sentencing,
    +                 family = poisson(),
    +                 chains = 1)
    + yrep <- posterior_predict(fit, S = 100)
    + bayesplot::ppc_dens_overlay(y = sentencing$sents, yrep = yrep)
    +
    + fit2 <- stan_glm(sents ~ offset(log(expected_sents)),
    +                  re = ~ name,
    +                  data = sentencing,
    +                  family = poisson(),
    +                  chains = 1)
    + yrep <- posterior_predict(fit2, S = 100)
    + bayesplot::ppc_dens_overlay(y = sentencing$sents, yrep = yrep)
    + sp_diag(fit2, sentencing)
    + }
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/prep_car_data-1.png b/docs/reference/prep_car_data-1.png deleted file mode 100644 index 974698b9..00000000 Binary files a/docs/reference/prep_car_data-1.png and /dev/null differ diff --git a/docs/reference/prep_car_data.html b/docs/reference/prep_car_data.html index 4271c398..069fdb08 100644 --- a/docs/reference/prep_car_data.html +++ b/docs/reference/prep_car_data.html @@ -1,93 +1,18 @@ - - - - - - - -Prepare data for a Stan CAR model — prep_car_data • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Prepare data for a Stan CAR model — prep_car_data • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,124 +61,109 @@

    Prepare data for a Stan CAR model

    Prepare data for a Stan CAR model

    -
    prep_car_data(
    -  A,
    -  style = c("WCAR", "ACAR", "DCAR"),
    -  k = 1,
    -  gamma = 0,
    -  lambda = TRUE,
    -  cmat = TRUE,
    -  stan_fn = ifelse(style == "WCAR", "wcar_normal_lpdf", "car_normal_lpdf")
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    A

    Binary adjacency matrix; for style = DCAR, provide a symmetric matrix of distances instead. The distance matrix should be sparse, meaning that most distances should be zero (usually obtained by setting some threshold distance beyond which all are zero).

    style

    Specification for the connectivity matrix (C) and conditional variances (M); one of "WCAR", "ACAR", or "DCAR".

    k

    For style = DCAR, distances will be raised to the -k power (d^-k).

    gamma

    For style = DCAR, distances will be offset by gamma before raising to the -kth power.

    lambda

    If TRUE, return eigenvalues required for calculating the log determinant of the precision matrix and for determining the range of permissible values of rho. These will also be printed with a message if lambda = TRUE.

    cmat

    If cmat = TRUE, return the full matrix C (in sparse matrix format).

    stan_fn

    Two computational methods are available for CAR models using stan_car: car\_normal\_lpdf and wcar\_normal\_lpdf. For WCAR models, either method will work but wcar\_normal\_lpdf is faster. To force use car\_normal\_lpdf when style = 'WCAR', provide stan_fn = "car_normal_lpdf".

    - -

    Source

    +
    +
    prep_car_data(
    +  A,
    +  style = c("WCAR", "ACAR", "DCAR"),
    +  k = 1,
    +  gamma = 0,
    +  lambda = TRUE,
    +  cmat = TRUE,
    +  stan_fn = ifelse(style == "WCAR", "wcar_normal_lpdf", "car_normal_lpdf")
    +)
    +
    +
    +

    Source

    Cliff A, Ord J (1981). Spatial Processes: Models and Applications. Pion.

    Cressie N (2015 [1993]). Statistics for Spatial Data. Revised edition. John Wiley & Sons.

    Cressie N, Perrin O, Thomas-Agnan C (2005). “Likelihood-based estimation for Gaussian MRFs.” Statistical Methodology, 2(1), 1–16.

    Cressie N, Wikle CK (2011). Statistics for Spatio-Temporal Data. John Wiley & Sons.

    Haining RP, Li G (2020). Modelling Spatial and Spatio-Temporal Data: A Bayesian Approach. CRC Press.

    -

    Value

    - -

    A list containing all of the data elements required by the CAR model in stan_car.

    -

    Details

    - -

    The CAR model is:

      Normal(Mu, Sigma), Sigma = (I - rho * C)^-1 * M * tau^2,
    -
    +
    +
    +

    Arguments

    +
    A
    +

    Binary adjacency matrix; for style = DCAR, provide a symmetric matrix of distances instead. The distance matrix should be sparse, meaning that most distances should be zero (usually obtained by setting some threshold distance beyond which all are zero).

    +
    style
    +

    Specification for the connectivity matrix (C) and conditional variances (M); one of "WCAR", "ACAR", or "DCAR".

    +
    k
    +

    For style = DCAR, distances will be raised to the -k power (d^-k).

    +
    gamma
    +

    For style = DCAR, distances will be offset by gamma before raising to the -kth power.

    +
    lambda
    +

    If TRUE, return eigenvalues required for calculating the log determinant of the precision matrix and for determining the range of permissible values of rho. These will also be printed with a message if lambda = TRUE.

    +
    cmat
    +

    If cmat = TRUE, return the full matrix C (in sparse matrix format).

    +
    stan_fn
    +

    Two computational methods are available for CAR models using stan_car: car_normal_lpdf and wcar_normal_lpdf. For WCAR models, either method will work but wcar_normal_lpdf is faster. To force the use of car_normal_lpdf when style = 'WCAR', provide stan_fn = "car_normal_lpdf".

    +
    +
    +

    Value

    +

    A list containing all of the data elements required by the CAR model in stan_car.

    +
    +
    +

    Details

    +

    The CAR model is:

      Normal(Mu, Sigma), Sigma = (I - rho * C)^-1 * M * tau^2,
    +

    where I is the identity matrix, rho is a spatial autocorrelation parameter, C is a connectivity matrix, and M * tau^2 is a diagonal matrix with conditional variances on the diagonal. tau^2 is a (scalar) scale parameter.

    In the WCAR specification, C is the row-standardized version of A. This means that the non-zero elements of A will be converted to 1/N_i where N_i is the number of neighbors for the ith site (obtained using Matrix::rowSums(A)). The conditional variances (on the diagonal of M * tau^2), are also proportional to 1/N_i.

    The ACAR specification is from Cressie, Perrin and Thomas-Agnan (2005); also see Cressie and Wikle (2011, p. 188).

    The DCAR specification is inverse distance-based, and requires that the user provide a (sparse) distance matrix instead of a binary adjacency matrix. (For A, provide a symmetric matrix of distances, not inverse distances!) Internally, non-zero elements of A will be converted to: d_{ij} = (a_{ij} + gamma)^(-k) (Cliff and Ord 1981, p. 144). Default values are k=1 and gamma=0. Following Cressie (2015), these values will be standardized by the maximum d_{ij} value. The conditional variances will be proportional to the inverse of the row sums of the transformed distance matrix: M_{ii} = (sum_j^N d_{ij})^(-1).

    For inverse-distance weighting schemes, see Cliff and Ord (1981); for distance-based CAR specifications, see Cressie (2015 [1993]) and Haining and Li (2020).

    -

    When using stan_car, always use cmat = TRUE (the default).

    +

    When using stan_car, always use cmat = TRUE (the default).

    +
    -

    Examples

    -
    
    -data(georgia)
    +    
    +

    Examples

    +
    
    +data(georgia)
     
    -## binary adjacency matrix
    -A <- shape2mat(georgia, style = "B")
    +## binary adjacency matrix
    +A <- shape2mat(georgia, style = "B")
     
    -## get list of data for Stan
    -cp <- prep_car_data(A, "WCAR")
    -1 / range(cp$lambda)
    +## get list of data for Stan
    +cp <- prep_car_data(A, "WCAR")
    +1 / range(cp$lambda)
     
    -# \donttest{
    -## pass the data to stan_car
    -fit = stan_car(log(rate.male) ~ 1, data = georgia, car_parts = cp)
    +# \donttest{
    +## pass the data to stan_car
    +fit = stan_car(log(rate.male) ~ 1, data = georgia, car_parts = cp)
     
    -# ACAR specification
    -cp <- prep_car_data(A, "ACAR")
    +# ACAR specification
    +cp <- prep_car_data(A, "ACAR")
     
    -## DCAR specification (inverse-distance based)
    -A <- shape2mat(georgia, "B")
    -D <- sf::st_distance(sf::st_centroid(georgia))
    -A <- D * A
    -cp <- prep_car_data(A, "DCAR", k = 1)
    -# }
    +## DCAR specification (inverse-distance based)
    +A <- shape2mat(georgia, "B")
    +D <- sf::st_distance(sf::st_centroid(georgia))
    +A <- D * A
    +cp <- prep_car_data(A, "DCAR", k = 1)
    +# }
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/prep_icar_data.html b/docs/reference/prep_icar_data.html index 0d2ae0b5..f3743efb 100644 --- a/docs/reference/prep_icar_data.html +++ b/docs/reference/prep_icar_data.html @@ -1,93 +1,18 @@ - - - - - - - -Prepare data for ICAR models — prep_icar_data • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Prepare data for ICAR models — prep_icar_data • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    -

    Given a symmetric n x n connectivity matrix, prepare data for intrinsic conditional autoregressive models in Stan. This function may be used for building custom ICAR models in Stan. This is used internally by stan_icar.

    +

    Given a symmetric n x n connectivity matrix, prepare data for intrinsic conditional autoregressive models in Stan. This function may be used for building custom ICAR models in Stan. This is used internally by stan_icar.

    -
    prep_icar_data(C, scale_factor = NULL)
    - -

    Arguments

    - - - - - - - - - - -
    C

    Connectivity matrix

    scale_factor

    Optional vector of scale factors for each connected portion of the graph structure. If not provided by the user it will be fixed to a vector of ones.

    - -

    Source

    +
    +
    prep_icar_data(C, scale_factor = NULL)
    +
    +
    +

    Source

    Besag, Julian, Jeremy York, and Annie Mollié. 1991. “Bayesian Image Restoration, with Two Applications in Spatial Statistics.” Annals of the Institute of Statistical Mathematics 43 (1): 1–20.

    -

    Donegan, Connor. Flexible Functions for ICAR, BYM, and BYM2 Models in Stan. Code Repository. 2021. Available online: https://github.com/ConnorDonegan/Stan-IAR (accessed Sept. 10, 2021).

    +

    Donegan, Connor. Flexible Functions for ICAR, BYM, and BYM2 Models in Stan. Code Repository. 2021. Available online: https://github.com/ConnorDonegan/Stan-IAR (accessed Sept. 10, 2021).

    Freni-Sterrantino, Anna, Massimo Ventrucci, and Håvard Rue. 2018. “A Note on Intrinsic Conditional Autoregressive Models for Disconnected Graphs.” Spatial and Spatio-Temporal Epidemiology 26: 25–34.

    Morris, Mitzi, Katherine Wheeler-Martin, Dan Simpson, Stephen J Mooney, Andrew Gelman, and Charles DiMaggio. 2019. “Bayesian Hierarchical Spatial Models: Implementing the Besag York Mollié Model in Stan.” Spatial and Spatio-Temporal Epidemiology 31: 100301.

    Riebler, Andrea, Sigrunn H Sørbye, Daniel Simpson, and Håvard Rue. 2016. “An Intuitive Bayesian Spatial Model for Disease Mapping That Accounts for Scaling.” Statistical Methods in Medical Research 25 (4): 1145–65.

    -

    Value

    - -

    list of data to add to Stan data list:

    -
    -
    k

    number of groups

    -
    group_size

    number of nodes per group

    -
    n_edges

    number of connections between nodes (unique pairs only)

    -
    node1

    first node

    -
    node2

    second node. (node1[i] and node2[i] form a connected pair)

    -
    weight

    The element C[node1, node2].

    -
    group_idx

    indices for each observation belonging to each group, ordered by group.

    -
    m

    number of disconnected regions requiring their own intercept.

    -
    A

    n-by-m matrix of dummy variables for the component-specific intercepts.

    -
    inv_sqrt_scale_factor

    By default, this will be a k-length vector of ones. Placeholder for user-specified information. If user provided `scale_factor`, then this will be `1/sqrt(scale_factor)`.

    -
    comp_id

    n-length vector indicating the group membership of each observation.

    - -
    - -

    Details

    - -

    This is used internally to prepare data for stan_icar models. It can also be helpful for fitting custom ICAR models outside of geostan.

    -

    See also

    - - - -

    Examples

    -
    
    -data(sentencing)
    -C <- shape2mat(sentencing)
    -icar.data.list <- prep_icar_data(C)
    -
    +    
    +
    +

    Arguments

    +
    C
    +

    Connectivity matrix

    +
    scale_factor
    +

    Optional vector of scale factors for each connected portion of the graph structure. If not provided by the user it will be fixed to a vector of ones.

    +
    +
    +

    Value

    +

    list of data to add to Stan data list:

    k
    +

    number of groups

    + +
    group_size
    +

    number of nodes per group

    + +
    n_edges
    +

    number of connections between nodes (unique pairs only)

    + +
    node1
    +

    first node

    + +
    node2
    +

    second node. (node1[i] and node2[i] form a connected pair)

    + +
    weight
    +

    The element C[node1, node2].

    + +
    group_idx
    +

    indices for each observation belonging to each group, ordered by group.

    + +
    m
    +

    number of disconnected regions requiring their own intercept.

    + +
    A
    +

    n-by-m matrix of dummy variables for the component-specific intercepts.

    + +
    inv_sqrt_scale_factor
    +

    By default, this will be a k-length vector of ones. Placeholder for user-specified information. If user provided scale_factor, then this will be 1/sqrt(scale_factor).

    + +
    comp_id
    +

    n-length vector indicating the group membership of each observation.

    + + +
    +
    +

    Details

    +

    This is used internally to prepare data for stan_icar models. It can also be helpful for fitting custom ICAR models outside of geostan.

    +
    + + +
    +

    Examples

    +
    
    +data(sentencing)
    +C <- shape2mat(sentencing)
    +icar.data.list <- prep_icar_data(C)
    +
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/prep_me_data.html b/docs/reference/prep_me_data.html index b06112ba..ecba167f 100644 --- a/docs/reference/prep_me_data.html +++ b/docs/reference/prep_me_data.html @@ -1,93 +1,18 @@ - - - - - - - -Prepare data for spatial measurement error models — prep_me_data • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Prepare data for spatial measurement error models — prep_me_data • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,93 +61,87 @@

    Prepare data for spatial measurement error models

    Prepares the list of data required for geostan's (spatial) measurement error models. Given a data frame of standard errors and any optional arguments, the function returns a list with all required data for the models, filling in missing elements with default values.

    -
    prep_me_data(
    -  se,
    -  bounds = c(-Inf, Inf),
    -  car_parts,
    -  prior,
    -  logit = rep(FALSE, times = ncol(se))
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    se

    Data frame of standard errors; column names must match (exactly) the variable names used in the model formula.

    bounds

    An optional numeric vector of length two providing the lower and upper bounds, respectively, of the variables. If not provided, they will be set to c(-Inf, Inf) (i.e., unbounded). Common usages include keeping percentages between zero and one hundred or proportions between zero and one.

    car_parts

    A list of data required for spatial CAR models, as created by prep_car_data; optional. If omitted, the measurement error model will be a non-spatial Student's t model.

    prior

    A named list of prior distributions (see priors). If none are provided, default priors will be assigned. The list of priors may include the following parameters:

    -
    df

    If using a non-spatial ME model, the degrees of freedom (df) for the Student's t model is assigned a gamma prior with default parameters of gamma(alpha = 3, beta = 0.2). Provide values for each covariate in se, listing the values in the same order as the columns of se.

    - -
    location

    The prior for the location parameter (mu) is a normal (Gaussian) distribution (the default being normal(location = 0, scale = 100)). To adjust the prior distributions, provide values for each covariate in se, listing the values in the same order as the columns of se.

    - -
    scale

    The prior for the scale parameters is Student's t, and the default parameters are student_t(df = 10, location = 0, scale = 40). To adjust, provide values for each covariate in se, listing the values in the same order as the columns of se.

    - -
    car_rho

    The CAR model, if used, has a spatial autocorrelation parameter, rho, which is assigned a uniform prior distribution. You must specify values that are within the permissible range of values for rho; these are automatically printed to the console by the prep_car_data function.

    - - -
    logit

    Optional vector of logical values (TRUE, FALSE) indicating if the variable should be logit-transformed before being modeled. When TRUE, the sampling error will be modeled on the untransformed scale as usual; however, the spatial CAR prior model (or non-spatial Student's t prior model) will be assigned to the logit-transformed variate. Transformation can be crucial for modeling proportions with frequency distributions that are highly skewed.

    - -

    Value

    +
    +
    prep_me_data(
    +  se,
    +  bounds = c(-Inf, Inf),
    +  car_parts,
    +  prior,
    +  logit = rep(FALSE, times = ncol(se))
    +)
    +
    + +
    +

    Arguments

    +
    se
    +

    Data frame of standard errors; column names must match (exactly) the variable names used in the model formula.

    +
    bounds
    +

    An optional numeric vector of length two providing the lower and upper bounds, respectively, of the variables. If not provided, they will be set to c(-Inf, Inf) (i.e., unbounded). Common usages include keeping percentages between zero and one hundred or proportions between zero and one.

    +
    car_parts
    +

    A list of data required for spatial CAR models, as created by prep_car_data; optional. If omitted, the measurement error model will be a non-spatial Student's t model.

    +
    prior
    +

    A named list of prior distributions (see priors). If none are provided, default priors will be assigned. The list of priors may include the following parameters:

    df
    +

    If using a non-spatial ME model, the degrees of freedom (df) for the Student's t model is assigned a gamma prior with default parameters of gamma(alpha = 3, beta = 0.2). Provide values for each covariate in se, listing the values in the same order as the columns of se.

    + +
    location
    +

    The prior for the location parameter (mu) is a normal (Gaussian) distribution (the default being normal(location = 0, scale = 100)). To adjust the prior distributions, provide values for each covariate in se, listing the values in the same order as the columns of se.

    + + +
    scale
    +

    The prior for the scale parameters is Student's t, and the default parameters are student_t(df = 10, location = 0, scale = 40). To adjust, provide values for each covariate in se, listing the values in the same order as the columns of se.

    + + +
    car_rho
    +

    The CAR model, if used, has a spatial autocorrelation parameter, rho, which is assigned a uniform prior distribution. You must specify values that are within the permissible range of values for rho; these are automatically printed to the console by the prep_car_data function.

    + + + +
    +
    logit
    +

    Optional vector of logical values (TRUE, FALSE) indicating if the variable should be logit-transformed before being modeled. When TRUE, the sampling error will be modeled on the untransformed scale as usual; however, the spatial CAR prior model (or non-spatial Student's t prior model) will be assigned to the logit-transformed variate. Transformation can be crucial for modeling proportions with frequency distributions that are highly skewed.

    +
    +
    +

    Value

    A list of data as required for (spatial) ME models. Missing arguments will be filled in with default values, including prior distributions.

    +
    -

    Examples

    -
    data(georgia)
    +    
    +

    Examples

    +
    data(georgia)
     
    -## for a non-spatial prior model 
    -se <- data.frame(ICE = georgia$ICE.se, college = georgia$college.se)
    -ME <- prep_me_data(se)
    +## for a non-spatial prior model 
    +se <- data.frame(ICE = georgia$ICE.se, college = georgia$college.se)
    +ME <- prep_me_data(se)
     
    -## for a spatial prior model (generally recommended)
    -A <- shape2mat(georgia, "B")
    -cars <- prep_car_data(A)
    -ME <- prep_me_data(se, car_parts = cars)
    +## for a spatial prior model (generally recommended)
    +A <- shape2mat(georgia, "B")
    +cars <- prep_car_data(A)
    +ME <- prep_me_data(se, car_parts = cars)
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/prep_sp_me_data.html b/docs/reference/prep_sp_me_data.html deleted file mode 100644 index 5d7d1d6c..00000000 --- a/docs/reference/prep_sp_me_data.html +++ /dev/null @@ -1,199 +0,0 @@ - - - - - - - - -prep_sp_me_data — prep_sp_me_data • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    prep_sp_me_data

    -
    - -
    prep_sp_me_data(C, spatial_me)
    - -

    Arguments

    - - - - - - - - - - -
    C

    Connectivity matrix

    spatial_me

    If FALSE, will return placeholder values only

    - -

    Value

    - -

    A list containing elements has_car = TRUE, number of non-zero elements of C, and numbers of neighbors per observation D_diag, and binary connectivity matrix C_me

    - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/priors-1.png b/docs/reference/priors-1.png deleted file mode 100644 index 6757382c..00000000 Binary files a/docs/reference/priors-1.png and /dev/null differ diff --git a/docs/reference/priors.html b/docs/reference/priors.html index 03215293..1f36ad02 100644 --- a/docs/reference/priors.html +++ b/docs/reference/priors.html @@ -1,93 +1,18 @@ - - - - - - - -Prior distributions — priors • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Prior distributions — priors • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,164 +61,142 @@

    Prior distributions

    Prior distributions

    -
    uniform(lower, upper, variable = NULL)
    -
    -normal(location = 0, scale, variable = NULL)
    -
    -student_t(df = 10, location = 0, scale, variable = NULL)
    -
    -gamma(alpha, beta, variable = NULL)
    -
    -hs(global_scale = 1, slab_df = 10, slab_scale, variable = "beta_ev")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    lower, upper

    lower and upper bounds of the distribution

    variable

    A reserved slot for the variable name; if provided by the user, this may be ignored by geostan.

    location

    Location parameter(s), numeric value(s)

    scale

    Scale parameter(s), positive numeric value(s)

    df

    Degrees of freedom, positive numeric value(s)

    alpha

    shape parameter, positive numeric value(s)

    beta

    inverse scale parameter, positive numeric value(s)

    global_scale

    Control the (prior) degree of sparsity in the horseshoe model (0 < global_scale < 1).

    slab_df

    Degrees of freedom for the Student's t model for large coefficients in the horseshoe model (slab_df > 0).

    slab_scale

    Scale parameter for the Student's t model for large coefficients in the horseshoe model (slab_scale > 0).

    - -

    Source

    - -

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. Spatial Statistics. doi: 10.1016/j.spasta.2020.100450 - (open access: doi: 10.31219/osf.io/fah3z -).

    -

    Polson, N.G. and J.G. Scott (2010). Shrink globally, act locally: Sparse Bayesian regularization and prediction. Bayesian Statistics 9, 501-538.

    -

    Piironen, J and A. Vehtari (2017). Sparsity information and regularization in the horseshoe and other shrinkage priors. In Electronic Journal of Statistics, 11(2):5018-5051. doi: 10.1214/17-EJS1337SI -.

    -

    Value

    +
    +
    uniform(lower, upper, variable = NULL)
     
    -    

    An object of class prior which will be used internally by geostan to set parameters of prior distributions.

    -

    Student's t

    +normal(location = 0, scale, variable = NULL) +student_t(df = 10, location = 0, scale, variable = NULL) -

    Return value for student_t depends on the input; if no arguments are provided (specifically, if the scale parameter is missing), this will return an object of class 'family'; if at least the scale parameter is provided, student_t will return an object of class prior containing parameter values for the Student's t distribution.

    +gamma(alpha, beta, variable = NULL) + +hs(global_scale = 1, slab_df = 10, slab_scale, variable = "beta_ev")
    +
    + +
    +

    Source

    +

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. Spatial Statistics. doi: 10.1016/j.spasta.2020.100450 + (open access: doi: 10.31219/osf.io/fah3z +).

    +

    Polson, N.G. and J.G. Scott (2010). Shrink globally, act locally: Sparse Bayesian regularization and prediction. Bayesian Statistics 9, 501-538.

    +

    Piironen, J and A. Vehtari (2017). Sparsity information and regularization in the horseshoe and other shrinkage priors. In Electronic Journal of Statistics, 11(2):5018-5051.

    +
    +
    +

    Arguments

    +
    lower, upper
    +

    lower and upper bounds of the distribution

    +
    variable
    +

    A reserved slot for the variable name; if provided by the user, this may be ignored by geostan.

    +
    location
    +

    Location parameter(s), numeric value(s)

    +
    scale
    +

    Scale parameter(s), positive numeric value(s)

    +
    df
    +

    Degrees of freedom, positive numeric value(s)

    +
    alpha
    +

    shape parameter, positive numeric value(s)

    +
    beta
    +

    inverse scale parameter, positive numeric value(s)

    +
    global_scale
    +

    Control the (prior) degree of sparsity in the horseshoe model (0 < global_scale < 1).

    +
    slab_df
    +

    Degrees of freedom for the Student's t model for large coefficients in the horseshoe model (slab_df > 0).

    +
    slab_scale
    +

    Scale parameter for the Student's t model for large coefficients in the horseshoe model (slab_scale > 0).

    +
    +
    +

    Value

    +

    An object of class prior which will be used internally by geostan to set parameters of prior distributions.

    +

    Student's t

    -

    Details

    +

    Return value for student_t depends on the input; if no arguments are provided (specifically, if the scale parameter is missing), this will return an object of class 'family'; if at least the scale parameter is provided, student_t will return an object of class prior containing parameter values for the Student's t distribution.

    +
    +
    +
    +

    Details

    The prior distribution functions are used to set the values of prior parameters.

    -

    Users can control the values of the parameters, but the distribution (model) itself is fixed. The intercept and regression coefficients are given Gaussian prior distributions and scale parameters are assigned Student's t prior distributions. Degrees of freedom parameters are assigned gamma priors, and the spatial autocorrelation parameter in the CAR model, rho, is assigned a uniform prior. The horseshoe (hs) model is used by stan_esf.

    -

    Note that the variable argument is used internally by geostan, and any user provided values will be ignored.

    Parameterizations

    +

    Users can control the values of the parameters, but the distribution (model) itself is fixed. The intercept and regression coefficients are given Gaussian prior distributions and scale parameters are assigned Student's t prior distributions. Degrees of freedom parameters are assigned gamma priors, and the spatial autocorrelation parameter in the CAR model, rho, is assigned a uniform prior. The horseshoe (hs) model is used by stan_esf.

    +

    Note that the variable argument is used internally by geostan, and any user provided values will be ignored.

    +

    Parameterizations

    -

    For details on how any distribution is parameterized, see the Stan Language Functions Reference document: https://mc-stan.org/users/documentation/.

    +

    For details on how any distribution is parameterized, see the Stan Language Functions Reference document: https://mc-stan.org/users/documentation/.

    +
    -

    The horseshoe prior

    +
    +

    The horseshoe prior

    -

    The horseshoe prior is used by stan_esf as a prior for the eigenvector coefficients. The horseshoe model encodes a prior state of knowledge that effectively states, 'I believe a small number of these variables may be important, but I don't know which of them is important.' The horseshoe is a normal distribution with unknown scale (Polson and Scott 2010):

           beta_j ~ Normal(0, tau^2 * lambda_j^2)
    -
    +

    The horseshoe prior is used by stan_esf as a prior for the eigenvector coefficients. The horseshoe model encodes a prior state of knowledge that effectively states, 'I believe a small number of these variables may be important, but I don't know which of them is important.' The horseshoe is a normal distribution with unknown scale (Polson and Scott 2010):

    beta_j ~ Normal(0, tau^2 * lambda_j^2)

    The scale parameter for this prior is the product of two terms: lambda_j^2 is specific to the variable beta_j, and tau^2 is known as the global shrinkage parameter.

    -

    The global shrinkage parameter is assigned a half-Cauchy prior:

           tau ~ Cauchy(0, global_scale * sigma)
    -
    +

    The global shrinkage parameter is assigned a half-Cauchy prior:

    tau ~ Cauchy(0, global_scale * sigma)

    where global_scale is provided by the user and sigma is the scale parameter for the outcome variable; for Poisson and binomial models, sigma is fixed at one. Use global_scale to control the overall sparsity of the model.

    -

    The second part of the model is a Student's t prior for lambda_j. Most lambda_j will be small, since the model is half-Cauchy:

           lambda_j ~ Cauchy(0, 1)
    -
    +

    The second part of the model is a Student's t prior for lambda_j. Most lambda_j will be small, since the model is half-Cauchy:

    lambda_j ~ Cauchy(0, 1)
    -

    This model results in most lambda_j being small, but due to the long tails of the Cauchy distribution, strong evidence in the data can force any particular lambda_j to be large. Piironen and Vehtari (2017) adjust the model so that those large lambda_j are effectively assigned a Student's t model:

           Big_lambda_j ~ Student_t(slab_df, 0, slab_scale)
    -
    +

    This model results in most lambda_j being small, but due to the long tails of the Cauchy distribution, strong evidence in the data can force any particular lambda_j to be large. Piironen and Vehtari (2017) adjust the model so that those large lambda_j are effectively assigned a Student's t model:

    Big_lambda_j ~ Student_t(slab_df, 0, slab_scale)

    This is a schematic representation of the model; see Piironen and Vehtari (2017) or Donegan et al. (2020) for details.

    +
    +
    -

    Examples

    -
    
    -prior <- list()
    -prior$beta <- normal(c(0, 0), c(1, 1))
    -prior$intercept <- normal(-5, 3)
    -if (FALSE) {
    -fit <- stan_glm(deaths.male ~ offset(log(pop.at.risk.male)) + ICE + college,
    -                re = ~ GEOID,
    -                data = georgia,
    -                family = poisson(),
    -                prior = prior,
    -                prior_only = TRUE)
    -plot(fit)
    -}
    -
    -ME <- list()
    -ME$se <- data.frame(insurance = georgia$insurance.se)
    -ME$prior <- list()
    -ME$prior$df <- gamma(3, 0.2)
    -ME$prior$location <- normal(50, 50)
    -ME$prior$scale <- student_t(12, 10, 20)
    -if (FALSE) {
    -fit <- stan_glm(log(rate.male) ~ insurance, 
    -                data = georgia,
    -                ME = ME,
    -                prior_only = TRUE)
    -}
    -if (FALSE) {
    -fit = stan_glm(log(rate.male) ~ 1, data = georgia, family = student_t())
    -}
    +    
    +

    Examples

    +
    
    +prior <- list()
    +prior$beta <- normal(c(0, 0), c(1, 1))
    +prior$intercept <- normal(-5, 3)
    +if (FALSE) {
    +fit <- stan_glm(deaths.male ~ offset(log(pop.at.risk.male)) + ICE + college,
    +                re = ~ GEOID,
    +                data = georgia,
    +                family = poisson(),
    +                prior = prior,
    +                prior_only = TRUE)
    +plot(fit)
    +}
    +
    +ME <- list()
    +ME$se <- data.frame(insurance = georgia$insurance.se)
    +ME$prior <- list()
    +ME$prior$df <- gamma(3, 0.2)
    +ME$prior$location <- normal(50, 50)
    +ME$prior$scale <- student_t(12, 10, 20)
    +if (FALSE) {
    +fit <- stan_glm(log(rate.male) ~ insurance, 
    +                data = georgia,
    +                ME = ME,
    +                prior_only = TRUE)
    +}
    +if (FALSE) {
    +fit = stan_glm(log(rate.male) ~ 1, data = georgia, family = student_t())
    +}
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/row_standardize.html b/docs/reference/row_standardize.html index c850007e..7dd55e1f 100644 --- a/docs/reference/row_standardize.html +++ b/docs/reference/row_standardize.html @@ -1,93 +1,18 @@ - - - - - - - -Row-standardize a matrix; safe for zero row-sums. — row_standardize • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Row-standardize a matrix; safe for zero row-sums. — row_standardize • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,67 +61,58 @@

    Row-standardize a matrix; safe for zero row-sums.

    Row-standardize a matrix; safe for zero row-sums.

    -
    row_standardize(C, warn = TRUE, msg = "Row standardizing connectivity matrix")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    C

    A matrix

    warn

    Print `msg` if `warn = TRUE`.

    msg

    A warning message to print.

    - -

    Value

    +
    +
    row_standardize(C, warn = TRUE, msg = "Row standardizing connectivity matrix")
    +
    +
    +

    Arguments

    +
    C
    +

    A matrix

    +
    warn
    +

    Print msg if warn = TRUE.

    +
    msg
    +

    A warning message to print.

    +
    +
    +

    Value

    A row-standardized matrix, W (i.e., all row sums equal 1, or zero).

    +
    -

    Examples

    -
    
    -A <- shape2mat(georgia)
    -head(Matrix::summary(A))
    -Matrix::rowSums(A)
    +    
    +

    Examples

    +
    
    +A <- shape2mat(georgia)
    +head(Matrix::summary(A))
    +Matrix::rowSums(A)
     
    -W <- row_standardize(A)
    -head(Matrix::summary(W))
    -Matrix::rowSums(W)
    +W <- row_standardize(A)
    +head(Matrix::summary(W))
    +Matrix::rowSums(W)
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/se_log-1.png b/docs/reference/se_log-1.png deleted file mode 100644 index df969695..00000000 Binary files a/docs/reference/se_log-1.png and /dev/null differ diff --git a/docs/reference/se_log.html b/docs/reference/se_log.html index d7ab7b46..54e061d0 100644 --- a/docs/reference/se_log.html +++ b/docs/reference/se_log.html @@ -1,93 +1,18 @@ - - - - - - - -Standard error of log(x) — se_log • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Standard error of log(x) — se_log • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,89 +61,77 @@

    Standard error of log(x)

    Transform the standard error of x to standard error of log(x).

    -
    se_log(x, se, method = c("mc", "delta"), nsim = 5000, bounds = c(0, Inf))
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    An estimate

    se

    Standard error of x

    method

    The "delta" method uses a Taylor series approximation; the default method, "mc", uses a simple monte carlo method.

    nsim

    Number of draws to take if method = "mc".

    bounds

    Lower and upper bounds for the variable, used in the Monte Carlo method. Must be a length-two numeric vector with lower bound greater than or equal to zero (i.e., c(lower, upper), as in the default bounds = c(0, Inf)).

    - -

    Value

    +
    +
    se_log(x, se, method = c("mc", "delta"), nsim = 5000, bounds = c(0, Inf))
    +
    +
    +

    Arguments

    +
    x
    +

    An estimate

    +
    se
    +

    Standard error of x

    +
    method
    +

    The "delta" method uses a Taylor series approximation; the default method, "mc", uses a simple monte carlo method.

    +
    nsim
    +

    Number of draws to take if method = "mc".

    +
    bounds
    +

    Lower and upper bounds for the variable, used in the Monte Carlo method. Must be a length-two numeric vector with lower bound greater than or equal to zero (i.e., c(lower, upper), as in the default bounds = c(0, Inf)).

    +
    +
    +

    Value

    Numeric vector of standard errors

    -

    Details

    - +
    +
    +

    Details

    The delta method returns x^(-1) * se. The monte carlo method is detailed in the examples section.

    +
    -

    Examples

    -
    
    -data(georgia)
    -x = georgia$college
    -se = georgia$college.se
    -
    -lse1 = se_log(x, se)
    -lse2 = se_log(x, se, method = "delta")
    -if (FALSE) {
    -plot(lse1, lse2); abline(0, 1)
    -}
    -
    -# the monte carlo method
    -x = 10
    -se = 2
    -z = rnorm(n = 30e3, mean = x,  sd = se)
    -l.z = log(z)
    -sd(l.z)
    -se_log(x, se, method = "mc")
    -se_log(x, se, method = "delta")
    +    
    +

    Examples

    +
    
    +data(georgia)
    +x = georgia$college
    +se = georgia$college.se
    +
    +lse1 = se_log(x, se)
    +lse2 = se_log(x, se, method = "delta")
    +if (FALSE) {
    +plot(lse1, lse2); abline(0, 1)
    +}
    +
    +# the monte carlo method
    +x = 10
    +se = 2
    +z = rnorm(n = 30e3, mean = x,  sd = se)
    +l.z = log(z)
    +sd(l.z)
    +se_log(x, se, method = "mc")
    +se_log(x, se, method = "delta")
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/sentencing.html b/docs/reference/sentencing.html index cc6bb882..34eb34b1 100644 --- a/docs/reference/sentencing.html +++ b/docs/reference/sentencing.html @@ -1,94 +1,19 @@ - - - - - - - -Florida state prison sentencing counts by county, 1905-1910 — sentencing • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Florida state prison sentencing counts by county, 1905-1910 — sentencing • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +

    A spatial polygons data frame of historical 1910 county boundaries of Florida with aggregated state prison sentencing counts and census data. - Sentencing and population counts are aggregates over the period 1905-1910, where populations were interpolated linearly between decennial censuses of 1900 and 1910.

    +Sentencing and population counts are aggregates over the period 1905-1910, where populations were interpolated linearly between decennial censuses of 1900 and 1910.

    +
    + +
    +
    sentencing
    -
    sentencing
    +
    +

    Format

    +

    A spatial polygons data frame with the following attributes:

    name
    +

    County name

    + +
    wpop
    +

    White population total for years 1905-1910

    + +
    bpop
    +

    Black population total for years 1905-1910

    +
    sents
    +

    Number of state prison sentences, 1905-1910

    -

    Format

    +
    plantation_belt
    +

    Binary indicator for inclusion in the plantation belt

    -

    A spatial polygons data frame with the following attributes:

    -
    name

    County name

    -
    wpop

    White population total for years 1905-1910

    -
    bpop

    Black population total for years 1905-1910

    -
    sents

    Number of state prison sentences, 1905-1910

    -
    plantation_belt

    Binary indicator for inclusion in the plantation belt

    -
    pct_ag_1910

    Percent of land area in agriculture, 1910

    -
    expected_sents

    Expected sentences given demographic information and state level sentencing rates by race

    -
    sir_raw

    Standardized incident ratio (observed/expected sentences)

    +
    pct_ag_1910
    +

    Percent of land area in agriculture, 1910

    -
    +
    expected_sents
    +

    Expected sentences given demographic information and state level sentencing rates by race

    -

    Source

    +
    sir_raw
    +

    Standardized incidence ratio (observed/expected sentences)

    -

    Donegan, Connor. "The Making of Florida's 'Criminal Class': Race, Modernity and the Convict Leasing Program." Florida Historical Quarterly 97.4 (2019): 408-434. https://osf.io/2wj7s/.

    + +
    +
    +

    Source

    +

    Donegan, Connor. "The Making of Florida's 'Criminal Class': Race, Modernity and the Convict Leasing Program." Florida Historical Quarterly 97.4 (2019): 408-434. https://osf.io/2wj7s/.

    Mullen, Lincoln A. and Bratt, Jordon. "USABoundaries: Historical and Contemporary Boundaries of the United States of America," - Journal of Open Source Software 3, no. 23 (2018): 314, doi: 10.21105/joss.00314 +Journal of Open Source Software 3, no. 23 (2018): 314, doi: 10.21105/joss.00314 .

    +
    -

    Examples

    -
    if (FALSE) {
    -data(sentencing)
    -head(sentencing@data)
    -}
    +    
    +

    Examples

    +
    if (FALSE) {
    +data(sentencing)
    +head(sentencing@data)
    +}
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/shape2mat.html b/docs/reference/shape2mat.html index 9b8c3577..a0592159 100644 --- a/docs/reference/shape2mat.html +++ b/docs/reference/shape2mat.html @@ -1,93 +1,18 @@ - - - - - - - -Create spatial and space-time connectivity matrices — shape2mat • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create spatial and space-time connectivity matrices — shape2mat • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,141 +61,99 @@

    Create spatial and space-time connectivity matrices

    Creates sparse matrix representations of spatial connectivity structures

    -
    shape2mat(
    -  shape,
    -  style = c("B", "W"),
    -  queen = TRUE,
    -  snap = sqrt(.Machine$double.eps),
    -  t = 1,
    -  st.style = c("contemp", "lag")
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - -
    shape

    An object of class sf, SpatialPolygons or SpatialPolygonsDataFrame.

    style

    What kind of coding scheme should be used to create the spatial connectivity matrix? Defaults to "B" for binary; use "W" for row-standardized weights.

    queen

    Passed to poly2nb to set the contiguity condition. Defaults to TRUE so that a single shared boundary point (rather than a shared border/line) between polygons is sufficient for them to be considered neighbors.

    snap

    Passed to poly2nb; "boundary points less than ‘snap’ distance apart are considered to indicate contiguity."

    t

    Number of time periods. Only the binary coding scheme is available for space-time connectivity matrices.

    st.style

    For space-time data, what type of space-time connectivity structure should be used? Options are "lag" for the lagged specification and "contemp" (the default) for contemporaneous specification (see Details).

    - -

    Source

    +
    +
    shape2mat(
    +  shape,
    +  style = c("B", "W"),
    +  queen = TRUE,
    +  snap = sqrt(.Machine$double.eps),
    +  t = 1,
    +  st.style = c("contemp", "lag")
    +)
    +
    +
    +

    Source

    Griffith, D. A. (2012). Space, time, and space-time eigenvector filter specifications that account for autocorrelation. Estadística Espanola, 54(177), 7-34.

    Haining, R. P., & Li, G. (2020). Regression Modelling With Spatial and Spatial-Temporal Data: A Bayesian Approach. CRC Press.

    -

    Value

    - +
    +
    +

    Arguments

    +
    shape
    +

    An object of class sf, SpatialPolygons or SpatialPolygonsDataFrame.

    +
    style
    +

    What kind of coding scheme should be used to create the spatial connectivity matrix? Defaults to "B" for binary; use "W" for row-standardized weights.

    +
    queen
    +

    Passed to poly2nb to set the contiguity condition. Defaults to TRUE so that a single shared boundary point (rather than a shared border/line) between polygons is sufficient for them to be considered neighbors.

    +
    snap
    +

    Passed to poly2nb; "boundary points less than ‘snap’ distance apart are considered to indicate contiguity."

    +
    t
    +

    Number of time periods. Only the binary coding scheme is available for space-time connectivity matrices.

    +
    st.style
    +

    For space-time data, what type of space-time connectivity structure should be used? Options are "lag" for the lagged specification and "contemp" (the default) for contemporaneous specification (see Details).

    +
    +
    +

    Value

    A spatial connectivity matrix

    -

    Details

    - +
    +
    +

    Details

    Haining and Li (Ch. 4) provide a helpful discussion of spatial connectivity matrices.

    -

    The space-time connectivity matrix can be used for eigenvector space-time filtering (stan_esf. The `lagged' space-time structure connects each observation to its own past (one period lagged) value and the past value of its neighbors. The `contemporaneous' specification links each observation to its neighbors and to its own in situ past (one period lagged) value (Griffith 2012, p. 23).

    -

    See also

    +

    The space-time connectivity matrix can be used for eigenvector space-time filtering (stan_esf). The 'lagged' space-time structure connects each observation to its own past (one period lagged) value and the past value of its neighbors. The 'contemporaneous' specification links each observation to its neighbors and to its own in situ past (one period lagged) value (Griffith 2012, p. 23).

    +
    + - +
    +

    Examples

    +
    
    +data(georgia)
    +
    +## binary adjacency matrix
    +C <- shape2mat(georgia, "B")
    +## row sums gives the numbers of neighbors per observation
    +Matrix::rowSums(C)
    +head(Matrix::summary(C))
    +
    +## row-standardized matrix 
    +W <- shape2mat(georgia, "W")
    +Matrix::rowSums(W)
    +head(Matrix::summary(W))
    +
    +## space-time matrices 
    +## for eigenvector space-time filtering
    +## if you have multiple years with same neighbors,
    +## provide the geography (for a single year!) and number of years \code{t}
    +Cst <- shape2mat(georgia, t = 5)
    +dim(Cst)
    +EVst <- make_EV(Cst)
    +dim(EVst)
     
    -    

    Examples

    -
    
    -data(georgia)
    -
    -## binary adjacency matrix
    -C <- shape2mat(georgia, "B")
    -## row sums gives the numbers of neighbors per observation
    -Matrix::rowSums(C)
    -#>   [1]  6  5  6  5  3  5  7  4  5  5  5  7  6  4  5  4  7  7  4  5  7  6  6  6  4
    -#>  [26]  5  6  3  8  6  5  7  3  3  8  6  6  7  5  8  8  5  6  6  5  4  5  6  7  5
    -#>  [51]  2  7  6  5  3  1  3  7  6  2  5  5  8  6  7  7  5  6  4  4  6  4  6 10  8
    -#>  [76]  7  7  6  6  3  6 10  6  6  5  2  6  5  4  7  4  8  5  4  5  4  4  7  6  6
    -#> [101]  6  5  4  4  6  8  4  6  6  4  5  4  4  4  9  3  4  7  7  4  3  7  4  6  4
    -#> [126]  5  7  6  4  4  5  6  5  5  6  6  7  5  6  5  4  7  6  5  7  3  3  7  3  7
    -#> [151]  5  6  5  4  6  3  5  7  8
    -head(Matrix::summary(C))
    -#> 159 x 159 sparse Matrix of class "ngCMatrix", with 860 entries 
    -#>     i j
    -#> 1  23 1
    -#> 2  58 1
    -#> 3  59 1
    -#> 4 131 1
    -#> 5 148 1
    -#> 6 159 1
    -
    -## row-standardized matrix 
    -W <- shape2mat(georgia, "W")
    -Matrix::rowSums(W)
    -#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#>  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#> [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#> [149] 1 1 1 1 1 1 1 1 1 1 1
    -head(Matrix::summary(W))
    -#> 159 x 159 sparse Matrix of class "dgCMatrix", with 860 entries 
    -#>     i j         x
    -#> 1  23 1 0.1666667
    -#> 2  58 1 0.1428571
    -#> 3  59 1 0.1666667
    -#> 4 131 1 0.2000000
    -#> 5 148 1 0.1428571
    -#> 6 159 1 0.1250000
    -
    -## space-time matricies 
    -## for eigenvector space-time filtering
    -## if you have multiple years with same neighbors,
    -## provide the geography (for a single year!) and number of years \code{t}
    -Cst <- shape2mat(georgia, t = 5)
    -dim(Cst)
    -#> [1] 795 795
    -EVst <- make_EV(Cst)
    -dim(EVst)
    -#> [1] 795 208
    -
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/sim_sar.html b/docs/reference/sim_sar.html index 378c746d..7a95740d 100644 --- a/docs/reference/sim_sar.html +++ b/docs/reference/sim_sar.html @@ -1,93 +1,18 @@ - - - - - - - -Simulate spatially autocorrelated data — sim_sar • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Simulate spatially autocorrelated data — sim_sar • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,89 +61,73 @@

    Simulate spatially autocorrelated data

    Given a spatial weights matrix and degree of autocorrelation, returns autocorrelated data.

    -
    sim_sar(m = 1, mu = rep(0, nrow(w)), w, rho, sigma = 1, ...)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - -
    m

    The number of samples required. Defaults to m=1 to return an n-length vector; if m>1, an m x n matrix is returned (i.e. each row will contain a sample of correlated values).

    mu

    An n-length vector of mean values. Defaults to a vector of zeros with length equal to nrow(w).

    w

    Row-standardized n x n spatial weights matrix.

    rho

    Spatial autocorrelation parameter in the range (-1, 1). Typically a scalar value; otherwise an n-length numeric vector.

    sigma

    Scale parameter (standard deviation). Defaults to sigma = 1. Typically a scalar value; otherwise an n-length numeric vector.

    ...

    further arguments passed to MASS::mvrnorm.

    - -

    Value

    +
    +
    sim_sar(m = 1, mu = rep(0, nrow(w)), w, rho, sigma = 1, ...)
    +
    +
    +

    Arguments

    +
    m
    +

    The number of samples required. Defaults to m=1 to return an n-length vector; if m>1, an m x n matrix is returned (i.e. each row will contain a sample of correlated values).

    +
    mu
    +

    An n-length vector of mean values. Defaults to a vector of zeros with length equal to nrow(w).

    +
    w
    +

    Row-standardized n x n spatial weights matrix.

    +
    rho
    +

    Spatial autocorrelation parameter in the range (-1, 1). Typically a scalar value; otherwise an n-length numeric vector.

    +
    sigma
    +

    Scale parameter (standard deviation). Defaults to sigma = 1. Typically a scalar value; otherwise an n-length numeric vector.

    +
    ...
    +

    further arguments passed to MASS::mvrnorm.

    +
    +
    +

    Value

    If m = 1 a vector of the same length as mu, otherwise an m x length(mu) matrix with one sample in each row.

    -

    Details

    - -

    Calls MASS::mvrnorm internally to draw from the multivariate normal distribution. The covariance matrix is specified following the simultaneous autoregressive (SAR) model.

    -

    See also

    - - - -

    Examples

    -
    
    -data(georgia)
    -w <- shape2mat(georgia, "W")
    -x <- sim_sar(w = w, rho = 0.5)
    -aple(x, w)
    -#> [1] 0.519
    -
    -x <- sim_sar(w = w, rho = 0.7, m = 4)
    -dim(x)
    -#> [1]   4 159
    -apply(x, 1, aple, w = w)
    -#> [1] 0.762 0.697 0.741 0.621
    -
    +    
    +
    +

    Details

    +

    Calls MASS::mvrnorm internally to draw from the multivariate normal distribution. The covariance matrix is specified following the simultaneous autoregressive (SAR) model.

    +
    +
    +

    See also

    + +
    + +
    +

    Examples

    +
    
    +data(georgia)
    +w <- shape2mat(georgia, "W")
    +x <- sim_sar(w = w, rho = 0.5)
    +aple(x, w)
    +
    +x <- sim_sar(w = w, rho = 0.7, m = 10)
    +dim(x)
    +apply(x, 1, aple, w = w)
    +
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/sp_diag-1.png b/docs/reference/sp_diag-1.png deleted file mode 100644 index 5c6404b8..00000000 Binary files a/docs/reference/sp_diag-1.png and /dev/null differ diff --git a/docs/reference/sp_diag-2.png b/docs/reference/sp_diag-2.png deleted file mode 100644 index b47d0c01..00000000 Binary files a/docs/reference/sp_diag-2.png and /dev/null differ diff --git a/docs/reference/sp_diag-3.png b/docs/reference/sp_diag-3.png deleted file mode 100644 index 13ef9544..00000000 Binary files a/docs/reference/sp_diag-3.png and /dev/null differ diff --git a/docs/reference/sp_diag-4.png b/docs/reference/sp_diag-4.png deleted file mode 100644 index 62603bbf..00000000 Binary files a/docs/reference/sp_diag-4.png and /dev/null differ diff --git a/docs/reference/sp_diag-5.png b/docs/reference/sp_diag-5.png deleted file mode 100644 index 88c31a05..00000000 Binary files a/docs/reference/sp_diag-5.png and /dev/null differ diff --git a/docs/reference/sp_diag.html b/docs/reference/sp_diag.html index 53cc6512..57bc2fb8 100644 --- a/docs/reference/sp_diag.html +++ b/docs/reference/sp_diag.html @@ -1,93 +1,18 @@ - - - - - - - -Spatial data diagnostics — sp_diag • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Spatial data diagnostics — sp_diag • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,141 +61,123 @@

    Spatial data diagnostics

    Visual diagnostics for areal data and model residuals

    -
    sp_diag(
    -  y,
    -  shape,
    -  name = "y",
    -  plot = TRUE,
    -  style = c("W", "B"),
    -  w = shape2mat(shape, match.arg(style)),
    -  binwidth = function(x) 0.5 * sd(x),
    -  ...
    -)
    -
    -# S3 method for geostan_fit
    -sp_diag(
    -  y,
    -  shape,
    -  name = "Residual",
    -  plot = TRUE,
    -  style = c("W", "B"),
    -  w = shape2mat(shape, match.arg(style)),
    -  binwidth = function(x) 0.5 * stats::sd(x),
    -  rates = TRUE,
    -  size = 0.15,
    -  ...
    -)
    -
    -# S3 method for numeric
    -sp_diag(
    -  y,
    -  shape,
    -  name = "y",
    -  plot = TRUE,
    -  style = c("W", "B"),
    -  w = shape2mat(shape, match.arg(style)),
    -  binwidth = function(x) 0.5 * stats::sd(x),
    -  ...
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    y

    A numeric vector, or a fitted geostan model (class geostan_fit).

    shape

    An object of class sf or another spatial object coercible to sf with sf::st_as_sf such as SpatialPolygonsDataFrame.

    name

    The name to use on the plot labels; default to "y" or, if y is a geostan_fit object, to "Residuals".

    plot

    If FALSE, return a list of gg plots.

    style

    Style of connectivity matrix; if w is not provided, style is passed to shape2mat and defaults to "W" for row-standardized.

    w

    An optional spatial connectivity matrix; if not provided, one will be created using shape2mat.

    binwidth

    A function with a single argument that will be passed to the binwidth argument in geom_histogram. The default is to set the width of bins to 0.5 * sd(x).

    ...

    Additional arguments passed to residuals.geostan_fit. For binomial and Poisson models, this includes the option to view the outcome variable as a rate (the default) rather than a count; for stan_car models with auto-Gaussian likelihood (fit$family$family = "auto_gaussian"), the residuals will be detrended by default, but this can be changed using detrend = FALSE`.

    rates

    For Poisson and binomial models, convert the outcome variable to a rate before calculating residuals. Defaults to rates = TRUE.

    size

    Point size and linewidth for point-interval plot of observed vs. fitted values (passed to geom_pointrange).

    - -

    Value

    - -

    A grid of spatial diagnostic plots. When provided with a numeric vector, this function plots a histogram, Moran scatter plot, and map. When provided with a fitted geostan model, the function returns a point-interval plot of observed values against fitted values (mean and 95 percent credible interval), a histogram of Moran coefficient values calculated from the joint posterior distribution of the residuals, and a map of the mean posterior residuals (means of the marginal distributions).

    -

    If plot = TRUE, the ggplots are drawn using grid.arrange; otherwise, they are returned in a list. For the geostan_fit method, the underlying data for the Moran coefficient will also be returned if plot = FALSE.

    -

    See also

    +
    +
    sp_diag(
    +  y,
    +  shape,
    +  name = "y",
    +  plot = TRUE,
    +  mc_style = c("scatter", "hist"),
    +  style = c("W", "B"),
    +  w = shape2mat(shape, match.arg(style)),
    +  binwidth = function(x) 0.5 * sd(x),
    +  ...
    +)
    +
    +# S3 method for geostan_fit
    +sp_diag(
    +  y,
    +  shape,
    +  name = "Residual",
    +  plot = TRUE,
    +  mc_style = c("scatter", "hist"),
    +  style = c("W", "B"),
    +  w = shape2mat(shape, match.arg(style)),
    +  binwidth = function(x) 0.5 * stats::sd(x),
    +  rates = TRUE,
    +  size = 0.15,
    +  ...
    +)
    +
    +# S3 method for numeric
    +sp_diag(
    +  y,
    +  shape,
    +  name = "y",
    +  plot = TRUE,
    +  style = c("W", "B"),
    +  w = shape2mat(shape, match.arg(style)),
    +  binwidth = function(x) 0.5 * stats::sd(x),
    +  ...
    +)
    +
    - +
    +

    Arguments

    +
    y
    +

    A numeric vector, or a fitted geostan model (class geostan_fit).

    +
    shape
    +

    An object of class sf or another spatial object coercible to sf with sf::st_as_sf such as SpatialPolygonsDataFrame.

    +
    name
    +

    The name to use on the plot labels; defaults to "y" or, if y is a geostan_fit object, to "Residual".

    +
    plot
    +

    If FALSE, return a list of gg plots.

    +
    mc_style
    +

    Character string indicating how to plot the residual Moran coefficient (only used if y is a fitted model): if mc_style = "scatter", then moran_plot will be used with the marginal residuals; if mc_style = "hist", then a histogram of Moran coefficient values will be returned, where each plotted value represents the degree of residual autocorrelation in a draw from the joint posterior distribution of model parameters.

    +
    style
    +

    Style of connectivity matrix; if w is not provided, style is passed to shape2mat and defaults to "W" for row-standardized.

    +
    w
    +

    An optional spatial connectivity matrix; if not provided, one will be created using shape2mat.

    +
    binwidth
    +

    A function with a single argument that will be passed to the binwidth argument in geom_histogram. The default is to set the width of bins to 0.5 * sd(x).

    +
    ...
    +

    Additional arguments passed to residuals.geostan_fit. For binomial and Poisson models, this includes the option to view the outcome variable as a rate (the default) rather than a count; for stan_car models with auto-Gaussian likelihood (fit$family$family = "auto_gaussian"), the residuals will be detrended by default, but this can be changed using detrend = FALSE.

    +
    rates
    +

    For Poisson and binomial models, convert the outcome variable to a rate before calculating residuals. Defaults to rates = TRUE.

    +
    size
    +

    Point size and linewidth for point-interval plot of observed vs. fitted values (passed to geom_pointrange).

    +
    +
    +

    Value

    +

    A grid of spatial diagnostic plots. When provided with a numeric vector, this function plots a histogram, Moran scatter plot, and map. When provided with a fitted geostan model, the function returns a point-interval plot of observed values against fitted values (mean and 95 percent credible interval), either a Moran scatter plot of residuals or a histogram of Moran coefficient values calculated from the joint posterior distribution of the residuals, and a map of the mean posterior residuals (means of the marginal distributions). +If plot = TRUE, the ggplots are drawn using grid.arrange; otherwise, they are returned in a list. For the geostan_fit method, the underlying data for the Moran coefficient will also be returned if plot = FALSE.

    +
    +
    +

    See also

    + +
    -

    Examples

    -
    if (FALSE) {
    -data(georgia)
    -sp_diag(georgia$college, georgia)
    +    
    +

    Examples

    +
    if (FALSE) {
    +data(georgia)
    +sp_diag(georgia$college, georgia)
     
    -bin_fn <- function(y) mad(y)
    -sp_diag(georgia$college, georgia, binwidth = bin_fn)
    +bin_fn <- function(y) mad(y)
    +sp_diag(georgia$college, georgia, binwidth = bin_fn)
     
    -fit <- stan_glm(log(rate.male) ~ 1, data = georgia)
    -sp_diag(fit, georgia)
    +fit <- stan_glm(log(rate.male) ~ 1, data = georgia)
    +sp_diag(fit, georgia)
     
    -cp <- prep_car_data(shape2mat(georgia))
    -fit2 <- stan_car(log(rate.male) ~ 1, data = georgia, car_parts = cp)
    -sp_diag(fit2, georgia)
    -sp_diag(fit2, georgia, detrend = FALSE)
    -}
    +cp <- prep_car_data(shape2mat(georgia))
    +fit2 <- stan_car(log(rate.male) ~ 1, data = georgia, car_parts = cp)
    +sp_diag(fit2, georgia)
    +sp_diag(fit2, georgia, detrend = FALSE)
    +}
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/spatial.html b/docs/reference/spatial.html deleted file mode 100644 index 2fe48f64..00000000 --- a/docs/reference/spatial.html +++ /dev/null @@ -1,188 +0,0 @@ - - - - - - - - -Extract spatial component from a fitted geostan model — spatial • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Extracts the posterior distribution of the spatial component from a fitted geostan model

    -
    - -
    spatial(object, summary = TRUE, ...)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    object

    Fitted geostan model

    summary

    should the posterior distribution be summarized? If FALSE, returns a matrix of samples; else a data.frame with summary statistics of the spatial filter at each observation.

    ...

    additional arguments

    - -

    See also

    - -

    geostan_fit

    - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/stan_car-1.png b/docs/reference/stan_car-1.png deleted file mode 100644 index fcf4e3ee..00000000 Binary files a/docs/reference/stan_car-1.png and /dev/null differ diff --git a/docs/reference/stan_car-2.png b/docs/reference/stan_car-2.png deleted file mode 100644 index 7a1b0db8..00000000 Binary files a/docs/reference/stan_car-2.png and /dev/null differ diff --git a/docs/reference/stan_car-3.png b/docs/reference/stan_car-3.png deleted file mode 100644 index 6d66eba5..00000000 Binary files a/docs/reference/stan_car-3.png and /dev/null differ diff --git a/docs/reference/stan_car-4.png b/docs/reference/stan_car-4.png deleted file mode 100644 index 1f5e6026..00000000 Binary files a/docs/reference/stan_car-4.png and /dev/null differ diff --git a/docs/reference/stan_car-5.png b/docs/reference/stan_car-5.png deleted file mode 100644 index 6e6675a8..00000000 Binary files a/docs/reference/stan_car-5.png and /dev/null differ diff --git a/docs/reference/stan_car-6.png b/docs/reference/stan_car-6.png deleted file mode 100644 index 04535597..00000000 Binary files a/docs/reference/stan_car-6.png and /dev/null differ diff --git a/docs/reference/stan_car-7.png b/docs/reference/stan_car-7.png deleted file mode 100644 index e924ff31..00000000 Binary files a/docs/reference/stan_car-7.png and /dev/null differ diff --git a/docs/reference/stan_car-8.png b/docs/reference/stan_car-8.png deleted file mode 100644 index 64bfba36..00000000 Binary files a/docs/reference/stan_car-8.png and /dev/null differ diff --git a/docs/reference/stan_car.html b/docs/reference/stan_car.html index 0c0caed0..9340f3f5 100644 --- a/docs/reference/stan_car.html +++ b/docs/reference/stan_car.html @@ -1,93 +1,18 @@ - - - - - - - -Conditional autoregressive (CAR) models — stan_car • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Conditional autoregressive (CAR) models — stan_car • geostan - - - - - - - - 
- - - - - - +
    -
    - -
    - -
    +
    @@ -155,396 +61,399 @@

    Conditional autoregressive (CAR) models

    Use the CAR model as a prior on parameters, or fit data to an auto-Gaussian CAR model.

    -
    stan_car(
    -  formula,
    -  slx,
    -  re,
    -  data,
    -  car_parts,
    -  family = gaussian(),
    -  prior = NULL,
    -  ME = NULL,
    -  centerx = FALSE,
    -  prior_only = FALSE,
    -  censor_point,
    -  chains = 4,
    -  iter = 2000,
    -  refresh = 500,
    -  pars = NULL,
    -  control = NULL,
    -  ...
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    formula

    A model formula, following the R formula syntax. Binomial models can be specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    slx

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    re

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    -       alpha_tau ~ Student_t(d.f., location, scale).
    -
    - -

    With the CAR model, any alpha_re term should be at a different level or scale than the observations; that is, at a different scale than the autocorrelation structure of the CAR model itself.

    data

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    car_parts

    A list of data for the CAR model, as returned by prep_car_data.

    family

    The likelihood function for the outcome variable. Current options are auto_gaussian(), binomial(link = "logit"), and poisson(link = "log"); if family = gaussian() is provided, it will automatically be converted to auto_gaussian().

    prior

    A named list of parameters for prior distributions (see priors):

    - -
    intercept

    The intercept is assigned a Gaussian prior distribution (see normal

    . - -
    beta

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    - -
    car_scale

    The scale of the CAR model, car_scale. The scale is assigned a Student's t prior model (constrained to be positive).

    - -
    car_rho

    The spatial autocorrelation parameter in the CAR model, rho, is assigned a uniform prior distribution. By default, the prior will be uniform over all permissible values as determined by the eigenvalues of the connectivity matrix, C. The range of permissible values for rho is automatically printed to the console by prep_car_data.

    - -
    tau

    The scale parameter for any varying intercepts (a.k.a exchangeable random effects, or partial pooling) terms. This scale parameter, tau, is assigned a Student's t prior (constrained to be positive).

    - - -
    ME

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    centerx

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    prior_only

    Logical value; if TRUE, draw samples only from the prior distributions of parameters.

    censor_point

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths.

    chains

    Number of MCMC chains to use.

    iter

    Number of samples per chain.

    refresh

    Stan will print the progress of the sampler every refresh number of samples. Set refresh=0 to silence this.

    pars

    Optional; specify any additional parameters you'd like stored from the Stan model.

    control

    A named list of parameters to control the sampler's behavior. See stan for details.

    ...

    Other arguments passed to sampling. For multi-core processing, you can use cores = parallel::detectCores(), or run options(mc.cores = parallel::detectCores()) first.

    - -

    Source

    +
    +
    stan_car(
    +  formula,
    +  slx,
    +  re,
    +  data,
    +  car_parts,
    +  family = gaussian(),
    +  prior = NULL,
    +  ME = NULL,
    +  centerx = FALSE,
    +  prior_only = FALSE,
    +  censor_point,
    +  chains = 4,
    +  iter = 2000,
    +  refresh = 500,
    +  pars = NULL,
    +  control = NULL,
    +  ...
    +)
    +
    +
    +

    Source

    Cressie, Noel (2015 (1993)). Statistics for Spatial Data. Wiley Classics, Revised Edition.

    Cressie, Noel and Wikle, Christopher (2011). Statistics for Spatio-Temporal Data. Wiley.

    -

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    -

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 +

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    +

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 .

    Haining, Robert and Li, Guangquan (2020). Modelling Spatial and Spatial-Temporal Data: A Bayesian Approach. CRC Press.

    -

    Value

    - -

    An object of class geostan_fit (a list) containing:

    -
    summary

    Summaries of the main parameters of interest; a data frame.

    -
    diagnostic

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    -
    stanfit

    an object of class stanfit returned by rstan::stan

    -
    data

    a data frame containing the model data

    -
    family

    the user-provided or default family argument used to fit the model

    -
    formula

    The model formula provided by the user (not including CAR component)

    -
    slx

    The slx formula

    -
    re

    A list containing re, the varying intercepts (re) formula if provided, and +

    +
    +

    Arguments

    +
    formula
    +

    A model formula, following the R formula syntax. Binomial models can be specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    +
    slx
    +

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    +
    re
    +

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    +       alpha_tau ~ Student_t(d.f., location, scale).
    +
    + +

    With the CAR model, any alpha_re term should be at a different level or scale than the observations; that is, at a different scale than the autocorrelation structure of the CAR model itself.

    +
    data
    +

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    +
    car_parts
    +

    A list of data for the CAR model, as returned by prep_car_data.

    +
    family
    +

    The likelihood function for the outcome variable. Current options are auto_gaussian(), binomial(link = "logit"), and poisson(link = "log"); if family = gaussian() is provided, it will automatically be converted to auto_gaussian().

    +
    prior
    +

    A named list of parameters for prior distributions (see priors):

    intercept
    +

    The intercept is assigned a Gaussian prior distribution (see normal

    +. + +
    beta
    +

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    + + +
    car_scale
    +

    The scale of the CAR model, car_scale. The scale is assigned a Student's t prior model (constrained to be positive).

    + + +
    car_rho
    +

    The spatial autocorrelation parameter in the CAR model, rho, is assigned a uniform prior distribution. By default, the prior will be uniform over all permissible values as determined by the eigenvalues of the connectivity matrix, C. The range of permissible values for rho is automatically printed to the console by prep_car_data.

    + + +
    tau
    +

    The scale parameter for any varying intercepts (a.k.a exchangeable random effects, or partial pooling) terms. This scale parameter, tau, is assigned a Student's t prior (constrained to be positive).

    + + + +
    +
    ME
    +

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    +
    centerx
    +

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    +
    prior_only
    +

    Logical value; if TRUE, draw samples only from the prior distributions of parameters.

    +
    censor_point
    +

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths.

    +
    chains
    +

    Number of MCMC chains to use.

    +
    iter
    +

    Number of samples per chain.

    +
    refresh
    +

    Stan will print the progress of the sampler every refresh number of samples. Set refresh=0 to silence this.

    +
    pars
    +

    Optional; specify any additional parameters you'd like stored from the Stan model.

    +
    control
    +

    A named list of parameters to control the sampler's behavior. See stan for details.

    +
    ...
    +

    Other arguments passed to sampling. For multi-core processing, you can use cores = parallel::detectCores(), or run options(mc.cores = parallel::detectCores()) first.

    +
    +
    +

    Value

    +

    An object of class geostan_fit (a list) containing:

    summary
    +

    Summaries of the main parameters of interest; a data frame.

    + +
    diagnostic
    +

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    + +
    stanfit
    +

    an object of class stanfit returned by rstan::stan

    + +
    data
    +

    a data frame containing the model data

    + +
    family
    +

    the user-provided or default family argument used to fit the model

    + +
    formula
    +

    The model formula provided by the user (not including CAR component)

    + +
    slx
    +

    The slx formula

    + +
    re
    +

    A list containing re, the varying intercepts (re) formula if provided, and Data a data frame with columns id, the grouping variable, and idx, the index values assigned to each group.

    -
    priors

    Prior specifications.

    -
    x_center

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    +
    priors
    +

    Prior specifications.

    + + +
    x_center
    +

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    -
    spatial

    A data frame with the name of the spatial component parameter (either "phi" or, for auto Gaussian models, "trend") and method ("CAR")

    -
    ME

    A list indicating if the object contains an ME model; if so, the user-provided ME list is also stored here.

    -
    C

    Spatial connectivity matrix (in sparse matrix format).

    -
    +
    spatial
    +

    A data frame with the name of the spatial component parameter (either "phi" or, for auto Gaussian models, "trend") and method ("CAR")

    -

    Details

    +
    ME
    +

    A list indicating if the object contains an ME model; if so, the user-provided ME list is also stored here.

    +
    C
    +

    Spatial connectivity matrix (in sparse matrix format).

    + + +
    +
    +

    Details

    CAR models are discussed in Cressie and Wikle (2011, p. 184-88), Cressie (2015, Ch. 6-7), and Haining and Li (2020, p. 249-51).

    The Stan code for this implementation of the CAR model was first introduced in Donegan et al. (2021, supplementary material) for models of small area survey data.

    -

    Details and results depend on the family argument, as well as on the particular CAR specification chosen (see prep_car_data).

    Auto-Gaussian

    +

    Details and results depend on the family argument, as well as on the particular CAR specification chosen (see prep_car_data).

    +

    Auto-Gaussian

    -

    When family = auto_gaussian(), the CAR model is specified as follows:

            Y ~ MVGauss(Mu, Sigma)
    +

    When family = auto_gaussian(), the CAR model is specified as follows:

            Y ~ MVGauss(Mu, Sigma)
             Sigma = (I - rho C)^-1 * M * tau^2
    -
    +
    -

    where Mu is the mean vector (with intercept, covariates, etc.), C is a spatial connectivity matrix, and M is a known diagonal matrix with diagonal entries proportional to the conditional variances. C and M are provided by prep_car_data.

    -

    The covariance matrix of the CAR model, Sigma, contains two parameters: car_rho (rho), which controls the degree of spatial autocorrelation, and the scale parameter, car_scale (tau). The range of permissible values for rho depends on the specification of C and M; for options, see prep_car_data and Cressie and Wikle (2011, pp. 184-188).

    -

    The auto-Gaussian model contains an implicit spatial trend (i.e., autocorrelation) component which is calculated as follows (Cressie 2015, p. 564):

            trend = rho * C * (Y - Mu).
    -
    +

    where Mu is the mean vector (with intercept, covariates, etc.), C is a spatial connectivity matrix, and M is a known diagonal matrix with diagonal entries proportional to the conditional variances. C and M are provided by prep_car_data.

    +

    The covariance matrix of the CAR model, Sigma, contains two parameters: car_rho (rho), which controls the degree of spatial autocorrelation, and the scale parameter, car_scale (tau). The range of permissible values for rho depends on the specification of C and M; for options, see prep_car_data and Cressie and Wikle (2011, pp. 184-188).

    +

    The auto-Gaussian model contains an implicit spatial trend (i.e., autocorrelation) component which is calculated as follows (Cressie 2015, p. 564):

            trend = rho * C * (Y - Mu).
    +
    -

    This term can be extracted from a fitted auto-Gaussian model using the spatial method.

    -

    When applied to a fitted auto-Gaussian model, the residuals.geostan_fit method returns `de-trended' residuals by default. That is,

            residual = Y - Mu - trend.
    -
    +

    This term can be extracted from a fitted auto-Gaussian model using the spatial method.

    +

    When applied to a fitted auto-Gaussian model, the residuals.geostan_fit method returns `de-trended' residuals by default. That is,

    residual = Y - Mu - trend.

    To obtain "raw" residuals (Y - Mu), use residuals(fit, detrend = FALSE).

    +
    -

    Poisson

    +
    +

    Poisson

    -

    For family = poisson(), the model is specified as:

            Y ~ Poisson(exp(offset + lambda))
    +

    For family = poisson(), the model is specified as:

            Y ~ Poisson(exp(offset + lambda))
             lambda ~ MVGauss(Mu, Sigma)
             Sigma = (I - rho C)^-1 * M * tau^2
    -
    +

    These models are most often used to calculate small area incidence rates (mortality or disease incidence rates); the user provided offset should be, then, the natural logarithm of the denominator in the rates, e.g., log-population at risk.

    -

    For Poisson models, the spatial method returns the parameter vector phi, which is the log-risk minus the intercept and any covariates:

           phi = lambda - Mu.
    -
    +

    For Poisson models, the spatial method returns the parameter vector phi, which is the log-risk minus the intercept and any covariates:

    phi = lambda - Mu.
    -

    This is the spatial autocorrelation component. This is equivalent to specifying the model as:

            Y ~ Poisson(exp(offset + Mu + phi))
    +

    This is the spatial autocorrelation component. This is equivalent to specifying the model as:

            Y ~ Poisson(exp(offset + Mu + phi))
             phi ~ MVGauss(0, Sigma)
             Sigma = (I - rho C)^-1 * M * tau^2.
    -
    +
    -

    In the Poisson CAR model, phi contains a latent spatial trend as well as additional variation around it. If you would like to extract the latent/implicit spatial trend from phi, you can do so by calculating (following Cressie 2015, p. 564):

            trend = rho * C * phi.
    -
    +

    In the Poisson CAR model, phi contains a latent spatial trend as well as additional variation around it. If you would like to extract the latent/implicit spatial trend from phi, you can do so by calculating (following Cressie 2015, p. 564):

    trend = rho * C * phi.
    +
    -

    Binomial

    +
    +

    Binomial

    -

    For family = binomial(), the model is specified as:

            Y ~ Binomial(N, theta)
    +

    For family = binomial(), the model is specified as:

            Y ~ Binomial(N, theta)
             logit(theta) ~ MVGauss(Mu, Sigma)
             Sigma = (I - rho C)^-1 * M * tau^2
    -
    +

    where outcome data Y are counts, N is the number of trials, and theta is the 'success' rate. Note that the model formula should be structured as: cbind(successes, failures) ~ x, such that trials = successes + failures.

    -

    For fitted Binomial models, the spatial method will return the parameter vector phi, equivalent to:

            phi = logit(theta) - Mu.
    -
    +

    For fitted Binomial models, the spatial method will return the parameter vector phi, equivalent to:

    phi = logit(theta) - Mu.
    +
    -

    Spatially lagged covariates (SLX)

    +
    +

    Spatially lagged covariates (SLX)

    -

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, \...)
    -
    +

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, \...)
    +
    -

    is a shortcut for

    stan_glm(y ~ I(W \%*\% x1) + x1 + x2, \...)
    -
    +

    is a shortcut for

    stan_glm(y ~ I(W \%*\% x1) + x1 + x2, \...)
    +
    -

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    +

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    For measurement error (ME) models, the SLX argument is the only way to include spatially lagged covariates since the SLX term needs to be re-calculated on each iteration of the MCMC algorithm.

    +
    -

    Measurement error (ME) models

    +
    +

    Measurement error (ME) models

    -

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates. With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
    +

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates. With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
            x_true ~ MVGauss(mu, Sigma)
            Sigma = (I - rho C)^(-1) M * tau^2
            mu ~ Gauss(0, 100)
            tau ~ student_t(10, 0, 40)
            rho ~ uniform(lower_bound, upper_bound)
    -
    +
    -

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

           x ~ Gauss(x_true, se)
    -       x_true ~ student_t(df, mu, sigma)
    -       df ~ gamma(3, 0.2)
    -       mu ~ Gauss(0, 100)
    -       sigma ~ student_t(10, 0, 40)
    -
    +

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

    x ~ Gauss(x_true, se)
    +       x_true ~ student_t(df, mu, sigma)
    +       df ~ gamma(3, 0.2)
    +       mu ~ Gauss(0, 100)
    +       sigma ~ student_t(10, 0, 40)
    -

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

           x ~ Gauss(x_true, se)
    -      logit(x_true) ~ MVGauss(mu, Sigma)
    -
    +

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

    x ~ Gauss(x_true, se)
    +      logit(x_true) ~ MVGauss(mu, Sigma)

    and similar for the Student t model.

    +
    -

    Censored counts

    +
    +

    Censored counts

    Vital statistics systems and disease surveillance programs typically suppress case counts when they are smaller than a specific threshold value. In such cases, the observation of a censored count is not the same as a missing value; instead, you are informed that the value is an integer somewhere between zero and the threshold value. For Poisson models (family = poisson()), you can use the censor_point argument to encode this information into your model.

    -

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    -
    +

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    +
    -

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    -
    +

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    +

    For example, the US Centers for Disease Control and Prevention's CDC WONDER database censors all death counts between 0 and 9. To model CDC WONDER mortality data, you could provide censor_point = 9 and then the likelihood statement for censored counts would equal the summation of the Poisson probability mass function over each integer ranging from zero through 9 (inclusive), conditional on the fitted values (i.e., all model parameters). See Donegan (2021) for additional discussion, references, and Stan code.

    +
    + +
    +
    +

    Author

    +

    Connor Donegan, Connor.Donegan@UTDallas.edu

    +
    -

    Author

    - -

    Connor Donegan, Connor.Donegan@UTDallas.edu

    - -

    Examples

    -
    
    -# \donttest{
    -
    -library(ggplot2)
    -library(bayesplot)
    -library(sf)
    -# for automatic parallel processing
    -##options(mc.cores = parallel::detectCores())
    -
    -
    -# model incidence or mortality rates
    -data(georgia)
    -C <- shape2mat(georgia, style = "B")
    -cp <- prep_car_data(C)
    -
    -fit <- stan_car(deaths.male ~ offset(log(pop.at.risk.male)),
    -                car_parts = cp,
    -                data = georgia,
    -                family = poisson())
    -
    -rstan::stan_rhat(fit$stanfit)
    -rstan::stan_mcse(fit$stanfit)
    -print(fit)
    -sp_diag(fit, georgia)
    -
    -# censored count outcomes
    -# (increasing adapt_delta to 0.99 can help
    -#  to avoid divergent transitions, when needed)
    -sum(is.na(georgia$deaths.female))
    -fit <- stan_car(deaths.female ~ offset(log(pop.at.risk.female)),
    -                car_parts = cp,
    -                data = georgia,
    -                family = poisson(),
    -    ##   control = list(adapt_delta = 0.99, max_treedepth=12),
    -                censor_point = 9)
    -
    -# model observed/expected incidence
    -# in this case, prison sentences 
    -data(sentencing)
    -
    -C <- shape2mat(sentencing, style = "B")
    -cp <- prep_car_data(C, style = "WCAR")
    -log_e <- log(sentencing$expected_sents)
    -fit.car <- stan_car(sents ~ offset(log_e),
    -                    family = poisson(),
    -                    data = sentencing,
    -                    car_parts = cp)
    -
    -# MCMC diagnostics
    -rstan::stan_rhat(fit.car$stanfit)
    -rstan::stan_ess(fit.car$stanfit)
    -
    -# Spatial diagnostics
    -sp_diag(fit.car, sentencing)
    -
    -# posterior predictive distribution
    -yrep <- posterior_predict(fit.car, S = 75)
    -y <- sentencing$sents
    -bayesplot::ppc_dens_overlay(y, yrep)
    -
    -# examine posterior distributions of CAR parameters
    -plot(fit.car, pars = c("car_scale", "car_rho"))
    -
    -# map the spatial autocorrelation term, phi
    -sp.trend <- spatial(fit.car)$mean
    -ggplot(sf::st_as_sf(sentencing)) +
    -  geom_sf(aes(fill = sp.trend)) +
    -  scale_fill_gradient2()
    +    
    +

    Examples

    +
    
    +# \donttest{
    +
    +library(ggplot2)
    +library(bayesplot)
    +library(sf)
    +# for automatic parallel processing
    +##options(mc.cores = parallel::detectCores())
    +
    +
    +# model incidence or mortality rates
    +data(georgia)
    +C <- shape2mat(georgia, style = "B")
    +cp <- prep_car_data(C)
    +
    +fit <- stan_car(deaths.male ~ offset(log(pop.at.risk.male)),
    +                car_parts = cp,
    +                data = georgia,
    +                family = poisson())
    +
    +rstan::stan_rhat(fit$stanfit)
    +rstan::stan_mcse(fit$stanfit)
    +print(fit)
    +sp_diag(fit, georgia)
    +
    +# censored count outcomes
    +# (increasing adapt_delta to 0.99 can help
    +#  to avoid divergent transitions, when needed)
    +sum(is.na(georgia$deaths.female))
    +fit <- stan_car(deaths.female ~ offset(log(pop.at.risk.female)),
    +                car_parts = cp,
    +                data = georgia,
    +                family = poisson(),
    +    ##   control = list(adapt_delta = 0.99, max_treedepth=12),
    +                censor_point = 9)
    +
    +# model observed/expected incidence
    +# in this case, prison sentences 
    +data(sentencing)
    +
    +C <- shape2mat(sentencing, style = "B")
    +cp <- prep_car_data(C, style = "WCAR")
    +log_e <- log(sentencing$expected_sents)
    +fit.car <- stan_car(sents ~ offset(log_e),
    +                    family = poisson(),
    +                    data = sentencing,
    +                    car_parts = cp)
    +
    +# MCMC diagnostics
    +rstan::stan_rhat(fit.car$stanfit)
    +rstan::stan_ess(fit.car$stanfit)
    +
    +# Spatial diagnostics
    +sp_diag(fit.car, sentencing)
    +
    +# posterior predictive distribution
    +yrep <- posterior_predict(fit.car, S = 75)
    +y <- sentencing$sents
    +bayesplot::ppc_dens_overlay(y, yrep)
    +
    +# examine posterior distributions of CAR parameters
    +plot(fit.car, pars = c("car_scale", "car_rho"))
    +
    +# map the spatial autocorrelation term, phi
    +sp.trend <- spatial(fit.car)$mean
    +ggplot(sf::st_as_sf(sentencing)) +
    +  geom_sf(aes(fill = sp.trend)) +
    +  scale_fill_gradient2()
      
    -# calculate log-standardized sentencing ratios (log-SSRs)
    -# (like Standardized Incidence Ratios: observed/expected case counts)
    -SSR <- fitted(fit.car)$mean
    -log.SSR <- log( SSR, base = 2 )
    -ggplot(sf::st_as_sf(sentencing)) +
    - geom_sf(aes(fill = log.SSR)) +
    - scale_fill_gradient2(
    -   midpoint = 0,
    -   name = NULL,
    -   breaks = seq(-3, 3, by = 0.5),
    -   low = "navy",
    -   high = "darkred"
    -   ) +
    - labs(title = "Log-Standardized Sentencing Ratios",
    -      subtitle = "log( Fitted/Expected ), base 2"
    -      ) +
    - theme_void() +
    - theme(
    -   legend.position = "bottom",
    -   legend.key.height = unit(0.35, "cm"),
    -   legend.key.width = unit(1.5, "cm")
    - )
    -
    -## DCAR specification (inverse-distance based)
    -library(sf)
    -A <- shape2mat(georgia, "B")
    -D <- sf::st_distance(sf::st_centroid(georgia))
    -A <- D * A
    -cp <- prep_car_data(A, "DCAR", k = 1)
    -
    -# view distance against inverse distance: (A+gamma)^(-k) / max(C)
    -plot(as.numeric(A),
    -    as.numeric(cp$C)
    -    )
    -
    -fit <- stan_car(deaths.male ~ offset(log(pop.at.risk.male)),
    -               data = georgia,
    -               car = cp,
    -               family = poisson())
    -
    -sp_diag(fit, georgia)
    -
    -# }
    +# calculate log-standardized sentencing ratios (log-SSRs)
    +# (like Standardized Incidence Ratios: observed/expected case counts)
    +SSR <- fitted(fit.car)$mean
    +log.SSR <- log( SSR, base = 2 )
    +ggplot(sf::st_as_sf(sentencing)) +
    + geom_sf(aes(fill = log.SSR)) +
    + scale_fill_gradient2(
    +   midpoint = 0,
    +   name = NULL,
    +   breaks = seq(-3, 3, by = 0.5),
    +   low = "navy",
    +   high = "darkred"
    +   ) +
    + labs(title = "Log-Standardized Sentencing Ratios",
    +      subtitle = "log( Fitted/Expected ), base 2"
    +      ) +
    + theme_void() +
    + theme(
    +   legend.position = "bottom",
    +   legend.key.height = unit(0.35, "cm"),
    +   legend.key.width = unit(1.5, "cm")
    + )
    +
    +## DCAR specification (inverse-distance based)
    +library(sf)
    +A <- shape2mat(georgia, "B")
    +D <- sf::st_distance(sf::st_centroid(georgia))
    +A <- D * A
    +cp <- prep_car_data(A, "DCAR", k = 1)
    +
    +# view distance against inverse distance: (A+gamma)^(-k) / max(C)
    +plot(as.numeric(A),
    +    as.numeric(cp$C)
    +    )
    +
    +fit <- stan_car(deaths.male ~ offset(log(pop.at.risk.male)),
    +               data = georgia,
    +               car = cp,
    +               family = poisson())
    +
    +sp_diag(fit, georgia)
    +
    +# }
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/stan_esf-1.png b/docs/reference/stan_esf-1.png deleted file mode 100644 index 3c8fa3b2..00000000 Binary files a/docs/reference/stan_esf-1.png and /dev/null differ diff --git a/docs/reference/stan_esf-10.png b/docs/reference/stan_esf-10.png deleted file mode 100644 index ea9ef477..00000000 Binary files a/docs/reference/stan_esf-10.png and /dev/null differ diff --git a/docs/reference/stan_esf-2.png b/docs/reference/stan_esf-2.png deleted file mode 100644 index 3da4b415..00000000 Binary files a/docs/reference/stan_esf-2.png and /dev/null differ diff --git a/docs/reference/stan_esf-3.png b/docs/reference/stan_esf-3.png deleted file mode 100644 index 3da4b415..00000000 Binary files a/docs/reference/stan_esf-3.png and /dev/null differ diff --git a/docs/reference/stan_esf-4.png b/docs/reference/stan_esf-4.png deleted file mode 100644 index 6c0d3f4d..00000000 Binary files a/docs/reference/stan_esf-4.png and /dev/null differ diff --git a/docs/reference/stan_esf-5.png b/docs/reference/stan_esf-5.png deleted file mode 100644 index 80dd4aa3..00000000 Binary files a/docs/reference/stan_esf-5.png and /dev/null differ diff --git a/docs/reference/stan_esf-6.png b/docs/reference/stan_esf-6.png deleted file mode 100644 index 846abfc1..00000000 Binary files a/docs/reference/stan_esf-6.png and /dev/null differ diff --git a/docs/reference/stan_esf-7.png b/docs/reference/stan_esf-7.png deleted file mode 100644 index a904f727..00000000 Binary files a/docs/reference/stan_esf-7.png and /dev/null differ diff --git a/docs/reference/stan_esf-8.png b/docs/reference/stan_esf-8.png deleted file mode 100644 index 66b41b26..00000000 Binary files a/docs/reference/stan_esf-8.png and /dev/null differ diff --git a/docs/reference/stan_esf-9.png b/docs/reference/stan_esf-9.png deleted file mode 100644 index 523e4756..00000000 Binary files a/docs/reference/stan_esf-9.png and /dev/null differ diff --git a/docs/reference/stan_esf.html b/docs/reference/stan_esf.html 
index 7ebee512..516be79e 100644 --- a/docs/reference/stan_esf.html +++ b/docs/reference/stan_esf.html @@ -1,93 +1,18 @@ - - - - - - - -Spatial filtering — stan_esf • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Spatial filtering — stan_esf • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,332 +61,328 @@

    Spatial filtering

    Fit a spatial regression model using eigenvector spatial filtering (ESF).

    -
    stan_esf(
    -  formula,
    -  slx,
    -  re,
    -  data,
    -  C,
    -  EV = make_EV(C, nsa = nsa, threshold = threshold),
    -  nsa = FALSE,
    -  threshold = 0.25,
    -  family = gaussian(),
    -  prior = NULL,
    -  ME = NULL,
    -  centerx = FALSE,
    -  censor_point,
    -  prior_only = FALSE,
    -  chains = 4,
    -  iter = 2000,
    -  refresh = 500,
    -  pars = NULL,
    -  control = NULL,
    -  ...
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    formula

    A model formula, following the R formula syntax. Binomial models are specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    slx

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    re

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    -       alpha_tau ~ Student_t(d.f., location, scale).
    -
    data

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    C

    Spatial connectivity matrix which will be used to calculate eigenvectors, if EV is not provided by the user. Typically, the binary connectivity matrix is best for calculating eigenvectors (i.e., using C = shape2mat(shape, style = "B")). This matrix will also be used to calculate residual spatial autocorrelation and any user specified slx terms; it will be row-standardized before calculating slx terms. See shape2mat.

    EV

    A matrix of eigenvectors from any (transformed) connectivity matrix, presumably spatial (see make_EV). If EV is provided, still also provide a spatial weights matrix C for other purposes; threshold and nsa are ignored for user provided EV.

    nsa

    Include eigenvectors representing negative spatial autocorrelation? Defaults to nsa = FALSE. This is ignored if EV is provided.

    threshold

    Eigenvectors with standardized Moran coefficient values below this threshold value will be excluded from the candidate set of eigenvectors, EV. This defaults to threshold = 0.25, and is ignored if EV is provided.

    family

    The likelihood function for the outcome variable. Current options are family = gaussian(), student_t() and poisson(link = "log"), and binomial(link = "logit").

    prior

    A named list of parameters for prior distributions (see priors):

    - -
    intercept

    The intercept is assigned a Gaussian prior distribution (see normal

    . - -
    beta

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    - -
    sigma

    For family = gaussian() and family = student_t() models, the scale parameter, sigma, is assigned a (half-) Student's t prior distribution. The half-Student's t prior for sigma is constrained to be positive.

    - -
    nu

    nu is the degrees of freedom parameter in the Student's t likelihood (only used when family = student_t()). nu is assigned a gamma prior distribution. The default prior is prior = list(nu = gamma(alpha = 3, beta = 0.2)).

    - -
    tau

    The scale parameter for random effects, or varying intercepts, terms. This scale parameter, tau, is assigned a half-Student's t prior. To set this, use, e.g., prior = list(tau = student_t(df = 20, location = 0, scale = 20)).

    - -
    beta_ev

    The eigenvector coefficients are assigned the horseshoe prior (Piironen and Vehtari, 2017), parameterized by global_scale (to control overall prior sparsity), plus the degrees of freedom and scale of a Student's t model for any large coefficients (see priors). To allow the spatial filter to account for a greater amount of spatial autocorrelation (i.e., if you find the residuals contain spatial autocorrelation), increase the global scale parameter (to a maximum of global_scale = 1).

    - -
    ME

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    centerx

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    censor_point

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths. For example, the US Centers for Disease Control and Prevention censors (does not report) death counts that are nine or fewer, so if you're using CDC WONDER mortality data you could provide censor_point = 9.

    prior_only

    Draw samples from the prior distributions of parameters only.

    chains

    Number of MCMC chains to estimate. Default chains = 4.

    iter

    Number of samples per chain. Default iter = 2000.

    refresh

    Stan will print the progress of the sampler every refresh number of samples. Defaults to 500; set refresh=0 to silence this.

    pars

    Optional; specify any additional parameters you'd like stored from the Stan model.

    control

    A named list of parameters to control the sampler's behavior. See stan for details.

    ...

    Other arguments passed to sampling.

    - -

    Source

    - -

    Chun, Y., D. A. Griffith, M. Lee and P. Sinha (2016). Eigenvector selection with stepwise regression techniques to construct eigenvector spatial filters. Journal of Geographical Systems, 18(1), 67-85. doi: 10.1007/s10109-015-0225-3 +

    +
    stan_esf(
    +  formula,
    +  slx,
    +  re,
    +  data,
    +  C,
    +  EV = make_EV(C, nsa = nsa, threshold = threshold),
    +  nsa = FALSE,
    +  threshold = 0.25,
    +  family = gaussian(),
    +  prior = NULL,
    +  ME = NULL,
    +  centerx = FALSE,
    +  censor_point,
    +  prior_only = FALSE,
    +  chains = 4,
    +  iter = 2000,
    +  refresh = 500,
    +  pars = NULL,
    +  control = NULL,
    +  ...
    +)
    +
    + +
    +

    Source

    +

    Chun, Y., D. A. Griffith, M. Lee and P. Sinha (2016). Eigenvector selection with stepwise regression techniques to construct eigenvector spatial filters. Journal of Geographical Systems, 18(1), 67-85. doi: 10.1007/s10109-015-0225-3 .

    Dray, S., P. Legendre & P. R. Peres-Neto (2006). Spatial modelling: a comprehensive framework for principal coordinate analysis of neighbour matrices (PCNM). Ecological Modelling, 196(3-4), 483-493.

    -

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. Spatial Statistics. doi: 10.1016/j.spasta.2020.100450 - (open access: doi: 10.31219/osf.io/fah3z +

    Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. Spatial Statistics. doi: 10.1016/j.spasta.2020.100450 + (open access: doi: 10.31219/osf.io/fah3z ).

    -

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    -

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 +

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    +

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 .

    Griffith, Daniel A., and P. R. Peres-Neto (2006). Spatial modeling in ecology: the flexibility of eigenfunction spatial analyses. Ecology 87(10), 2603-2613.

    Griffith, D., and Y. Chun (2014). Spatial autocorrelation and spatial filtering, Handbook of Regional Science. Fischer, MM and Nijkamp, P. eds.

    Griffith, D., Chun, Y. and Li, B. (2019). Spatial Regression Analysis Using Eigenvector Spatial Filtering. Elsevier.

    Piironen, J and A. Vehtari (2017). Sparsity information and regularization in the horseshoe and other shrinkage priors. In Electronic Journal of Statistics, 11(2):5018-5051.

    -

    Value

    - -

    An object of class geostan_fit (a list) containing:

    -
    summary

    Summaries of the main parameters of interest; a data frame

    -
    diagnostic

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    -
    data

    a data frame containing the model data

    -
    EV

    A matrix of eigenvectors created with w and geostan::make_EV

    -
    C

    The spatial weights matrix used to construct EV

    -
    family

    the user-provided or default family argument used to fit the model

    -
    formula

    The model formula provided by the user (not including ESF component)

    -
    slx

    The slx formula

    -
    re

    A list containing re, the random effects (varying intercepts) formula if provided, and +

    +
    +

    Arguments

    +
    formula
    +

    A model formula, following the R formula syntax. Binomial models are specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    +
    slx
    +

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    +
    re
    +

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    +       alpha_tau ~ Student_t(d.f., location, scale).
    +
    +
    data
    +

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    +
    C
    +

    Spatial connectivity matrix which will be used to calculate eigenvectors, if EV is not provided by the user. Typically, the binary connectivity matrix is best for calculating eigenvectors (i.e., using C = shape2mat(shape, style = "B")). This matrix will also be used to calculate residual spatial autocorrelation and any user specified slx terms; it will be row-standardized before calculating slx terms. See shape2mat.

    +
    EV
    +

    A matrix of eigenvectors from any (transformed) connectivity matrix, presumably spatial (see make_EV). If EV is provided, still also provide a spatial weights matrix C for other purposes; threshold and nsa are ignored for user provided EV.

    +
    nsa
    +

    Include eigenvectors representing negative spatial autocorrelation? Defaults to nsa = FALSE. This is ignored if EV is provided.

    +
    threshold
    +

    Eigenvectors with standardized Moran coefficient values below this threshold value will be excluded from the candidate set of eigenvectors, EV. This defaults to threshold = 0.25, and is ignored if EV is provided.

    +
    family
    +

    The likelihood function for the outcome variable. Current options are family = gaussian(), student_t(), poisson(link = "log"), and binomial(link = "logit").

    +
    prior
    +

    A named list of parameters for prior distributions (see priors):

    intercept
    +

    The intercept is assigned a Gaussian prior distribution (see normal

    +. + +
    beta
    +

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    + + +
    sigma
    +

    For family = gaussian() and family = student_t() models, the scale parameter, sigma, is assigned a (half-) Student's t prior distribution. The half-Student's t prior for sigma is constrained to be positive.

    + + +
    nu
    +

    nu is the degrees of freedom parameter in the Student's t likelihood (only used when family = student_t()). nu is assigned a gamma prior distribution. The default prior is prior = list(nu = gamma(alpha = 3, beta = 0.2)).

    + + +
    tau
    +

    The scale parameter for random effects, or varying intercepts, terms. This scale parameter, tau, is assigned a half-Student's t prior. To set this, use, e.g., prior = list(tau = student_t(df = 20, location = 0, scale = 20)).

    + + +
    beta_ev
    +

    The eigenvector coefficients are assigned the horseshoe prior (Piironen and Vehtari, 2017), parameterized by global_scale (to control overall prior sparsity), plus the degrees of freedom and scale of a Student's t model for any large coefficients (see priors). To allow the spatial filter to account for a greater amount of spatial autocorrelation (i.e., if you find the residuals contain spatial autocorrelation), increase the global scale parameter (to a maximum of global_scale = 1).

    + + +
    +
    ME
    +

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    +
    centerx
    +

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    +
    censor_point
    +

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths. For example, the US Centers for Disease Control and Prevention censors (does not report) death counts that are nine or fewer, so if you're using CDC WONDER mortality data you could provide censor_point = 9.

    +
    prior_only
    +

    Draw samples from the prior distributions of parameters only.

    +
    chains
    +

    Number of MCMC chains to estimate. Default chains = 4.

    +
    iter
    +

    Number of samples per chain. Default iter = 2000.

    +
    refresh
    +

    Stan will print the progress of the sampler every refresh number of samples. Defaults to 500; set refresh=0 to silence this.

    +
    pars
    +

    Optional; specify any additional parameters you'd like stored from the Stan model.

    +
    control
    +

    A named list of parameters to control the sampler's behavior. See stan for details.

    +
    ...
    +

    Other arguments passed to sampling.

    +
    +
    +

    Value

    +

    An object of class geostan_fit (a list) containing:

    summary
    +

    Summaries of the main parameters of interest; a data frame

    + +
    diagnostic
    +

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    + +
    data
    +

    a data frame containing the model data

    + +
    EV
    +

    A matrix of eigenvectors created with w and geostan::make_EV

    + +
    C
    +

    The spatial weights matrix used to construct EV

    + +
    family
    +

    the user-provided or default family argument used to fit the model

    + +
    formula
    +

    The model formula provided by the user (not including ESF component)

    + +
    slx
    +

    The slx formula

    + +
    re
    +

    A list containing re, the random effects (varying intercepts) formula if provided, and data a data frame with columns id, the grouping variable, and idx, the index values assigned to each group.

    -
    priors

    Prior specifications.

    -
    x_center

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    -
    ME

    The ME data list, if one was provided by the user for measurement error models.

    -
    spatial

    A data frame with the name of the spatial component parameter ("esf") and method ("ESF")

    -
    stanfit

    an object of class stanfit returned by rstan::stan

    -
    +
    priors
    +

    Prior specifications.

    + +
    x_center
    +

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    + +
    ME
    +

    The ME data list, if one was provided by the user for measurement error models.

    + +
    spatial
    +

    A data frame with the name of the spatial component parameter ("esf") and method ("ESF")

    -

    Details

    +
    stanfit
    +

    an object of class stanfit returned by rstan::stan

    + +
    +
    +

    Details

    Eigenvector spatial filtering (ESF) is a method for spatial regression analysis. ESF is extensively covered in Griffith et al. (2019). This function implements the methodology introduced in Donegan et al. (2020), which uses Piironen and Vehtari's (2017) regularized horseshoe prior.

    ESF decomposes spatial autocorrelation into a linear combination of various patterns, typically at different scales (such as local, regional, and global trends). By adding a spatial filter to a regression model, any spatial autocorrelation is shifted from the residuals to the spatial filter. ESF models take the spectral decomposition of a transformed spatial connectivity matrix, C. The resulting eigenvectors, EV, are mutually orthogonal and uncorrelated map patterns. The spatial filter is EV * beta_ev, where beta_ev is a vector of coefficients.

    -

    ESF decomposes the data into a global mean, alpha, global patterns contributed by covariates, X * beta, spatial trends, EV * beta_ev, and residual variation. Thus, for family=gaussian(),

            Y ~ Gauss(alpha + X * beta + EV * beta_ev, sigma).
    -
    +

    ESF decomposes the data into a global mean, alpha, global patterns contributed by covariates, X * beta, spatial trends, EV * beta_ev, and residual variation. Thus, for family=gaussian(),

            Y ~ Gauss(alpha + X * beta + EV * beta_ev, sigma).
    +
    -

    An ESF component can be incorporated into the linear predictor of any generalized linear model. For example, a spatial Poisson model for rare disease incidence may be specified as follows:

            Y ~ Poisson(exp(offset + Mu))
    -        Mu = alpha + EV * beta_ev + A
    -        A ~ Gauss(0, tau)
    -        tau ~ student(20, 0, 2)
    -        beta_ev ~ horseshoe(.)
    -
    +

    An ESF component can be incorporated into the linear predictor of any generalized linear model. For example, a spatial Poisson model for rare disease incidence may be specified as follows:

    Y ~ Poisson(exp(offset + Mu))
    +        Mu = alpha + EV * beta_ev + A
    +        A ~ Gauss(0, tau)
    +        tau ~ student(20, 0, 2)
    +        beta_ev ~ horseshoe(.)
    -

    The spatial.geostan_fit method will return EV * beta_ev.

    -

    The model can also be extended to the space-time domain; see shape2mat to specify a space-time connectivity matrix.

    -

    The coefficients beta_ev are assigned the regularized horseshoe prior (Piironen and Vehtari, 2017), resulting in a relatively sparse model specification. In addition, numerous eigenvectors are automatically dropped because they represent trace amounts of spatial autocorrelation (this is controlled by the threshold argument). By default, stan_esf will drop all eigenvectors representing negative spatial autocorrelation patterns. You can change this behavior using the nsa argument.

    Spatially lagged covariates (SLX)

    +

    The spatial.geostan_fit method will return EV * beta_ev.

    +

    The model can also be extended to the space-time domain; see shape2mat to specify a space-time connectivity matrix.

    +

    The coefficients beta_ev are assigned the regularized horseshoe prior (Piironen and Vehtari, 2017), resulting in a relatively sparse model specification. In addition, numerous eigenvectors are automatically dropped because they represent trace amounts of spatial autocorrelation (this is controlled by the threshold argument). By default, stan_esf will drop all eigenvectors representing negative spatial autocorrelation patterns. You can change this behavior using the nsa argument.

    +

    Spatially lagged covariates (SLX)

    -

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, ...)
    -
    +

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, ...)
    -

    is a shortcut for

    stan_glm(y ~ I(W %*% x1) + x1 + x2, ...)
    -
    +

    is a shortcut for

    stan_glm(y ~ I(W %*% x1) + x1 + x2, ...)
    -

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    +

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    For measurement error (ME) models, the SLX argument is the only way to include spatially lagged covariates since the SLX term needs to be re-calculated on each iteration of the MCMC algorithm.

    +
    -

    Measurement error (ME) models

    +
    +

    Measurement error (ME) models

    -

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates (Donegan et al. 2021; Donegan 2021). With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
    +

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates (Donegan et al. 2021; Donegan 2021). With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
            x_true ~ MVGauss(mu, Sigma)
            Sigma = (I - rho * C)^(-1) M * tau^2
            mu ~ Gauss(0, 100)
            tau ~ student_t(10, 0, 40)
            rho ~ uniform(lower_bound, upper_bound)
    -
    +
    -

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

           x ~ Gauss(x_true, se)
    -       x_true ~ student_t(df, mu, sigma)
    -       df ~ gamma(3, 0.2)
    -       mu ~ Gauss(0, 100)
    -       sigma ~ student_t(10, 0, 40)
    -
    +

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

    x ~ Gauss(x_true, se)
    +       x_true ~ student_t(df, mu, sigma)
    +       df ~ gamma(3, 0.2)
    +       mu ~ Gauss(0, 100)
    +       sigma ~ student_t(10, 0, 40)
    -

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

           x ~ Gauss(x_true, se)
    -      logit(x_true) ~ MVGauss(mu, Sigma)
    -
    +

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

    x ~ Gauss(x_true, se)
    +      logit(x_true) ~ MVGauss(mu, Sigma)

    and similar for the Student t model.

    +
    -

    Censored counts

    +
    +

    Censored counts

    Vital statistics systems and disease surveillance programs typically suppress case counts when they are smaller than a specific threshold value. In such cases, the observation of a censored count is not the same as a missing value; instead, you are informed that the value is an integer somewhere between zero and the threshold value. For Poisson models (family = poisson()), you can use the censor_point argument to encode this information into your model.

    -

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    -
    +

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    +
    -

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    -
    +

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    +

    For example, the US Centers for Disease Control and Prevention's CDC WONDER database censors all death counts between 0 and 9. To model CDC WONDER mortality data, you could provide censor_point = 9 and then the likelihood statement for censored counts would equal the summation of the Poisson probability mass function over each integer ranging from zero through 9 (inclusive), conditional on the fitted values (i.e., all model parameters). See Donegan (2021) for additional discussion, references, and Stan code.

    +
    + +
    +
    +

    Author

    +

    Connor Donegan, Connor.Donegan@UTDallas.edu

    +
    -

    Author

    - -

    Connor Donegan, Connor.Donegan@UTDallas.edu

    - -

    Examples

    -
    # \donttest{
    -library(ggplot2)
    -library(sf)
    -library(bayesplot)
    -data(sentencing)
    -
    -# spatial weights matrix with binary coding scheme
    -C <- shape2mat(sentencing, style = "B")
    -
    -# log-expected number of sentences
    -## expected counts are based on county racial composition and mean sentencing rates
    -log_e <- log(sentencing$expected_sents)
    -
    -# fit spatial Poisson model with ESF + unstructured 'random effects'
    -fit.esf <- stan_esf(sents ~ offset(log_e),
    -                   re = ~ name,
    -                   family = poisson(),
    -                   data = sentencing,
    -                   C = C,
    -                   refresh = 0
    -)
    -
    -# spatial diagnostics 
    -sp_diag(fit.esf, sentencing)
    -
    -# plot marginal posterior distributions of beta_ev (eigenvector coefficients)
    -plot(fit.esf, pars = "beta_ev")
    -
    -# plot the marginal posterior distributions of the spatial filter (ESF * beta_ev)
    -plot(fit.esf, pars = "beta_ev")
    -
    -# posterior predictive distribution
    -yrep <- posterior_predict(fit.esf, S = 75)
    -y <- sentencing$sents
    -bayesplot::ppc_dens_overlay(y, yrep) 
    -
    -# map the spatial filter
    -sp.filter <- spatial(fit.esf)$mean
    -st_as_sf(sentencing) %>%
    - ggplot() +
    - geom_sf(aes(fill = sp.filter)) +
    - scale_fill_gradient2()
    -
    -# calculate log-standardized sentencing ratios (log-SSRs)
    -f <- fitted(fit.esf)$mean
    -SSR <-  f / sentencing$expected_sents
    -log.SSR <- log( SSR, base = 2 )
    -
    -# map the log-SSRs
    -st_as_sf(sentencing) %>%
    - ggplot() +
    - geom_sf(aes(fill = log.SSR)) +
    - scale_fill_gradient2(
    -   midpoint = 0,
    -   name = NULL,
    -   breaks = seq(-3, 3, by = 0.5)
    - ) +
    - labs(title = "Log-Standardized Sentencing Ratios",
    -      subtitle = "log( Fitted/Expected ), base 2"
    - ) +
    - theme_void() +
    - theme(
    -   legend.position = "bottom",
    -   legend.key.height = unit(0.35, "cm"),
    -   legend.key.width = unit(1.5, "cm")
    - )
    -
    -# } 
    +    
    +

    Examples

    +
    # \donttest{
    +library(ggplot2)
    +library(sf)
    +library(bayesplot)
    +data(sentencing)
    +
    +# spatial weights matrix with binary coding scheme
    +C <- shape2mat(sentencing, style = "B")
    +
    +# log-expected number of sentences
    +## expected counts are based on county racial composition and mean sentencing rates
    +log_e <- log(sentencing$expected_sents)
    +
    +# fit spatial Poisson model with ESF + unstructured 'random effects'
    +fit.esf <- stan_esf(sents ~ offset(log_e),
    +                   re = ~ name,
    +                   family = poisson(),
    +                   data = sentencing,
    +                   C = C,
    +                   refresh = 0
    +)
    +
    +# spatial diagnostics 
    +sp_diag(fit.esf, sentencing)
    +
    +# plot marginal posterior distributions of beta_ev (eigenvector coefficients)
    +plot(fit.esf, pars = "beta_ev")
    +
    +# plot the marginal posterior distributions of the spatial filter (ESF * beta_ev)
    +plot(fit.esf, pars = "beta_ev")
    +
    +# posterior predictive distribution
    +yrep <- posterior_predict(fit.esf, S = 75)
    +y <- sentencing$sents
    +bayesplot::ppc_dens_overlay(y, yrep) 
    +
    +# map the spatial filter
    +sp.filter <- spatial(fit.esf)$mean
    +st_as_sf(sentencing) %>%
    + ggplot() +
    + geom_sf(aes(fill = sp.filter)) +
    + scale_fill_gradient2()
    +
    +# calculate log-standardized sentencing ratios (log-SSRs)
    +f <- fitted(fit.esf)$mean
    +SSR <-  f / sentencing$expected_sents
    +log.SSR <- log( SSR, base = 2 )
    +
    +# map the log-SSRs
    +st_as_sf(sentencing) %>%
    + ggplot() +
    + geom_sf(aes(fill = log.SSR)) +
    + scale_fill_gradient2(
    +   midpoint = 0,
    +   name = NULL,
    +   breaks = seq(-3, 3, by = 0.5)
    + ) +
    + labs(title = "Log-Standardized Sentencing Ratios",
    +      subtitle = "log( Fitted/Expected ), base 2"
    + ) +
    + theme_void() +
    + theme(
    +   legend.position = "bottom",
    +   legend.key.height = unit(0.35, "cm"),
    +   legend.key.width = unit(1.5, "cm")
    + )
    +
    +# } 
     
     
    +
    +
    -
    - +

    - - - + diff --git a/docs/reference/stan_example.html b/docs/reference/stan_example.html deleted file mode 100644 index e4f87e09..00000000 --- a/docs/reference/stan_example.html +++ /dev/null @@ -1,198 +0,0 @@ - - - - - - - - -Example geostan model — stan_example • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Run an example geostan model.

    -
    - -
    stan_example(chains = 1, ...)
    - -

    Arguments

    - - - - - - - - - - -
    chains

    Number of MCMC chains to run.

    ...

    Any other arguments you would like to pass to stan_esf.

    - -

    Value

    - -

    Returns a geostan_fit object from a call to stan_esf using the ohio data.

    -

    See also

    - - - -

    Examples

    -
    -if (FALSE) { -fit = stan_example() -plot(fit) -} - -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/stan_glm-1.png b/docs/reference/stan_glm-1.png deleted file mode 100644 index b4540f88..00000000 Binary files a/docs/reference/stan_glm-1.png and /dev/null differ diff --git a/docs/reference/stan_glm-2.png b/docs/reference/stan_glm-2.png deleted file mode 100644 index 3e0e51b7..00000000 Binary files a/docs/reference/stan_glm-2.png and /dev/null differ diff --git a/docs/reference/stan_glm-3.png b/docs/reference/stan_glm-3.png deleted file mode 100644 index 5f2609b5..00000000 Binary files a/docs/reference/stan_glm-3.png and /dev/null differ diff --git a/docs/reference/stan_glm-4.png b/docs/reference/stan_glm-4.png deleted file mode 100644 index 838045db..00000000 Binary files a/docs/reference/stan_glm-4.png and /dev/null differ diff --git a/docs/reference/stan_glm-5.png b/docs/reference/stan_glm-5.png deleted file mode 100644 index e0037eae..00000000 Binary files a/docs/reference/stan_glm-5.png and /dev/null differ diff --git a/docs/reference/stan_glm-6.png b/docs/reference/stan_glm-6.png deleted file mode 100644 index 31b6ba22..00000000 Binary files a/docs/reference/stan_glm-6.png and /dev/null differ diff --git a/docs/reference/stan_glm.html b/docs/reference/stan_glm.html index d45c4b4e..b6ebab27 100644 --- a/docs/reference/stan_glm.html +++ b/docs/reference/stan_glm.html @@ -1,93 +1,18 @@ - - - - - - - -Generalized linear models — stan_glm • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Generalized linear models — stan_glm • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,266 +61,264 @@

    Generalized linear models

    Fit a generalized linear model.

    -
    stan_glm(
    -  formula,
    -  slx,
    -  re,
    -  data,
    -  C,
    -  family = gaussian(),
    -  prior = NULL,
    -  ME = NULL,
    -  centerx = FALSE,
    -  prior_only = FALSE,
    -  censor_point,
    -  chains = 4,
    -  iter = 2000,
    -  refresh = 1000,
    -  pars = NULL,
    -  control = NULL,
    -  ...
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    formula

    A model formula, following the R formula syntax. Binomial models are specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    slx

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    re

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    -       alpha_tau ~ Student_t(d.f., location, scale).
    -
    data

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    C

    Optional spatial connectivity matrix which will be used to calculate residual spatial autocorrelation as well as any user specified slx terms; it will automatically be row-standardized before calculating slx terms. See shape2mat.

    family

    The likelihood function for the outcome variable. Current options are poisson(link = "log"), binomial(link = "logit"), student_t(), and the default gaussian().

    prior

    A named list of parameters for prior distributions (see priors):

    - -
    intercept

    The intercept is assigned a Gaussian prior distribution (see normal

    . - -
    beta

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    - -
    sigma

    For family = gaussian() and family = student_t() models, the scale parameter, sigma, is assigned a (half-) Student's t prior distribution. The half-Student's t prior for sigma is constrained to be positive.

    - -
    nu

    nu is the degrees of freedom parameter in the Student's t likelihood (only used when family = student_t()). nu is assigned a gamma prior distribution. The default prior is prior = list(nu = gamma(alpha = 3, beta = 0.2)).

    - -
    tau

    The scale parameter for random effects, or varying intercepts, terms. This scale parameter, tau, is assigned a half-Student's t prior. To set this, use, e.g., prior = list(tau = student_t(df = 20, location = 0, scale = 20)).

    - -
    ME

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    centerx

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    prior_only

    Draw samples from the prior distributions of parameters only.

    censor_point

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths. For example, the US Centers for Disease Control and Prevention censors (does not report) death counts that are nine or fewer, so if you're using CDC WONDER mortality data you could provide censor_point = 9.

    chains

    Number of MCMC chains to estimate.

    iter

    Number of samples per chain.

    refresh

    Stan will print the progress of the sampler every refresh number of samples; set refresh=0 to silence this.

    pars

    Specify any additional parameters you'd like stored from the Stan model.

    control

    A named list of parameters to control the sampler's behavior. See stan for details.

    ...

    Other arguments passed to sampling. For multi-core processing, you can use cores = parallel::detectCores(), or run options(mc.cores = parallel::detectCores()) first.

    - -

    Source

    - -

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    -

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 +

    +
    stan_glm(
    +  formula,
    +  slx,
    +  re,
    +  data,
    +  C,
    +  family = gaussian(),
    +  prior = NULL,
    +  ME = NULL,
    +  centerx = FALSE,
    +  prior_only = FALSE,
    +  censor_point,
    +  chains = 4,
    +  iter = 2000,
    +  refresh = 1000,
    +  pars = NULL,
    +  control = NULL,
    +  ...
    +)
    +
    + +
    +

    Source

    +

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    +

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 .

    -

    Value

    - -

    An object of class geostan_fit (a list) containing:

    -
    summary

    Summaries of the main parameters of interest; a data frame

    -
    diagnostic

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    -
    stanfit

    an object of class stanfit returned by rstan::stan

    -
    data

    a data frame containing the model data

    -
    family

    the user-provided or default family argument used to fit the model

    -
    formula

    The model formula provided by the user (not including ESF component)

    -
    slx

    The slx formula

    -
    re

    A list containing re, the random effects (varying intercepts) formula if provided, and +

    +
    +

    Arguments

    +
    formula
    +

    A model formula, following the R formula syntax. Binomial models are specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    +
    slx
    +

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    +
    re
    +

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    +       alpha_tau ~ Student_t(d.f., location, scale).
    +
    +
    data
    +

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    +
    C
    +

    Optional spatial connectivity matrix which will be used to calculate residual spatial autocorrelation as well as any user specified slx terms; it will automatically be row-standardized before calculating slx terms. See shape2mat.

    +
    family
    +

    The likelihood function for the outcome variable. Current options are poisson(link = "log"), binomial(link = "logit"), student_t(), and the default gaussian().

    +
    prior
    +

    A named list of parameters for prior distributions (see priors):

    intercept
    +

    The intercept is assigned a Gaussian prior distribution (see normal

    +. + +
    beta
    +

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    + + +
    sigma
    +

    For family = gaussian() and family = student_t() models, the scale parameter, sigma, is assigned a (half-) Student's t prior distribution. The half-Student's t prior for sigma is constrained to be positive.

    + + +
    nu
    +

    nu is the degrees of freedom parameter in the Student's t likelihood (only used when family = student_t()). nu is assigned a gamma prior distribution. The default prior is prior = list(nu = gamma(alpha = 3, beta = 0.2)).

    + + +
    tau
    +

    The scale parameter for random effects, or varying intercepts, terms. This scale parameter, tau, is assigned a half-Student's t prior. To set this, use, e.g., prior = list(tau = student_t(df = 20, location = 0, scale = 20)).

    + + +
    +
    ME
    +

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    +
    centerx
    +

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    +
    prior_only
    +

    Draw samples from the prior distributions of parameters only.

    +
    censor_point
    +

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths. For example, the US Centers for Disease Control and Prevention censors (does not report) death counts that are nine or fewer, so if you're using CDC WONDER mortality data you could provide censor_point = 9.

    +
    chains
    +

    Number of MCMC chains to estimate.

    +
    iter
    +

    Number of samples per chain.

    +
    refresh
    +

    Stan will print the progress of the sampler every refresh number of samples; set refresh=0 to silence this.

    +
    pars
    +

    Specify any additional parameters you'd like stored from the Stan model.

    +
    control
    +

    A named list of parameters to control the sampler's behavior. See stan for details.

    +
    ...
    +

    Other arguments passed to sampling. For multi-core processing, you can use cores = parallel::detectCores(), or run options(mc.cores = parallel::detectCores()) first.

    +
    +
    +

    Value

    +

    An object of class geostan_fit (a list) containing:

    summary
    +

    Summaries of the main parameters of interest; a data frame

    + +
    diagnostic
    +

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    + +
    stanfit
    +

    an object of class stanfit returned by rstan::stan

    + +
    data
    +

    a data frame containing the model data

    + +
    family
    +

    the user-provided or default family argument used to fit the model

    + +
    formula
    +

    The model formula provided by the user (not including ESF component)

    + +
    slx
    +

    The slx formula

    + +
    re
    +

    A list containing re, the random effects (varying intercepts) formula if provided, and Data a data frame with columns id, the grouping variable, and idx, the index values assigned to each group.

    -
    priors

    Prior specifications.

    -
    x_center

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    -
    ME

    The ME data list, if one was provided by the user for measurement error models.

    -
    spatial

    NA, slot is maintained for use in geostan_fit methods.

    -
    +
    priors
    +

    Prior specifications.

    + +
    x_center
    +

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    + +
    ME
    +

    The ME data list, if one was provided by the user for measurement error models.

    + +
    spatial
    +

    NA, slot is maintained for use in geostan_fit methods.

    -

    Details

    -

    Fit a generalized linear model using the R formula interface. Default prior distributions are designed to be weakly informative relative to the data. Much of the functionality intended for spatial models, such as the ability to add spatially lagged covariates and observational error models, is also available in stan_glm. All of geostan's spatial models build on top of the same Stan code used in stan_glm.

    Poisson models and disease mapping

    +
    +
    +

    Details

    +

    Fit a generalized linear model using the R formula interface. Default prior distributions are designed to be weakly informative relative to the data. Much of the functionality intended for spatial models, such as the ability to add spatially lagged covariates and observational error models, is also available in stan_glm. All of geostan's spatial models build on top of the same Stan code used in stan_glm.

    +

    Poisson models and disease mapping

    In spatial statistics, Poisson models are often used to calculate incidence rates (mortality rates, or disease incidence rates) for administrative areas like counties or census tracts. If Y are counts of cases, and P are populations at risk, then the crude rates are Y/P. The purpose is to model risk, eta, for which crude rates are a (noisy) indicator. Our analysis should also respect the fact that the amount of information contained in the observations, Y/P, increases with P. Hierarchical Poisson models are often the best way to incorporate all of this information.

    -

    For the Poisson model, Y is specified as the outcome and the log of the population at risk, log(P), needs to be provided as an offset term. For such a case, disease incidence across the collection of areas could be modeled as:

            Y ~ Poisson(exp(log(P) + eta))
    +

    For the Poisson model, Y is specified as the outcome and the log of the population at risk, log(P), needs to be provided as an offset term. For such a case, disease incidence across the collection of areas could be modeled as:

            Y ~ Poisson(exp(log(P) + eta))
             eta = alpha + A
             A ~ Gauss(0, tau)
             tau ~ student(20, 0, 2),
    -
    +

    where alpha is the mean log-risk (incidence rate) and A is a vector of (so-called) random effects, which enable partial pooling of information across observations. Covariates can be added to the model for the log-rates, such that eta = alpha + X * beta + A. See the example section of this document for a demonstration (where the denominator of the outcome is the expected count, rather than population at risk).

    -

    Note that the denominator for the rates is specified as a log-offset to provide a consistent, formula-like interface to the model. However, an equivalent, and perhaps more intuitive, specification is the following:

            Y ~ Poisson(P * exp(eta))
    -
    +

    Note that the denominator for the rates is specified as a log-offset to provide a consistent, formula-like interface to the model. However, an equivalent, and perhaps more intuitive, specification is the following:

    Y ~ Poisson(P * exp(eta))

    where P is still the population at risk and exp(eta) is the level of risk (expressed as a rate). This translation is derived from the rules for manipulating exponents: P * exp(eta) = exp(log(P)) * exp(eta) = exp(log(P) + eta).

    +
    -

    Spatially lagged covariates (SLX)

    +
    +

    Spatially lagged covariates (SLX)

    -

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, ...)
    -
    +

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, ...)
    -

    is a shortcut for

    stan_glm(y ~ I(W %*% x1) + x1 + x2, ...)
    -
    +

    is a shortcut for

    stan_glm(y ~ I(W %*% x1) + x1 + x2, ...)
    -

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    +

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    For measurement error (ME) models, the SLX argument is the only way to include spatially lagged covariates since the SLX term needs to be re-calculated on each iteration of the MCMC algorithm.

    +
    -

    Measurement error (ME) models

    +
    +

    Measurement error (ME) models

    -

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates (Donegan et al. 2021; Donegan 2021). With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
    +

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates (Donegan et al. 2021; Donegan 2021). With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
            x_true ~ MVGauss(mu, Sigma)
            Sigma = (I - rho * C)^(-1) M * tau^2
            mu ~ Gauss(0, 100)
            tau ~ student_t(10, 0, 40)
            rho ~ uniform(lower_bound, upper_bound)
    -
    +
    -

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

           x ~ Gauss(x_true, se)
    -       x_true ~ student_t(df, mu, sigma)
    -       df ~ gamma(3, 0.2)
    -       mu ~ Gauss(0, 100)
    -       sigma ~ student_t(10, 0, 40)
    -
    +

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

    x ~ Gauss(x_true, se)
    +       x_true ~ student_t(df, mu, sigma)
    +       df ~ gamma(3, 0.2)
    +       mu ~ Gauss(0, 100)
    +       sigma ~ student_t(10, 0, 40)
    -

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

           x ~ Gauss(x_true, se)
    -      logit(x_true) ~ MVGauss(mu, Sigma)
    -
    +

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

    x ~ Gauss(x_true, se)
    +      logit(x_true) ~ MVGauss(mu, Sigma)

    and similar for the Student t model.

    +
    -

    Censored counts

    +
    +

    Censored counts

    Vital statistics systems and disease surveillance programs typically suppress case counts when they are smaller than a specific threshold value. In such cases, the observation of a censored count is not the same as a missing value; instead, you are informed that the value is an integer somewhere between zero and the threshold value. For Poisson models (family = poisson()), you can use the censor_point argument to encode this information into your model.

    -

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    -
    +

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    +
    -

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    -
    +

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    +

    For example, the US Centers for Disease Control and Prevention's CDC WONDER database censors all death counts between 0 and 9. To model CDC WONDER mortality data, you could provide censor_point = 9 and then the likelihood statement for censored counts would equal the summation of the Poisson probability mass function over each integer ranging from zero through 9 (inclusive), conditional on the fitted values (i.e., all model parameters). See Donegan (2021) for additional discussion, references, and Stan code.

    +
    + +
    +
    +

    Author

    +

    Connor Donegan, Connor.Donegan@UTDallas.edu

    +
    -

    Author

    - -

    Connor Donegan, Connor.Donegan@UTDallas.edu

    - -

    Examples

    -
    # \donttest{
    -library(ggplot2)
    -library(sf)
    -data(sentencing)
    -
    -sentencing$log_e <- log(sentencing$expected_sents)
    -fit.pois <- stan_glm(sents ~ offset(log_e),
    -                     re = ~ name,
    -                     family = poisson(),
    -                     data = sentencing
    - )
    -
    -# MCMC diagnostics plot: Rhat values should all be very near 1
    -rstan::stan_rhat(fit.pois$stanfit)
    - # see effective sample size for all parameters and generated quantities
    - # (including residuals, predicted values, etc.)
    -rstan::stan_ess(fit.pois$stanfit)
    -# or for a particular parameter
    -rstan::stan_ess(fit.pois$stanfit, "alpha_re")
    -
    -# Spatial autocorrelation/residual diagnostics
    -sp_diag(fit.pois, sentencing)
    -
    -## Posterior predictive distribution                                       
    -library(bayesplot)
    -yrep <- posterior_predict(fit.pois, S = 75)
    -y <- sentencing$sents
    -ppc_dens_overlay(y, yrep)
    -# }
    +    
    +

    Examples

    +
    # \donttest{
    +library(ggplot2)
    +library(sf)
    +data(sentencing)
    +
    +sentencing$log_e <- log(sentencing$expected_sents)
    +fit.pois <- stan_glm(sents ~ offset(log_e),
    +                     re = ~ name,
    +                     family = poisson(),
    +                     data = sentencing
    + )
    +
    +# MCMC diagnostics plot: Rhat values should all be very near 1
    +rstan::stan_rhat(fit.pois$stanfit)
    + # see effective sample size for all parameters and generated quantities
    + # (including residuals, predicted values, etc.)
    +rstan::stan_ess(fit.pois$stanfit)
    +# or for a particular parameter
    +rstan::stan_ess(fit.pois$stanfit, "alpha_re")
    +
    +# Spatial autocorrelation/residual diagnostics
    +sp_diag(fit.pois, sentencing)
    +
    +## Posterior predictive distribution                                       
    +library(bayesplot)
    +yrep <- posterior_predict(fit.pois, S = 75)
    +y <- sentencing$sents
    +ppc_dens_overlay(y, yrep)
    +# }
     
     
    +
    +
    -
    - +

    - - - + diff --git a/docs/reference/stan_icar-1.png b/docs/reference/stan_icar-1.png deleted file mode 100644 index fef98cc5..00000000 Binary files a/docs/reference/stan_icar-1.png and /dev/null differ diff --git a/docs/reference/stan_icar-2.png b/docs/reference/stan_icar-2.png deleted file mode 100644 index c75b9dc3..00000000 Binary files a/docs/reference/stan_icar-2.png and /dev/null differ diff --git a/docs/reference/stan_icar-3.png b/docs/reference/stan_icar-3.png deleted file mode 100644 index 4531ccdd..00000000 Binary files a/docs/reference/stan_icar-3.png and /dev/null differ diff --git a/docs/reference/stan_icar-4.png b/docs/reference/stan_icar-4.png deleted file mode 100644 index bc3cfe71..00000000 Binary files a/docs/reference/stan_icar-4.png and /dev/null differ diff --git a/docs/reference/stan_icar-5.png b/docs/reference/stan_icar-5.png deleted file mode 100644 index 25cf6c9b..00000000 Binary files a/docs/reference/stan_icar-5.png and /dev/null differ diff --git a/docs/reference/stan_icar-6.png b/docs/reference/stan_icar-6.png deleted file mode 100644 index 64cce670..00000000 Binary files a/docs/reference/stan_icar-6.png and /dev/null differ diff --git a/docs/reference/stan_icar.html b/docs/reference/stan_icar.html index b5c6dbb6..f4a75f30 100644 --- a/docs/reference/stan_icar.html +++ b/docs/reference/stan_icar.html @@ -1,93 +1,18 @@ - - - - - - - -Intrinsic autoregressive models — stan_icar • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Intrinsic autoregressive models — stan_icar • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,384 +61,379 @@

    Intrinsic autoregressive models

    The intrinsic conditional auto-regressive (ICAR) model for spatial count data. Options include the BYM model, the BYM2 model, and a solo ICAR term.

    -
    stan_icar(
    -  formula,
    -  slx,
    -  re,
    -  data,
    -  C,
    -  family = poisson(),
    -  type = c("icar", "bym", "bym2"),
    -  scale_factor = NULL,
    -  prior = NULL,
    -  ME = NULL,
    -  centerx = FALSE,
    -  censor_point,
    -  prior_only = FALSE,
    -  chains = 4,
    -  iter = 2000,
    -  refresh = 500,
    -  pars = NULL,
    -  control = NULL,
    -  ...
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    formula

    A model formula, following the R formula syntax. Binomial models can be specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    slx

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    re

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    -       alpha_tau ~ Student_t(d.f., location, scale).
    -
    - -

    Before using this term, read the Details section and the type argument. Specifically, if you use type = bym, then an observational-level re term is already included in the model. (Similar for type = bym2.)

    data

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    C

    Spatial connectivity matrix which will be used to construct an edge list for the ICAR model, and to calculate residual spatial autocorrelation as well as any user specified slx terms. It will automatically be row-standardized before calculating slx terms. C must be a binary symmetric n x n matrix.

    family

    The likelihood function for the outcome variable. Current options are binomial(link = "logit") and poisson(link = "log").

    type

    Defaults to "icar" (partial pooling of neighboring observations through parameter phi); specify "bym" to add a second parameter vector theta to perform partial pooling across all observations; specify "bym2" for the innovation introduced by Riebler et al. (2016). See Details for more information.

    scale_factor

    For the BYM2 model, optional. If missing, this will be set to a vector of ones. See Details.

    prior

    A named list of parameters for prior distributions (see priors):

    - -
    intercept

    The intercept is assigned a Gaussian prior distribution (see normal

    . - -
    beta

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    - -
    sigma

    For family = gaussian() and family = student_t() models, the scale parameter, sigma, is assigned a (half-) Student's t prior distribution. The half-Student's t prior for sigma is constrained to be positive.

    - -
    nu

    nu is the degrees of freedom parameter in the Student's t likelihood (only used when family = student_t()). nu is assigned a gamma prior distribution. The default prior is prior = list(nu = gamma(alpha = 3, beta = 0.2)).

    - -
    tau

    The scale parameter for random effects, or varying intercepts, terms. This scale parameter, tau, is assigned a half-Student's t prior. To set this, use, e.g., prior = list(tau = student_t(df = 20, location = 0, scale = 20)).

    - -
    ME

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    centerx

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    censor_point

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths. For example, the US Centers for Disease Control and Prevention censors (does not report) death counts that are nine or fewer, so if you're using CDC WONDER mortality data you could provide censor_point = 9.

    prior_only

    Draw samples from the prior distributions of parameters only.

    chains

    Number of MCMC chains to estimate.

    iter

    Number of samples per chain.

    refresh

    Stan will print the progress of the sampler every refresh number of samples; set refresh=0 to silence this.

    pars

    Optional; specify any additional parameters you'd like stored from the Stan model.

    control

    A named list of parameters to control the sampler's behavior. See stan for details.

    ...

    Other arguments passed to sampling. For multi-core processing, you can use cores = parallel::detectCores(), or run options(mc.cores = parallel::detectCores()) first.

    - -

    Source

    +
    +
    stan_icar(
    +  formula,
    +  slx,
    +  re,
    +  data,
    +  C,
    +  family = poisson(),
    +  type = c("icar", "bym", "bym2"),
    +  scale_factor = NULL,
    +  prior = NULL,
    +  ME = NULL,
    +  centerx = FALSE,
    +  censor_point,
    +  prior_only = FALSE,
    +  chains = 4,
    +  iter = 2000,
    +  refresh = 500,
    +  pars = NULL,
    +  control = NULL,
    +  ...
    +)
    +
    +
    +

    Source

    Besag, J. (1974). Spatial interaction and the statistical analysis of lattice systems. Journal of the Royal Statistical Society: Series B (Methodological), 36(2), 192-225.

    Besag, J., York, J., & Mollié, A. (1991). Bayesian image restoration, with two applications in spatial statistics. Annals of the institute of statistical mathematics, 43(1), 1-20.

    -

    Donegan, Connor. 2021. Flexible functions for ICAR, BYM, and BYM2 models in Stan. Code repository. https://github.com/ConnorDonegan/Stan-IAR

    -

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    -

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 +

    Donegan, Connor. 2021. Flexible functions for ICAR, BYM, and BYM2 models in Stan. Code repository. https://github.com/ConnorDonegan/Stan-IAR

    +

    Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. Int. J. Env. Res. and Public Health 18 (13): 6856. DOI: 10.3390/ijerph18136856 Data and code: https://github.com/ConnorDonegan/survey-HBM.

    +

    Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. OSF Preprints. doi: 10.31219/osf.io/3ey65 .

    Freni-Sterrantino, Anna, Massimo Ventrucci, and Håvard Rue. 2018. A Note on Intrinsic Conditional Autoregressive Models for Disconnected Graphs. Spatial and Spatio-Temporal Epidemiology 26: 25–34.

    Morris, M., Wheeler-Martin, K., Simpson, D., Mooney, S. J., Gelman, A., & DiMaggio, C. (2019). Bayesian hierarchical spatial models: Implementing the Besag York Mollié model in stan. Spatial and spatio-temporal epidemiology, 31, 100301.

    Riebler, A., Sorbye, S. H., Simpson, D., & Rue, H. (2016). An intuitive Bayesian spatial model for disease mapping that accounts for scaling. Statistical Methods in Medical Research, 25(4), 1145-1165.

    -

    Value

    - -

    An object of class geostan_fit (a list) containing:

    -
    summary

    Summaries of the main parameters of interest; a data frame

    -
    diagnostic

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    -
    stanfit

    an object of class stanfit returned by rstan::stan

    -
    data

    a data frame containing the model data

    -
    edges

    The edge list representing all unique sets of neighbors and the weight attached to each pair (i.e., their corresponding element in the connectivity matrix C).

    -
    family

    the user-provided or default family argument used to fit the model

    -
    formula

    The model formula provided by the user (not including ICAR component)

    -
    slx

    The slx formula

    -
    re

    A list with two named elements, formula and Data, containing the formula re and a data frame with columns id (the grouping variable) and idx (the index values assigned to each group).

    -
    priors

    Prior specifications.

    -
    x_center

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    -
    spatial

    A data frame with the name of the spatial parameter ("phi" if type = "icar" else "convolution") and method (toupper(type)).

    - -
    - -

    Details

    +
    +
    +

    Arguments

    +
    formula
    +

    A model formula, following the R formula syntax. Binomial models can be specified by setting the left hand side of the equation to a data frame of successes and failures, as in cbind(successes, failures) ~ x.

    +
    slx
    +

    Formula to specify any spatially-lagged covariates. As in, ~ x1 + x2 (the intercept term will be removed internally). When setting priors for beta, remember to include priors for any SLX terms.

    +
    re
    +

    To include a varying intercept (or "random effects") term, alpha_re, specify the grouping variable here using formula syntax, as in ~ ID. Then, alpha_re is a vector of parameters added to the linear predictor of the model, and:

           alpha_re ~ N(0, alpha_tau)
    +       alpha_tau ~ Student_t(d.f., location, scale).
    +
    +

    Before using this term, read the Details section and the type argument. Specifically, if you use type = bym, then an observational-level re term is already included in the model. (Similar for type = bym2.)

    +
    data
    +

    A data.frame or an object coercible to a data frame by as.data.frame containing the model data.

    +
    C
    +

    Spatial connectivity matrix which will be used to construct an edge list for the ICAR model, and to calculate residual spatial autocorrelation as well as any user specified slx terms. It will automatically be row-standardized before calculating slx terms. C must be a binary symmetric n x n matrix.

    +
    family
    +

    The likelihood function for the outcome variable. Current options are binomial(link = "logit") and poisson(link = "log").

    +
    type
    +

    Defaults to "icar" (partial pooling of neighboring observations through parameter phi); specify "bym" to add a second parameter vector theta to perform partial pooling across all observations; specify "bym2" for the innovation introduced by Riebler et al. (2016). See Details for more information.

    +
    scale_factor
    +

    For the BYM2 model, optional. If missing, this will be set to a vector of ones. See Details.

    +
    prior
    +

    A named list of parameters for prior distributions (see priors):

    intercept
    +

    The intercept is assigned a Gaussian prior distribution (see normal

    +. + +
    beta
    +

    Regression coefficients are assigned Gaussian prior distributions. Variables must follow their order of appearance in the model formula. Note that if you also use slx terms (spatially lagged covariates), and you use custom priors for beta, then you have to provide priors for the slx terms. Since slx terms are prepended to the design matrix, the prior for the slx term will be listed first.

    + + +
    sigma
    +

    For family = gaussian() and family = student_t() models, the scale parameter, sigma, is assigned a (half-) Student's t prior distribution. The half-Student's t prior for sigma is constrained to be positive.

    + + +
    nu
    +

    nu is the degrees of freedom parameter in the Student's t likelihood (only used when family = student_t()). nu is assigned a gamma prior distribution. The default prior is prior = list(nu = gamma(alpha = 3, beta = 0.2)).

    + + +
    tau
    +

    The scale parameter for random effects, or varying intercepts, terms. This scale parameter, tau, is assigned a half-Student's t prior. To set this, use, e.g., prior = list(tau = student_t(df = 20, location = 0, scale = 20)).

    + + +
    +
    ME
    +

    To model observational uncertainty (i.e. measurement or sampling error) in any or all of the covariates, provide a list of data as constructed by the prep_me_data function.

    +
    centerx
    +

    To center predictors on their mean values, use centerx = TRUE. If the ME argument is used, the modeled covariate (i.e., latent variable), rather than the raw observations, will be centered. When using the ME argument, this is the recommended method for centering the covariates.

    +
    censor_point
    +

    Integer value indicating the maximum censored value; this argument is for modeling censored (suppressed) outcome data, typically disease case counts or deaths. For example, the US Centers for Disease Control and Prevention censors (does not report) death counts that are nine or fewer, so if you're using CDC WONDER mortality data you could provide censor_point = 9.

    +
    prior_only
    +

    Draw samples from the prior distributions of parameters only.

    +
    chains
    +

    Number of MCMC chains to estimate.

    +
    iter
    +

    Number of samples per chain.

    +
    refresh
    +

    Stan will print the progress of the sampler every refresh number of samples; set refresh=0 to silence this.

    +
    pars
    +

    Optional; specify any additional parameters you'd like stored from the Stan model.

    +
    control
    +

    A named list of parameters to control the sampler's behavior. See stan for details.

    +
    ...
    +

    Other arguments passed to sampling. For multi-core processing, you can use cores = parallel::detectCores(), or run options(mc.cores = parallel::detectCores()) first.

    +
    +
    +

    Value

    +

    An object of class geostan_fit (a list) containing:

    summary
    +

    Summaries of the main parameters of interest; a data frame

    + +
    diagnostic
    +

    Widely Applicable Information Criteria (WAIC) with a measure of effective number of parameters (eff_pars) and mean log pointwise predictive density (lpd), and mean residual spatial autocorrelation as measured by the Moran coefficient.

    + +
    stanfit
    +

    an object of class stanfit returned by rstan::stan

    + +
    data
    +

    a data frame containing the model data

    + +
    edges
    +

    The edge list representing all unique sets of neighbors and the weight attached to each pair (i.e., their corresponding element in the connectivity matrix C).

    + +
    family
    +

    the user-provided or default family argument used to fit the model

    + +
    formula
    +

    The model formula provided by the user (not including ICAR component)

    + +
    slx
    +

    The slx formula

    + +
    re
    +

    A list with two named elements, formula and Data, containing the formula re and a data frame with columns id (the grouping variable) and idx (the index values assigned to each group).

    + +
    priors
    +

    Prior specifications.

    + +
    x_center
    +

    If covariates are centered internally (centerx = TRUE), then x_center is a numeric vector of the values on which covariates were centered.

    + +
    spatial
    +

    A data frame with the name of the spatial parameter ("phi" if type = "icar" else "convolution") and method (toupper(type)).

    + + +
    +
    +

    Details

    The Stan code for the ICAR component of the model and the BYM2 option is from Morris et al. (2019) with adjustments to enable non-binary weights and disconnected graph structures (see Freni-Sterrantino (2018) and Donegan (2021)).

    -

    The exact specification depends on the type argument.

    'icar'

    +

    The exact specification depends on the type argument.

    +

    'icar'

    -

    For Poisson models for count data, y, the basic model specification (type = "icar") is:

            y ~ Poisson(exp(offset + mu + phi))
    -        phi ~ ICAR(spatial_scale)
    -        spatial_scale ~ Gaussian(0, 1)
    -
    +

    For Poisson models for count data, y, the basic model specification (type = "icar") is:

    y ~ Poisson(exp(offset + mu + phi))
    +        phi ~ ICAR(spatial_scale)
    +        spatial_scale ~ Gaussian(0, 1)

    where mu contains an intercept and potentially covariates. The spatial trend, phi, has a mean of zero and a single scale parameter, spatial_scale.

    -

    The ICAR prior model is a CAR model that has a spatial autocorrelation parameter car_alpha equal to 1 (see stan_car). Thus the ICAR prior places high probability on a smooth spatially (or temporally) varying mean. This is rarely sufficient to model the amount of variation present in social and health data.

    +

    The ICAR prior model is a CAR model that has a spatial autocorrelation parameter car_alpha equal to 1 (see stan_car). Thus the ICAR prior places high probability on a smooth spatially (or temporally) varying mean. This is rarely sufficient to model the amount of variation present in social and health data.

    +
    -

    'bym'

    +
    +

    'bym'

    -

    Often, an observational-level random effect term, theta, is added to capture (heterogeneous or unstructured) deviations from mu + phi. The combined term is referred to as a convolution term:

            convolution = phi + theta.
    -
    +

    Often, an observational-level random effect term, theta, is added to capture (heterogeneous or unstructured) deviations from mu + phi. The combined term is referred to as a convolution term:

    convolution = phi + theta.
    -

    This is known as the BYM model (Besag et al. 1991), and can be specified using type = "bym":

            y ~ Poisson(exp(offset + mu + phi + theta))
    -        phi ~ ICAR(spatial_scale)
    -        theta ~ Gaussian(0, theta_scale)
    -        spatial_scale ~ Gaussian(0, 1)
    -        theta_scale ~ Gaussian(0, 1)
    -
    +

    This is known as the BYM model (Besag et al. 1991), and can be specified using type = "bym":

    y ~ Poisson(exp(offset + mu + phi + theta))
    +        phi ~ ICAR(spatial_scale)
    +        theta ~ Gaussian(0, theta_scale)
    +        spatial_scale ~ Gaussian(0, 1)
    +        theta_scale ~ Gaussian(0, 1)
    +
    -

    'bym2'

    +
    +

    'bym2'

    -

    Riebler et al. (2016) introduce a variation on the BYM model (type = "bym2"). This specification combines phi and theta using a mixing parameter, rho, that controls the proportion of the variation that is attributable to the spatially autocorrelated term, phi, rather than the spatially unstructured term, theta. The terms share a single scale parameter:

            convolution = [sqrt(rho/scale_factor) * phi_tilde + sqrt(1 - rho) * theta_tilde] * spatial_scale.
    +

    Riebler et al. (2016) introduce a variation on the BYM model (type = "bym2"). This specification combines phi and theta using a mixing parameter, rho, that controls the proportion of the variation that is attributable to the spatially autocorrelated term, phi, rather than the spatially unstructured term, theta. The terms share a single scale parameter:

            convolution = [sqrt(rho/scale_factor) * phi_tilde + sqrt(1 - rho) * theta_tilde] * spatial_scale.
             phi_tilde ~ Gaussian(0, 1)
             theta_tilde ~ Gaussian(0, 1)
             spatial_scale ~ Gaussian(0, 1)
    -
    +

    The two _tilde terms are standard normal deviates, rho is restricted to values between zero and one, and scale_factor is a constant term provided by the user. By default, scale_factor is equal to one, so that it does nothing. Riebler et al. (2016) argue that the interpretation or meaning of the scale of the ICAR model depends on the graph structure, C. This implies that the same prior distribution assigned to the spatial_scale will differ in its implications if C is changed; in other words, the priors are not transportable across models, and models that use the same nominal prior actually have different priors assigned to spatial_scale.

    -

    Borrowing R code from Morris (2017) and following Freni-Sterrantino et al. (2018), the following R code can be used to create the scale_factor for the BYM2 model (note, this requires the INLA R package), given a spatial adjacency matrix, C:

    ## create a list of data for stan_icar
    -icar.data <- geostan::prep_icar_data(C)
    -## calculate scale_factor for each of k connected group of nodes
    -k <- icar.data$k
    -scale_factor <- vector(mode = "numeric", length = k)
    -for (j in 1:k) {
    -  g.idx <- which(icar.data$comp_id == j) 
    -  if (length(g.idx) == 1) {
    -       scale_factor[j] <- 1
    -       next
    -    }    
    -  Cg <- C[g.idx, g.idx] 
    -  scale_factor[j] <- scale_c(Cg) 
    -}
    -
    - -

    This code adjusts for 'islands' or areas with zero neighbors, and it also handles disconnected graph structures (see Donegan 2021). Following Freni-Sterrantino (2018), disconnected components of the graph structure are given their own intercept term; however, this value is added to phi automatically inside the Stan model. Therefore, the user never needs to make any adjustments for this term. (If you want to avoid complications from a disconnected graph structure, see stan_car).

    -

    Note, the code above requires the scale_c function; it has package dependencies that are not included in geostan. To use scale_c, you have to load the following R function:

    #' compute scaling factor for adjacency matrix, accounting for differences in spatial connectivity 
    -#'
    -#' @param C connectivity matrix
    -#'
    -#' @details
    -#'
    -#' Requires the following packages: 
    -#'
    -#' library(Matrix)
    -#' library(INLA);
    -#' library(spdep)
    -#' library(igraph)
    -#'  
    -#' @source
    -#'
    -#'   Morris, Mitzi (2017). Spatial Models in Stan: Intrinsic Auto-Regressive Models for Areal Data. <https://mc-stan.org/users/documentation/case-studies/icar_stan.html>
    -#'
    -scale_c <- function(C) {
    - geometric_mean <- function(x) exp(mean(log(x))) 
    - N = dim(C)[1]
    - Q =  Diagonal(N, rowSums(C)) - C
    - Q_pert = Q + Diagonal(N) * max(diag(Q)) * sqrt(.Machine$double.eps)
    - Q_inv = inla.qinv(Q_pert, constr=list(A = matrix(1,1,N),e=0))
    - scaling_factor <- geometric_mean(Matrix::diag(Q_inv)) 
    - return(scaling_factor) 
    -}
    -
    - - -

    Spatially lagged covariates (SLX)

    - - -

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, ...)
    -
    - -

    is a shortcut for

    stan_glm(y ~ I(W %*% x1) + x1 + x2, ...)
    -
    - -

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    +

    Borrowing R code from Morris (2017) and following Freni-Sterrantino et al. (2018), the following R code can be used to create the scale_factor for the BYM2 model (note, this requires the INLA R package), given a spatial adjacency matrix, C:

    ## create a list of data for stan_icar
    +icar.data <- geostan::prep_icar_data(C)
    +## calculate scale_factor for each of k connected group of nodes
    +k <- icar.data$k
    +scale_factor <- vector(mode = "numeric", length = k)
    +for (j in 1:k) {
    +  g.idx <- which(icar.data$comp_id == j) 
    +  if (length(g.idx) == 1) {
    +       scale_factor[j] <- 1
    +       next
    +    }    
    +  Cg <- C[g.idx, g.idx] 
    +  scale_factor[j] <- scale_c(Cg) 
    +}
    + +

    This code adjusts for 'islands' or areas with zero neighbors, and it also handles disconnected graph structures (see Donegan 2021). Following Freni-Sterrantino (2018), disconnected components of the graph structure are given their own intercept term; however, this value is added to phi automatically inside the Stan model. Therefore, the user never needs to make any adjustments for this term. (If you want to avoid complications from a disconnected graph structure, see stan_car).

    +

    Note, the code above requires the scale_c function; it has package dependencies that are not included in geostan. To use scale_c, you have to load the following R function:

    #' compute scaling factor for adjacency matrix, accounting for differences in spatial connectivity 
    +#'
    +#' @param C connectivity matrix
    +#'
    +#' @details
    +#'
    +#' Requires the following packages: 
    +#'
    +#' library(Matrix)
    +#' library(INLA);
    +#' library(spdep)
    +#' library(igraph)
    +#'  
    +#' @source
    +#'
    +#'   Morris, Mitzi (2017). Spatial Models in Stan: Intrinsic Auto-Regressive Models for Areal Data. <https://mc-stan.org/users/documentation/case-studies/icar_stan.html>
    +#'
    +scale_c <- function(C) {
    + geometric_mean <- function(x) exp(mean(log(x))) 
    + N = dim(C)[1]
    + Q =  Diagonal(N, rowSums(C)) - C
    + Q_pert = Q + Diagonal(N) * max(diag(Q)) * sqrt(.Machine$double.eps)
    + Q_inv = inla.qinv(Q_pert, constr=list(A = matrix(1,1,N),e=0))
    + scaling_factor <- geometric_mean(Matrix::diag(Q_inv)) 
    + return(scaling_factor) 
    +}
    + +
    + +
    +

    Spatially lagged covariates (SLX)

    + + +

    The slx argument is a convenience function for including SLX terms. For example,

    stan_glm(y ~ x1 + x2, slx = ~ x1, ...)
    + +

    is a shortcut for

    stan_glm(y ~ I(W %*% x1) + x1 + x2, ...)
    + +

    where W is a row-standardized spatial weights matrix (see shape2mat). SLX terms will always be prepended to the design matrix, as above, which is important to know when setting prior distributions for regression coefficients.

    For measurement error (ME) models, the SLX argument is the only way to include spatially lagged covariates since the SLX term needs to be re-calculated on each iteration of the MCMC algorithm.

    +
    -

    Measurement error (ME) models

    +
    +

    Measurement error (ME) models

    -

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates (Donegan et al. 2021; Donegan 2021). With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
    +

    The ME models are designed for surveys with spatial sampling designs, such as the American Community Survey (ACS) estimates (Donegan et al. 2021; Donegan 2021). With estimates, x, and their standard errors, se, the ME models have one of the following two specifications, depending on the user input:

           x ~ Gauss(x_true, se)
            x_true ~ MVGauss(mu, Sigma)
            Sigma = (I - rho * C)^(-1) M * tau^2
            mu ~ Gauss(0, 100)
            tau ~ student_t(10, 0, 40)
            rho ~ uniform(lower_bound, upper_bound)
    -
    +
    -

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

           x ~ Gauss(x_true, se)
    -       x_true ~ student_t(df, mu, sigma)
    -       df ~ gamma(3, 0.2)
    -       mu ~ Gauss(0, 100)
    -       sigma ~ student_t(10, 0, 40)
    -
    +

    where the covariance matrix, Sigma, has the conditional autoregressive specification, and tau is the scale parameter. If ME$car_parts is not provided by the user, then a non-spatial model will be used instead:

    x ~ Gauss(x_true, se)
    +       x_true ~ student_t(df, mu, sigma)
    +       df ~ gamma(3, 0.2)
    +       mu ~ Gauss(0, 100)
    +       sigma ~ student_t(10, 0, 40)
    -

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

           x ~ Gauss(x_true, se)
    -      logit(x_true) ~ MVGauss(mu, Sigma)
    -
    +

    For strongly skewed variables, such as census tract poverty rates, it can be advantageous to apply a logit transformation to x_true before applying the CAR or Student t prior model. When the logit argument is used, the model becomes:

    x ~ Gauss(x_true, se)
    +      logit(x_true) ~ MVGauss(mu, Sigma)

    and similar for the Student t model.

    +
    -

    Censored counts

    +
    +

    Censored counts

    Vital statistics systems and disease surveillance programs typically suppress case counts when they are smaller than a specific threshold value. In such cases, the observation of a censored count is not the same as a missing value; instead, you are informed that the value is an integer somewhere between zero and the threshold value. For Poisson models (family = poisson()), you can use the censor_point argument to encode this information into your model.

    -

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    -
    +

    Internally, geostan will keep the index values of each censored observation, and the index value of each of the fully observed outcome values. For all observed counts, the likelihood statement will be:

     p(y_i | data, model) = Poisson(y_i | fitted_i), 
    +
    -

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    -
    +

    as usual. For each censored count, the likelihood statement will equal the cumulative Poisson distribution function for values zero through the censor point:

      p(y_j | data, model) = sum_{m=0}^censor_point Poisson( c_m | fitted_j),
    +

    For example, the US Centers for Disease Control and Prevention's CDC WONDER database censors all death counts between 0 and 9. To model CDC WONDER mortality data, you could provide censor_point = 9 and then the likelihood statement for censored counts would equal the summation of the Poisson probability mass function over each integer ranging from zero through 9 (inclusive), conditional on the fitted values (i.e., all model parameters). See Donegan (2021) for additional discussion, references, and Stan code.

    +
    -

    See also

    - - -

    Author

    - -

    Connor Donegan, Connor.Donegan@UTDallas.edu

    - -

    Examples

    -
    # \donttest{
    -library(rstan)
    -library(bayesplot)
    -library(sf)
    -if (FALSE) {
    -options(mc.cores = parallel::detectCores())
    -}
    -data(sentencing)
    -
    -C <- shape2mat(sentencing, "B")
    -log_e <- log(sentencing$expected_sents)
    -fit.bym <- stan_icar(sents ~ offset(log_e),
    -                     family = poisson(),
    -                     data = sentencing,
    -                     type = "bym",
    -                     C = C
    - )
    -
    -# check effective sample size and convergence
    -rstan::stan_ess(fit.bym$stanfit)
    -rstan::stan_rhat(fit.bym$stanfit)
    -
    -# see some spatial diagnostics
    -sp_diag(fit.bym, sentencing)
    -
    -# posterior predictive distribution
    -yrep <- posterior_predict(fit.bym, S = 100)
    -y <- sentencing$sents
    -bayesplot::ppc_dens_overlay(y, yrep)
    -
    -# map the smooth spatial term
    -sp.trend <- spatial(fit.bym)$mean
    -ggplot( st_as_sf(sentencing) ) +
    -  geom_sf(aes(fill = sp.trend)) +
    -  scale_fill_gradient2(
    -   low = "navy",
    -   high = "darkred"
    -  ) +
    -  theme_void()
    -
    -# calculate log-standardized sentencing ratios (log-SSRs)
    -## (like Standardized Incidence Ratios: observed/expected case counts)
    -f <- fitted(fit.bym)$mean
    -SSR <- f / sentencing$expected_sents
    -log.SSR <- log( SSR, base = 2)
    -
    -ggplot( st_as_sf(sentencing) ) +
    -  geom_sf(aes(fill = log.SSR)) +
    -  scale_fill_gradient2(
    -   low = "navy",
    -   high = "darkred"
    -  ) +
    -  labs(title = "Log-standardized sentencing ratios",
    -       subtitle = "log( Fitted/Expected), base 2") +
    -  theme_void() +
    -  theme(
    -   legend.position = "bottom",
    -   legend.key.height = unit(0.35, "cm"),
    -   legend.key.width = unit(1.5, "cm")
    -  )
    -# }
    +    
    + +
    +

    Author

    +

    Connor Donegan, Connor.Donegan@UTDallas.edu

    +
    + +
    +

    Examples

    +
    # \donttest{
    +library(rstan)
    +library(bayesplot)
    +library(sf)
    +if (FALSE) {
    +options(mc.cores = parallel::detectCores())
    +}
    +data(sentencing)
    +
    +C <- shape2mat(sentencing, "B")
    +log_e <- log(sentencing$expected_sents)
    +fit.bym <- stan_icar(sents ~ offset(log_e),
    +                     family = poisson(),
    +                     data = sentencing,
    +                     type = "bym",
    +                     C = C
    + )
    +
    +# check effective sample size and convergence
    +rstan::stan_ess(fit.bym$stanfit)
    +rstan::stan_rhat(fit.bym$stanfit)
    +
    +# see some spatial diagnostics
    +sp_diag(fit.bym, sentencing)
    +
    +# posterior predictive distribution
    +yrep <- posterior_predict(fit.bym, S = 100)
    +y <- sentencing$sents
    +bayesplot::ppc_dens_overlay(y, yrep)
    +
    +# map the smooth spatial term
    +sp.trend <- spatial(fit.bym)$mean
    +ggplot( st_as_sf(sentencing) ) +
    +  geom_sf(aes(fill = sp.trend)) +
    +  scale_fill_gradient2(
    +   low = "navy",
    +   high = "darkred"
    +  ) +
    +  theme_void()
    +
    +# calculate log-standardized sentencing ratios (log-SSRs)
    +## (like Standardized Incidence Ratios: observed/expected case counts)
    +f <- fitted(fit.bym)$mean
    +SSR <- f / sentencing$expected_sents
    +log.SSR <- log( SSR, base = 2)
    +
    +ggplot( st_as_sf(sentencing) ) +
    +  geom_sf(aes(fill = log.SSR)) +
    +  scale_fill_gradient2(
    +   low = "navy",
    +   high = "darkred"
    +  ) +
    +  labs(title = "Log-standardized sentencing ratios",
    +       subtitle = "log( Fitted/Expected), base 2") +
    +  theme_void() +
    +  theme(
    +   legend.position = "bottom",
    +   legend.key.height = unit(0.35, "cm"),
    +   legend.key.width = unit(1.5, "cm")
    +  )
    +# }
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/reference/student_t.html b/docs/reference/student_t.html deleted file mode 100644 index a172e5d0..00000000 --- a/docs/reference/student_t.html +++ /dev/null @@ -1,197 +0,0 @@ - - - - - - - - -Student t family — student_t • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    create a family object for the Student t likelihood

    -
    - -
    student_t(df = 10, location = 0, scale, variable = NULL)
    - - -

    Value

    - -

    An object of class family

    - -

    Examples

    -
    if (FALSE) {
    -data(georgia)
    -fit = stan_glm(log(rate.male) ~ 1, data = georgia, family = student_t())
    -}
    -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.9001.

    -
    - -
    -
    - - - - - - - - - - diff --git a/docs/reference/theil.html b/docs/reference/theil.html deleted file mode 100644 index 5bb3d74a..00000000 --- a/docs/reference/theil.html +++ /dev/null @@ -1,212 +0,0 @@ - - - - - - - - -Theil's inequality index — theil • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Calculates Theil's entropy-based measure of inequality for a given set of disease incidence rates and populations at risk.

    -
    - -
    theil(count, population, rate, total = TRUE)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    count

    Number of cases (e.g., disease incidence).

    population

    Population size (population at risk).

    rate

    Incidence rates. If provided, case counts will be calculated automatically as cases = rates * Population.

    total

    If TRUE, the values will all be summed; if FALSE, then each area's contribution to total inequality will be returned.

    - -

    Source

    - -

    Conceicao, P. and P. Ferreira (2000). The young person's guide to the Theil Index: Suggesting intuitive interpretations and exploring analytical applications. University of Texas Inequality Project. UTIP Working Paper Number 14. Accessed May 1, 2021 from https://utip.gov.utexas.edu/papers.html

    -

    Theil, Henri (1972). Statistical Decomposition Analysis. Amsterdam, The Netherlands and London, UK: North-Holland Publishing Company.

    -

    Shannon, Claude E. and Weaver, Warren (1963). The Mathematical Theory of Communication. Urbana and Chicago, USA: University of Illinois Press.

    -

    Value

    - -

    if total = TRUE, a scalar value; if total = FALSE, a vector of numeric values, where each value represents that area's contribution to total inequality.

    -

    Details

    - -

    Theil's index is a good index of inequality in disease and mortality burdens when multiple groups are being considered, as is typical of geospatial analysis. It provides a summary measure of inequality across a set of areal units, such as counties, that may be tracked over time. Also, it is interesting because it is additive, and thus admits of simple decompositions.

    -

    The index measures discrepancies between a population's share of the disease burden, omega, and their share of the population, eta. A situation of zero inequality would imply that each population's share of cases is equal to its population share, or, omega=eta. Each population's contribution to total inequality is calculated as:

                 T_i = omega_i * [log(omega_i/eta_i)],
    -
    - -

    the log-ratio of case-share to population-share, weighted by their share of cases. Theil's index for all areas is the sum of each area's T_i:

                 T = sum_(i=1)^n T_i.
    -
    - -

    Theil's T is thus a weighted mean of log-ratios of case shares to population shares, where each log-ratio (which we may describe as a raw inequality score) is weighted by its share of total cases. The index has a minimum of zero and a maximum of log(N), where N is the number of units (e.g., number of counties). -Theil's index is based on Shannon's information theory; Theil used it to study a variety of topics, including income inequality and racial segregation. Theil's index is often of great interest because it is additive across multiple scales, such as when the data has a nested structure to it (e.g., counties within states). The Texas Inequality Project provides introductions to, and examples of using, the Theil index (Conceicao and Ferreira, 2000). However, this R function is just a simple implementation for `flat' or non-nested data structures (e.g., a set of counties).

    - -

    Examples

    -
    - -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/waic.html b/docs/reference/waic.html index 1ea8f2e4..9fcdee0a 100644 --- a/docs/reference/waic.html +++ b/docs/reference/waic.html @@ -1,93 +1,18 @@ - - - - - - - -WAIC — waic • geostan - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -WAIC — waic • geostan - - - - - - - - - - - - - - +
    -
    - -
    - -
    +
    @@ -155,71 +61,64 @@

    WAIC

    Widely Applicable Information Criterion (WAIC) for model comparison

    -
    waic(fit, pointwise = FALSE, digits = 2)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    fit

    An geostan_fit object or any Stan model with a parameter named "log_lik", the pointwise log likelihood of the observations.

    pointwise

    Logical (defaults to `FALSE`), should a vector of values for each observation be returned?

    digits

    Round results to this many digits.

    - -

    Source

    +
    +
    waic(fit, pointwise = FALSE, digits = 2)
    +
    +
    +

    Source

    Watanabe, S. (2010). Asymptotic equivalence of Bayes cross validation and widely applicable information criterion in singular learning theory. Journal of Machine Learning Research 11, 3571-3594.

    -

    Value

    - +
    +
    +

    Arguments

    +
    fit
    +

    A geostan_fit object or any Stan model with a parameter named "log_lik", the pointwise log likelihood of the observations.

    +
    pointwise
    +

    Logical (defaults to FALSE), should a vector of values for each observation be returned?

    +
    digits
    +

    Round results to this many digits.

    +
    +
    +

    Value

    A vector of length 3 with WAIC, a rough measure of the effective number of parameters estimated by the model Eff_pars, and log predictive density Lpd. If pointwise = TRUE, results are returned in a data.frame.

    -

    See also

    - - +
    +
    +

    See also

    + +
    -

    Examples

    -
    
    -# \donttest{
    -data(georgia)
    -fit <- stan_glm(log(rate.male) ~ 1, data = georgia)
    -waic(fit)
    -# }
    +    
    +

    Examples

    +
    
    +# \donttest{
    +data(georgia)
    +fit <- stan_glm(log(rate.male) ~ 1, data = georgia)
    +waic(fit)
    +# }
     
     
    +
    +
    -
    - +
    - - - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0873902d..c4da974d 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,9 +3,6 @@ https://connordonegan.github.io/geostan/404.html - - https://connordonegan.github.io/geostan/LICENSE.html - https://connordonegan.github.io/geostan/articles/index.html @@ -27,21 +24,12 @@ https://connordonegan.github.io/geostan/reference/aple.html - - https://connordonegan.github.io/geostan/reference/append_priors.html - https://connordonegan.github.io/geostan/reference/auto_gaussian.html - - https://connordonegan.github.io/geostan/reference/count_neighbors.html - https://connordonegan.github.io/geostan/reference/edges.html - - https://connordonegan.github.io/geostan/reference/exp_pars.html - https://connordonegan.github.io/geostan/reference/expected_mc.html @@ -78,9 +66,6 @@ https://connordonegan.github.io/geostan/reference/n_eff.html - - https://connordonegan.github.io/geostan/reference/ohio.html - https://connordonegan.github.io/geostan/reference/posterior_predict.html @@ -93,9 +78,6 @@ https://connordonegan.github.io/geostan/reference/prep_me_data.html - - https://connordonegan.github.io/geostan/reference/prep_sp_me_data.html - https://connordonegan.github.io/geostan/reference/priors.html @@ -117,30 +99,18 @@ https://connordonegan.github.io/geostan/reference/sp_diag.html - - https://connordonegan.github.io/geostan/reference/spatial.html - https://connordonegan.github.io/geostan/reference/stan_car.html https://connordonegan.github.io/geostan/reference/stan_esf.html - - https://connordonegan.github.io/geostan/reference/stan_example.html - https://connordonegan.github.io/geostan/reference/stan_glm.html https://connordonegan.github.io/geostan/reference/stan_icar.html - - https://connordonegan.github.io/geostan/reference/student_t.html - - - https://connordonegan.github.io/geostan/reference/theil.html - https://connordonegan.github.io/geostan/reference/waic.html diff --git a/index.html b/index.html deleted file 
mode 100644 index a7afd5cd..00000000 --- a/index.html +++ /dev/null @@ -1,260 +0,0 @@ - - - - - - - - - - - - - -index - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - - - - -
    -

    geostan

    -

    The geostan R package supports a complete spatial analysis workflow with hierarchical Bayesian models (HBMs) for areal data and a variety of functions for visualizing spatial data and model results.

    -
    -

    Disease mapping and spatial regression

    -

    Model small-area incidence rates with mortality or disease data recorded across areal units like counties or census tracts.

    -
    -
    -

    Observational uncertainty

    -

    Incorporate information on data reliability into any geostan model. Built specifically for American Community Survey (ACS) data.

    -
    -
    -

    Spatial analysis tools

    -

    Tools for visualizing and measuring spatial autocorrelation and map patterns, for exploratory analysis and model criticism.

    -
    -
    -

    Custom Stan models

    -

    Tools for building custom spatial models in Stan.

    -
    -
    -

    RStan ecosystem

    -

    Compatible with a suite of high-quality R packages for Bayesian inference.

    -
    -
    -

    Installation

    -

    Install geostan from CRAN using:

    -
    install.packages("geostan")
    -
    -
    -

    Citation

    -
      -
    • Donegan, Connor (2021). geostan: Bayesian Spatial Analysis. R package Version 0.1.1 https://connordonegan.github.io/geostan

    • -
    • Donegan, Connor, Yongwan Chun, and Daniel A. Griffith. Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. International Journal of Environmental Research and Public Health 18.13 (2021): 6856. DOI: 10.3390/ijerph18136856

    • -
    -

    All geostan models are built using Stan:

    -
      -
    • Carpenter B., Gelman A., Hoffman M. D., Lee D., Goodrich B., Betancourt M., Brubaker M., Guo J., Li P., and Riddell A. (2017). Stan: A probabilistic programming language. Journal of Statistical Software. 76(1). DOI: 10.18637/jss.v076.i01
    • -
    -


    -

    The geostan package was built with the help of rstantools:

    -

    Gabry, Jonah, Ben Goodrich, and Martin Lysy (2021). rstantools: Tools for Developing R Packages Interfacing with ‘Stan’. R package version 2.1.1 https://mc-stan.org/rstantools/index.html.

    -
    -
    - - - - -
    - - - - - - - - - - - - - - - diff --git a/index.md b/index.md deleted file mode 100644 index 92aff7d4..00000000 --- a/index.md +++ /dev/null @@ -1,37 +0,0 @@ -# geostan - -The **geostan** R package supports a complete spatial analysis -workflow with hierarchical Bayesian models (HBMs) for areal -data and a variety of functions for visualizing spatial data and model results. - -**geostan** is an interface to [Stan](https://mc-stan.org), a state-of-the-art platform for Bayesian inference. - -### Disease mapping and spatial regression - -Model small-area incidence rates with mortality or disease data recorded across areal units like counties or census tracts. - -### Observational uncertainty - -Incorporate information on data reliability into any **geostan** model. Built specifically for American Community Survey (ACS) data. - -### Spatial analysis tools - -Tools for visualizing and measuring spatial autocorrelation and map patterns, for exploratory analysis and model criticism. - -### Custom Stan models - -Tools for building custom spatial models in [Stan](https://mc-stan.org/). - -### RStan ecosystem - -Compatible with a suite of high-quality R packages for Bayesian inference. - -## Installation - -Install **geostan** using: - -``` r -if (!require(drat)) install.packages("drat") -drat::addRepo("connordonegan") -install.packages("geostan") -``` diff --git a/inst/CITATION b/inst/CITATION index 18783e80..b9eea3f6 100755 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,9 +5,9 @@ bibentry( author= "Donegan, Connor", url = "https://connordonegan.github.io/geostan/", year = 2021, - note = "R package version 0.1.1", + note = "R package version 0.1.2", textVersion = paste( - "Donegan, Connor (2021). geostan: Bayesian spatial analysis. R package version 0.1.0.", + "Donegan, Connor (2021). geostan: Bayesian spatial analysis. 
R package version 0.1.2.", "https://connordonegan.github.io/geostan/" ) ) diff --git a/man/edges.Rd b/man/edges.Rd index a50a8f8a..1a285dd0 100644 --- a/man/edges.Rd +++ b/man/edges.Rd @@ -12,7 +12,7 @@ edges(C, unique_pairs_only = TRUE) \item{unique_pairs_only}{By default, only unique pairs of nodes (i, j) will be included in the output.} } \value{ -Returns a \code{data.frame} with three columns. The first two columns (\code{node1} and \code{node2}) contain the indices of connected pairs of nodes; only unique pairs of nodes are included (unless `unique_pairs_only = FALSE`). The third column (\code{weight}) contains the corresponding matrix element, \code{C[node1, node2]}. +Returns a \code{data.frame} with three columns. The first two columns (\code{node1} and \code{node2}) contain the indices of connected pairs of nodes; only unique pairs of nodes are included (unless \code{unique_pairs_only = FALSE}). The third column (\code{weight}) contains the corresponding matrix element, \code{C[node1, node2]}. } \description{ Creates a list of connected nodes following the graph representation of a spatial connectivity matrix. 
diff --git a/man/figures/README-example-1.png b/man/figures/README-example-1.png deleted file mode 100644 index 2be0dcf9..00000000 Binary files a/man/figures/README-example-1.png and /dev/null differ diff --git a/man/figures/README-example-2.png b/man/figures/README-example-2.png deleted file mode 100644 index 60ee4edc..00000000 Binary files a/man/figures/README-example-2.png and /dev/null differ diff --git a/man/figures/README-unnamed-chunk-4-1.png b/man/figures/README-unnamed-chunk-4-1.png deleted file mode 100644 index 87b0dedd..00000000 Binary files a/man/figures/README-unnamed-chunk-4-1.png and /dev/null differ diff --git a/man/figures/logo.png b/man/figures/logo.png index f84f3e97..eb042645 100644 Binary files a/man/figures/logo.png and b/man/figures/logo.png differ diff --git a/man/geostan-package.Rd b/man/geostan-package.Rd index d88e3a06..c31ba5be 100644 --- a/man/geostan-package.Rd +++ b/man/geostan-package.Rd @@ -11,11 +11,11 @@ Bayesian spatial modeling powered by Stan. \code{geostan} offers access to a var \references{ Carpenter, B., Gelman, A., Hoffman, M.D., Lee, D., Goodrich, B., Betancourt, M., Brubaker, M., Guo, J., Li, P., Riddell, A., 2017. Stan: A probabilistic programming language. Journal of statistical software 76. \doi{10.18637/jss.v076.i01}. -Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. *Spatial Statistics*. \doi{10.1016/j.spasta.2020.100450} (open access: \doi{10.31219/osf.io/fah3z}). +Donegan, C., Y. Chun and A. E. Hughes (2020). Bayesian estimation of spatial filters with Moran’s Eigenvectors and hierarchical shrinkage priors. \emph{Spatial Statistics}. \doi{10.1016/j.spasta.2020.100450} (open access: \doi{10.31219/osf.io/fah3z}). -Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. *Int. J. Env. Res. 
and Public Health* 18 (13): 6856. \doi{10.3390/ijerph18136856}. Supplementary material: \url{https://github.com/ConnorDonegan/survey-HBM}. +Donegan, Connor and Chun, Yongwan and Griffith, Daniel A. (2021). Modeling community health with areal data: Bayesian inference with survey standard errors and spatial structure. \emph{Int. J. Env. Res. and Public Health} 18 (13): 6856. \doi{10.3390/ijerph18136856}. Supplementary material: \url{https://github.com/ConnorDonegan/survey-HBM}. -Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. *OSF Preprints*. \doi{10.31219/osf.io/3ey65}. +Donegan, Connor (2021). Spatial conditional autoregressive models in Stan. \emph{OSF Preprints}. \doi{10.31219/osf.io/3ey65}. Gabry, J., Goodrich, B. and Lysy, M. (2020). rstantools: Tools for developers of R packages interfacing with Stan. R package version 2.1.1 \url{https://mc-stan.org/rstantools/}. diff --git a/man/mc.Rd b/man/mc.Rd index 33f50106..1295517d 100644 --- a/man/mc.Rd +++ b/man/mc.Rd @@ -18,7 +18,7 @@ mc(x, w, digits = 3, warn = TRUE) \item{digits}{Number of digits to round results to.} -\item{warn}{If `FALSE`, no warning will be printed to inform you when observations with zero neighbors have been dropped.} +\item{warn}{If \code{FALSE}, no warning will be printed to inform you when observations with zero neighbors have been dropped.} } \value{ The Moran coefficient, a numeric value. diff --git a/man/posterior_predict.Rd b/man/posterior_predict.Rd index 6a359f76..86e45a4d 100644 --- a/man/posterior_predict.Rd +++ b/man/posterior_predict.Rd @@ -11,16 +11,16 @@ posterior_predict(object, S, summary = FALSE, width = 0.95, car_parts, seed) \item{S}{Optional; number of samples to take from the posterior distribution. The default, and maximum, is the total number of samples stored in the model.} -\item{summary}{Should the predictive distribution be summarized by its means and central quantile intervals? 
If \code{summary = FALSE}, an `S` x `N` matrix of samples will be returned. If \code{summary = TRUE}, then a `data.frame` with the means and `100*width` credible intervals is returned.} +\item{summary}{Should the predictive distribution be summarized by its means and central quantile intervals? If \code{summary = FALSE}, an \code{S} x \code{N} matrix of samples will be returned. If \code{summary = TRUE}, then a \code{data.frame} with the means and \code{100*width} credible intervals is returned.} -\item{width}{Only used if \code{summary = TRUE}, to set the quantiles for the credible intervals. Defaults to `width = 0.95`.} +\item{width}{Only used if \code{summary = TRUE}, to set the quantiles for the credible intervals. Defaults to \code{width = 0.95}.} -\item{car_parts}{Data for CAR model specification; only required for \code{\link[geostan]{stan_car}} with `family = auto_gaussian()`.} +\item{car_parts}{Data for CAR model specification; only required for \code{\link[geostan]{stan_car}} with \code{family = auto_gaussian()}.} \item{seed}{A single integer value to be used in a call to \code{\link[base]{set.seed}} before taking samples from the posterior distribution.} } \value{ -A matrix of size S x N containing samples from the posterior predictive distribution, where S is the number of samples drawn and N is the number of observations. If `summary = TRUE`, a `data.frame` with N rows and 3 columns is returned (with column names `mu`, `lwr`, and `upr`). +A matrix of size S x N containing samples from the posterior predictive distribution, where S is the number of samples drawn and N is the number of observations. If \code{summary = TRUE}, a \code{data.frame} with N rows and 3 columns is returned (with column names \code{mu}, \code{lwr}, and \code{upr}). } \description{ Draw samples from the posterior predictive distribution of a fitted \code{geostan} model. 
diff --git a/man/sentencing.Rd b/man/sentencing.Rd index 06b9bbef..e93f88f8 100644 --- a/man/sentencing.Rd +++ b/man/sentencing.Rd @@ -7,28 +7,28 @@ \format{ A spatial polygons data frame with the following attributes: \describe{ - \item{name}{County name} - \item{wpop}{White population total for years 1905-1910} - \item{bpop}{Black population total for years 1905-1910} - \item{sents}{Number of state prison sentences, 1905-1910} - \item{plantation_belt}{Binary indicator for inclusion in the plantation belt} - \item{pct_ag_1910}{Percent of land area in agriculture, 1910} - \item{expected_sents}{Expected sentences given demographic information and state level sentencing rates by race} - \item{sir_raw}{Standardized incident ratio (observed/expected sentences)} +\item{name}{County name} +\item{wpop}{White population total for years 1905-1910} +\item{bpop}{Black population total for years 1905-1910} +\item{sents}{Number of state prison sentences, 1905-1910} +\item{plantation_belt}{Binary indicator for inclusion in the plantation belt} +\item{pct_ag_1910}{Percent of land area in agriculture, 1910} +\item{expected_sents}{Expected sentences given demographic information and state level sentencing rates by race} +\item{sir_raw}{Standardized incident ratio (observed/expected sentences)} } } \source{ Donegan, Connor. "The Making of Florida's 'Criminal Class': Race, Modernity and the Convict Leasing Program." Florida Historical Quarterly 97.4 (2019): 408-434. \url{https://osf.io/2wj7s/}. Mullen, Lincoln A. and Bratt, Jordon. "USABoundaries: Historical and Contemporary Boundaries of the United States of America," - Journal of Open Source Software 3, no. 23 (2018): 314, \doi{10.21105/joss.00314}. +Journal of Open Source Software 3, no. 23 (2018): 314, \doi{10.21105/joss.00314}. } \usage{ sentencing } \description{ A spatial polygons data frame of historical 1910 county boundaries of Florida with aggregated state prison sentencing counts and census data. 
- Sentencing and population counts are aggregates over the period 1905-1910, where populations were interpolated linearly between decennial censuses of 1900 and 1910. +Sentencing and population counts are aggregates over the period 1905-1910, where populations were interpolated linearly between decennial censuses of 1900 and 1910. } \examples{ \dontrun{ diff --git a/man/shape2mat.Rd b/man/shape2mat.Rd index 9e95e806..68664494 100644 --- a/man/shape2mat.Rd +++ b/man/shape2mat.Rd @@ -40,7 +40,7 @@ Creates sparse matrix representations of spatial connectivity structures \details{ Haining and Li (Ch. 4) provide a helpful discussion of spatial connectivity matrices (Ch. 4). -The space-time connectivity matrix can be used for eigenvector space-time filtering (\code{\link[geostan]{stan_esf}}. The `lagged' space-time structure connects each observation to its own past (one period lagged) value and the past value of its neighbors. The `contemporaneous' specification links each observation to its neighbors and to its own in situ past (one period lagged) value (Griffith 2012, p. 23). +The space-time connectivity matrix can be used for eigenvector space-time filtering (\code{\link[geostan]{stan_esf}}). The 'lagged' space-time structure connects each observation to its own past (one period lagged) value and the past value of its neighbors. The 'contemporaneous' specification links each observation to its neighbors and to its own in situ past (one period lagged) value (Griffith 2012, p. 23).
} \examples{ diff --git a/man/waic.Rd b/man/waic.Rd index 689d6221..756ae3de 100644 --- a/man/waic.Rd +++ b/man/waic.Rd @@ -12,7 +12,7 @@ waic(fit, pointwise = FALSE, digits = 2) \arguments{ \item{fit}{An \code{geostan_fit} object or any Stan model with a parameter named "log_lik", the pointwise log likelihood of the observations.} -\item{pointwise}{Logical (defaults to `FALSE`), should a vector of values for each observation be returned?} +\item{pointwise}{Logical (defaults to \code{FALSE}), should a vector of values for each observation be returned?} \item{digits}{Round results to this many digits.} } diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png index eda63b82..e5a1d485 100644 Binary files a/pkgdown/favicon/apple-touch-icon-120x120.png and b/pkgdown/favicon/apple-touch-icon-120x120.png differ diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png index c2462d00..5884dccd 100644 Binary files a/pkgdown/favicon/apple-touch-icon-152x152.png and b/pkgdown/favicon/apple-touch-icon-152x152.png differ diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png index c8f9c77b..a976e5b5 100644 Binary files a/pkgdown/favicon/apple-touch-icon-180x180.png and b/pkgdown/favicon/apple-touch-icon-180x180.png differ diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png index 609b29d9..815f9ae4 100644 Binary files a/pkgdown/favicon/apple-touch-icon-60x60.png and b/pkgdown/favicon/apple-touch-icon-60x60.png differ diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png index baf34b05..54f002d8 100644 Binary files a/pkgdown/favicon/apple-touch-icon-76x76.png and b/pkgdown/favicon/apple-touch-icon-76x76.png differ diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png index c8f9c77b..a976e5b5 100644 Binary 
files a/pkgdown/favicon/apple-touch-icon.png and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png index dee38c26..51de331e 100644 Binary files a/pkgdown/favicon/favicon-16x16.png and b/pkgdown/favicon/favicon-16x16.png differ diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png index 020c52dd..12772ca4 100644 Binary files a/pkgdown/favicon/favicon-32x32.png and b/pkgdown/favicon/favicon-32x32.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico index 0cd405b7..f9f83d51 100644 Binary files a/pkgdown/favicon/favicon.ico and b/pkgdown/favicon/favicon.ico differ diff --git a/vignettes/measuring-sa.Rmd b/vignettes/measuring-sa.Rmd index 1efeabff..62004ad7 100755 --- a/vignettes/measuring-sa.Rmd +++ b/vignettes/measuring-sa.Rmd @@ -129,32 +129,28 @@ fit <- stan_glm(deaths.male ~ offset(log(pop.at.risk.male)), C = C, refresh = 0 # this line silences Stan's printing ) -print(fit) ``` -Because **geostan** uses Bayesian inference and a Markov chain Monte Carlo (MCMC) algorithm from the Stan modeling language to draw samples from the posterior distribution of parameters, the `fit` object contains not just summaries of results (as printed above) but also full probability distributions for each parameter. We can plot the posterior distribution of any model parameter; below is the probability distribution for the intercept, which is the mean log-county mortality rate. We can see it is centered on $-4.183$, which is a mortality rate of $e^{-4.183} = 153$ per 10,000. - -When necessary, Stan will print important warning messages, such as "Bulk Effective Samples Size (ESS) is too low." Looking at the printed results above, we can see that we kept a total of 4,000 MCMC samples for inference. 
If we then look at the "n_eff" (i.e., ESS) column in the table of results, we see that the effective sample size is smaller that the nominal sample size of 4,000 (due to serial autocorrelation in the MCMC samples). If the MCMC samples are an approximation of the exact posterior distribution, then the more samples we draw the closer our approximation will be. The Monte Carlo standard error of the estimates ("se_mean") tells us how close we are. To evaluate all model parameters (which you should always do), you can use the following function calls: `rstan::stan_ess(fit$stanfit)`, `rstan::stan_mcse(fit$stanfit)`, and `rstan::stan_rhat(fit$stanfit)` (and see the corresponding help pages, `?rstan::stan_rhat`.) +For a summary of model results: ```{r} -plot(fit, pars = "intercept") +print(fit) ``` -With MCMC samples, we can easily extrapolate our inferences from the posterior distribution of parameters to any function of those parameters [@mackay_2003]. This means we can obtain a posterior distribution for every county mortality rate, and for the difference between the modeled mortality rates (fitted values) and the crude mortality rates (i.e., the residuals: $\text{observed} - \text{modeled}$), and, further, for any function of those residuals. These are generally referred to as 'quantities of interest.' - -Quantities of interest can be useful for model criticism as well as for making inferences from a model. For example, we can measure the degree of spatial autocorrelation in the MCMC samples of residuals, resulting in a probability distribution for the residual autocorrelation. Or, if we wanted to measure health inequality across counties as a function of the county mortality rates, we could calculate that inequality measure for each MCMC sample of fitted values to obtain a probability distribution for the degree of inequality. 
+The printed summary of results shows that the posterior probability distribution for the intercept, which in this case represents the mean log-mortality rate, is centered on $-4.183$, which is a mortality rate of $e^{-4.183} = 153$ per 10,000. The `2.5%` and `97.5%` columns provide the bounds on the 95\% credible interval (CI) for each parameter; the CI for the intercept is [-4.22, -4.14].^[Stan will print important warning messages when Markov chain Monte Carlo (MCMC) diagnostics indicate any cause for concern, such as "Bulk Effective Samples Size (ESS) is too low." Looking at the printed results, we can see that we kept a total of 4,000 MCMC samples for inference. If we then look at the "n_eff" (i.e., ESS) column in the table of results, we see that the effective sample size is smaller than the nominal sample size of 4,000 (this is almost always the case, due to serial autocorrelation in the MCMC samples). To see diagnostics for all model parameters at once, you can use the following function calls: `rstan::stan_ess(fit$stanfit)`, `rstan::stan_mcse(fit$stanfit)`, and `rstan::stan_rhat(fit$stanfit)` (and see the corresponding help pages, `?rstan::stan_rhat`.)] -Now provide the fitted model, `fit`, and the spatial data, `georgia`, to the `sp_diag` function to see a set of spatial model diagnostics: +Provide the fitted model, `fit`, and the spatial data, `georgia`, to the `sp_diag` function to see a set of spatial model diagnostics: ```{r fig.width = 7.5} sp_diag(fit, georgia) ``` +The point-interval plot on the left shows the raw mortality rates (the raw outcome data) on the x-axis, the fitted values on the y-axis, and a 'perfect fit' (slope = 1, intercept = 0) line for reference. We can see that a number of the fitted values have posterior means that deviate from the observations; but this "shrinkage" towards the mean is not necessarily a problem.
In fact, it is often desirable insofar as it indicates that these are counties for which our data provide very little evidence as to what the risk of death is (i.e., the population is very small). (For an introductory discussion of information pooling and other topics as well, see @mcelreath_2016). -The point-interval plot on the left shows the raw mortality rates (the raw outcome data) on the x-axis, the fitted values on the y-axis, and a 'perfect fit' (slope = 1, intercept = 0) line for reference. We can see that a number of the fitted values have posterior means that deviate from the observations; but this "shrinkage" towards the mean is not necessarily a problem. In fact, it is often desirable insofar as it indicates that these are counties for which our data provide very little evidence as to what the risk of death is (i.e., the population is very small). (For a good introductory discussion of information pooling and many other topics as well, see @mcelreath_2016). +The middle panel is a Moran scatter plot of the model residuals, and the map shows the mean residual for each county. The residuals have been taken at their marginal posterior means. However, there is more than one way to measure residual autocorrelation. For an alternative visualization that uses the entire posterior distribution of parameters and provides an estimate of the residual Moran coefficient that will match the printed model results above (`MC = 0.022`), try `sp_diag(fit, georgia, mc_style = "hist")`. -The middle panel represents spatial autocorrelation in the residuals as measured by the Moran coefficient (i.e., autocorrelation in the joint probability distribution of residuals), and the map shows the mean residual for each county (marginal posterior means). In this case, the MC histogram shows that there is a small amount of residual autocorrelation, while the map indicates that this derives mainly from a north-south/metropolitan-rural trend. 
The trend in the residuals helps us see that shrinking towards the mean mortality rate is less than ideal in this case because we can see that county mortality rates are higher in the southern half of the state than in the greater Atlanta metropolitan area. +In this case, there is a very small amount of residual autocorrelation, and the map indicates that this derives from a slight north-south/metropolitan-rural trend. The trend in the residuals helps us see that shrinking towards the mean mortality rate is less than ideal in this case because we can see that county mortality rates are higher in the southern half of the state than in the greater Atlanta metropolitan area. -We could probably do better than shrinking towards the mean by using one of **geostan**'s spatial models (see the examples in `?stan_car`) or by adding one or more (substantively meaningful) covariates. +We could extend this model by using one of **geostan**'s spatial models (see the examples in `?stan_car`) or by adding one or more (substantively meaningful) covariates. ## References diff --git a/vignettes/spatial-me-models.Rmd b/vignettes/spatial-me-models.Rmd index ba5ca2d5..0de6de89 100644 --- a/vignettes/spatial-me-models.Rmd +++ b/vignettes/spatial-me-models.Rmd @@ -166,7 +166,7 @@ We get three plots: * A point-interval plot showing the ACS estimates on the horizontal axis against a summary of the posterior distribution on the vertical axis. This provides an indication of 1) the amount of uncertainty present in each $x_i$, and 2) the degree to which the mean of the posterior probability distribution for $x_i$ may differ from the raw survey estimates ($\delta_i$). -* A histogram of Moran coefficients calculated for each MCMC sample. The mean of the samples is printed at the top. Zero spatial autocorrelation is indicated by a small negative value (unlike the correlation coefficient, the midpoint of the MC is $-1/(n-1)$ [@chun_2013]). +* A Moran scatter plot of the $\delta_i$ values. 
Zero spatial autocorrelation is indicated by a small negative value (unlike the correlation coefficient, the midpoint of the MC is $-1/(n-1)$ [@chun_2013]). (Alternatively, autocorrelation can be visualized with a histogram of Moran coefficients that are calculated for each MCMC sample; see the `mc_style` argument.) * A map of the posterior mean for each $\delta_i$ value. @@ -205,7 +205,7 @@ print(fit$stanfit, pars = "x_ICE[91]") # plot(fit, pars = "x_ICE[91]") # plot(fit$stanfit, pars = "x_ICE[91]") ``` -Given the socioeconomic and demographic information on Clinch County, one could argue that the model is being overly conservative by shrinking towards the mean value. However, by examining full posterior distributions, we see in this case that the results are not particularly vulnerable to this concer due to the fact that the model is still positing that the raw estimate *and* more extreme values are quite plausible. Of course, this was already apparent from examination of the `me_diag` plots. +Given the socioeconomic and demographic information on Clinch County, one could argue that the model is being overly conservative by shrinking towards the mean value. However, by examining full posterior distributions, we see in this case that the results are not particularly vulnerable to this concern due to the fact that the model is still positing that the raw estimate *and* more extreme values are quite plausible. Of course, this was already apparent from examination of the `me_diag` plots. ### Working with MCMC samples from ME models @@ -253,7 +253,7 @@ Incorporating these ME models into any other **geostan** model is as simple as r At this point, we can introduce a more appropriate model for the mortality data.
We will use a Poisson likelihood for the counts of deaths, provide the log-population at risk as an offset term, and pool information across counties using a non-spatial Gaussian model for the log-rates, $\boldsymbol \phi$: $$y_i \sim Pois(e^{log(P_i) + \phi_i}) \\ \boldsymbol \phi \sim Gauss(\alpha + \boldsymbol x \beta, I \tau^2),$$ -where $\alpha$ is the mean log-mortality rate and $\beta$ is the regression coefficient on the modeled ICE, $\boldsymbol x$. $\tau^2$ is the variance of the log-mortality rates around the fitted regression line, $\alpha + X\beta$. This model for $\boldsymbol \phi$ is equivalent to a CAR model with $\rho=0$ and $M=I\tau^2$. +where $\alpha$ is the mean log-mortality rate and $\beta$ is the regression coefficient on the modeled ICE, $\boldsymbol x$. $\tau^2$ is the variance of the log-mortality rates around the fitted regression line, $\alpha + X\beta$. This model for $\boldsymbol \phi$ is equivalent to a CAR model with $\rho=0$ and $M=I\tau^2$ (a constant variance). ```{r eval = TRUE} fit_2 <- stan_glm(deaths.male ~ offset(log(pop.at.risk.male)) + ICE, @@ -285,7 +285,6 @@ ggplot(georgia) + geom_point(aes(ICE, log(rate.male), col = ICE.se), shape = 6, lwd = 2) + -# geom_label(label = row.names(georgia), aes(ICE, log(rate.male), col = ICE.se)) + labs(x = "ICE Estimate", y = "Crude log mortality") + scale_colour_gradient(low = "white", high = "darkred", name = "SE(ICE)") + theme(panel.background = element_rect(fill = 'gray20'),