Merge pull request #2 from dmlond/master

jmbvitor · jmbvitor · commit b247cb344d0d · 2015-05-12T17:33:44.000+01:00
Here are the Answers to 5-11-15
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,8 @@
+*bwa*
+.Rhistory
 .vagrant
 packer_cache/
+*sai*
+*fasta*
+*fastq*
+*sam*
diff --git a/conf/vagrant/Vagrantfile b/conf/vagrant/Vagrantfile
@@ -1,40 +1,11 @@
-# -*- mode: ruby -*-
-# vi: set ft=ruby :
-
 # Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
 VAGRANTFILE_API_VERSION = "2"
 
 Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
-  # All Vagrant configuration is done here. The most common configuration
-  # options are documented and commented below. For a complete reference,
-  # please see the online documentation at vagrantup.com.
-
-  # Every Vagrant virtual environment requires a box to build off of.
+  
+  # We are using Ubuntu 14.04 "Trusty Tahr"
   config.vm.box = "ubuntu/trusty64"
 
-  # Disable automatic box update checking. If you disable this, then
-  # boxes will only be checked for updates when the user runs
-  # `vagrant box outdated`. This is not recommended.
-  # config.vm.box_check_update = false
-
-  # Create a forwarded port mapping which allows access to a specific port
-  # within the machine from a port on the host machine. In the example below,
-  # accessing "localhost:8080" will access port 80 on the guest machine.
-  # config.vm.network "forwarded_port", guest: 80, host: 8080
-
-  # Create a private network, which allows host-only access to the machine
-  # using a specific IP.
-  # config.vm.network "private_network", ip: "192.168.33.10"
-
-  # Create a public network, which generally matched to bridged network.
-  # Bridged networks make the machine appear as another physical device on
-  # your network.
-  # config.vm.network "public_network"
-
-  # If true, then any SSH connections made will enable agent forwarding.
-  # Default value: false
-  # config.ssh.forward_agent = true
-
   # Share an additional folder to the guest VM. The first argument is
   # the path on the host to the actual folder. The second argument is
   # the path on the guest to mount the folder. And the optional third
@@ -44,79 +15,21 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   # Provider-specific configuration so you can fine-tune various
   # backing providers for Vagrant. These expose provider-specific options.
   # Example for VirtualBox:
-  #
   config.vm.provider "virtualbox" do |vb|
+    
      # (Don't) boot with headless mode
      vb.gui = true
   
      # Use VBoxManage to customize the VM. For example to change memory:
      vb.customize ["modifyvm", :id, "--memory", "2048"]
   end
-  #
-  # View the documentation for the provider you're using for more
-  # information on available options.
-
-  # Enable provisioning with CFEngine. CFEngine Community packages are
-  # automatically installed. For example, configure the host as a
-  # policy server and optionally a policy file to run:
-  #
-  # config.vm.provision "cfengine" do |cf|
-  #   cf.am_policy_hub = true
-  #   # cf.run_file = "motd.cf"
-  # end
-  #
-  # You can also configure and bootstrap a client to an existing
-  # policy server:
-  #
-  # config.vm.provision "cfengine" do |cf|
-  #   cf.policy_server_address = "10.0.2.15"
-  # end
 
   # Enable provisioning with Puppet stand alone.  Puppet manifests
   # are contained in a directory path relative to this Vagrantfile.
   # You will need to create the manifests directory and a manifest in
   # the file default.pp in the manifests_path directory.
-  #
   config.vm.provision "puppet" do |puppet|
      puppet.manifests_path = "manifests"
      puppet.manifest_file  = "default.pp"
   end
-
-  # Enable provisioning with chef solo, specifying a cookbooks path, roles
-  # path, and data_bags path (all relative to this Vagrantfile), and adding
-  # some recipes and/or roles.
-  #
-  # config.vm.provision "chef_solo" do |chef|
-  #   chef.cookbooks_path = "../my-recipes/cookbooks"
-  #   chef.roles_path = "../my-recipes/roles"
-  #   chef.data_bags_path = "../my-recipes/data_bags"
-  #   chef.add_recipe "mysql"
-  #   chef.add_role "web"
-  #
-  #   # You may also specify custom JSON attributes:
-  #   chef.json = { mysql_password: "foo" }
-  # end
-
-  # Enable provisioning with chef server, specifying the chef server URL,
-  # and the path to the validation key (relative to this Vagrantfile).
-  #
-  # The Opscode Platform uses HTTPS. Substitute your organization for
-  # ORGNAME in the URL and validation key.
-  #
-  # If you have your own Chef Server, use the appropriate URL, which may be
-  # HTTP instead of HTTPS depending on your configuration. Also change the
-  # validation key to validation.pem.
-  #
-  # config.vm.provision "chef_client" do |chef|
-  #   chef.chef_server_url = "https://api.opscode.com/organizations/ORGNAME"
-  #   chef.validation_key_path = "ORGNAME-validator.pem"
-  # end
-  #
-  # If you're using the Opscode platform, your validator client is
-  # ORGNAME-validator, replacing ORGNAME with your organization name.
-  #
-  # If you have your own Chef Server, the default validation client name is
-  # chef-validator, unless you changed the configuration.
-  #
-  #   chef.validation_client_name = "ORGNAME-validator"
 end
diff --git a/conf/vagrant/manifests/default.pp b/conf/vagrant/manifests/default.pp
@@ -38,7 +38,7 @@
 		creates   => '/usr/local/src/bwa-0.7.12.tar.bz2',
 		require   => Package[ 'wget' ];			
 	'unzip_bwa':
-		command   => 'bunzip2 bwa-0.7.12.tar.bz2',
+		command   => 'bunzip2 --keep bwa-0.7.12.tar.bz2',
 		cwd       => '/usr/local/src',
 		creates   => '/usr/local/src/bwa-0.7.12.tar',       
 		require   => [ Exec[ 'dl_bwa' ], Package[ 'bzip2' ] ];
@@ -64,7 +64,7 @@
 		creates   => '/usr/local/src/samtools-1.2.tar.bz2',      
 		require   => Package[ 'wget' ];
 	'unzip_samtools':
-		command   => 'bunzip2 samtools-1.2.tar.bz2',
+		command   => 'bunzip2 --keep samtools-1.2.tar.bz2',
 		cwd       => '/usr/local/src',
 		creates   => '/usr/local/src/samtools-1.2.tar',
 		require   => [ Exec[ 'dl_samtools' ], Package[ 'bzip2' ] ];
@@ -82,5 +82,22 @@
 		command   => 'ln -s /usr/local/src/samtools-1.2/samtools /usr/local/bin/samtools',
 		creates   => '/usr/local/bin/samtools',       
 		require   => Exec[ 'make_samtools' ];
+	
+	# clone the project repo
+	'clone_repo':
+		command   => 'git clone https://github.com/dmlond/arangs2015.git',
+		cwd       => '/home/vagrant',
+		creates   => '/home/vagrant/arangs2015',
+		require   => Package[ 'git' ];
+	'chown_repo':
+		command   => 'sudo chown -R vagrant /home/vagrant/arangs2015',
+		require   => Exec[ 'clone_repo' ];
+	'rm_repo_data':
+		command   => 'rm -rf /home/vagrant/arangs2015/data',
+		require   => Exec[ 'clone_repo' ];
+	'symlink_data':
+		command   => 'ln -s /vagrant_data /home/vagrant/arangs2015/data',
+		require   => Exec[ 'rm_repo_data' ];
+		
 
-}
+}
diff --git a/docs/2015-05-11/git/Worksheet.md b/docs/2015-05-11/git/Worksheet.md
@@ -1,34 +1,116 @@
+create a ~/.gitconfig file with our [example](https://github.com/dmlond/arangs2015/blob/master/docs/2015-05-11/git/example_gitconfig)
 - Clone your personal fork of our repository onto your machine
+choose the SSH clone url
+```bash
+$ git clone your_ssh_clone_url
+```
 - Use git to show information about the commit history for the project
+```bash
+$ git log
+```
 - Show information about files changed during the commits
+```bash
+$ git log --name-only
+```
 - Show information about files changed, with the status of each change for
 commit 4c4ff5ece4c3132c6fa29f3e826e3272686c78e0 (try 4c4ff5ece4)
+```bash
+$ git log -1 -U --name-status -c 4c4ff5ece4c3132c6fa29f3e826e3272686c78e0
+$ git log -1 -U --name-status 4c4ff5ece4
+$ git show 4c4ff5ece4
+```
 - add a file to the repository (use your favorite text editor)
 - use git status to find out the state of the repository
 - try git status --porcelain
 - What if you didnt like this file, how could you get rid of it (dont do it)
+```bash
+$ rm filename
+```
 - stage this file change
+```bash
+$ git add filename
+```
 - what do git status, and git status --porcelain show
 - How could you get rid of this file now? Go ahead and fully delete the file.
+```bash
+$ git reset HEAD file
+$ rm file
+```
 - Create a new file, and make sure it gets committed to the repository
 - What remote git repositories does your git repository know about?
+```bash
+$ git remote -v
+```
 - push your new changes to the file up to your github repository
+```bash
+$ git push
+```
 - Find the newly committed file in your github repository.  Edit it using
 Github and make a change.
 - Go back to the commandline and use git to pull these changes
+```bash
+$ git pull
+```
 - unix rm the file you created.
+```bash
+$ rm file
+```
 - What do git status and git status --porcelain show?
 - Can you get it back?
+```bash
+$ git checkout -- file
+```
 - rm it again, then stage the removal.
+```bash
+$ rm file
+$ git rm file
+```
 - get it back again
+```bash
+$ git reset HEAD file
+$ git checkout -- file
+```
 - git rm the file.  How is this different than using unix rm?
+`git rm` deletes the file from the working directory and stages the removal in
+one step; unix `rm` only deletes the file, so the removal still has to be staged
+
 - Commit the removal of the file, and push the changes up to Github
+```bash
+$ git commit && git push
+```
 - Use the Github Web interface to inspect changes for this file.  Can you
 still find the contents of the file in Github?  Do you think it is a
 good idea to store usernames and passwords in publicly available GitHub accounts?
+
+NO, because GitHub lets anyone review the history of your files, so the passwords
+stay visible in old commits.  There are ways to remove them, but it is not easy
+
 - See how far you can go with this by changing and staging various file combinations, then changing already staged files, then reverse staged changes, untracked changes of files with staged changes, and fully revert some of the files.
     - Avoiding what git command will ensure that none of these changes ever
     make it into the git log for the repository, or get pushed to a remote
+         git commit
     - What if you had committed (without pushes) a bunch of changes to your local fork of the repo, but decided that you didnt like any of it, and would like to get rid of all those commits and revert to the state of the repo as it exists in GitHub.  What would be the easiest way to do this?
+    ```bash
+    $ cd ..
+    $ rm -rf arangs2015
+    $ git clone SSH-CLONE-URL
+    ```
 - Edit one of the files, but do not add the changes.  Create a branch called 'try_bowtie'.  Use git status to find out the state of the repo.  Add and commit
 your changes to the branch.  Use git to find out all the branches you have made (you might see branches pulled in when you forked the repository from someone else).  Change to the master branch.  Use git status to find out the state of the repository.  Can you find your changes?  Switch back to the 'try_bowtie' branch.  Can you find your changes now?  Switch back to the master branch.  Merge try_bowtie in to the master branch.  Use git status to find the status of the master branch. Commit and push these changes to Github.  Use the Github web interface to find out if the branch was pushed. Remove the 'try_bowtie' branch from your repository.
+```bash
+$ echo "CHANGED" >> changed_file
+$ git checkout -b try_bowtie
+$ git status
+$ git add changed_file
+$ git commit changed_file
+$ git branch
+$ git checkout master
+$ git status
+$ git checkout try_bowtie
+$ git status
+$ git checkout master
+$ git merge try_bowtie
+$ git commit
+$ git push
+$ git branch -D try_bowtie
+```
diff --git a/docs/2015-05-11/github/Worksheet.md b/docs/2015-05-11/github/Worksheet.md
@@ -3,24 +3,31 @@ GitHub Worksheet
 
 * Create your personal Github Account
 * Find the Arangs2015 repository
-  - Does it have an Open Source License?
-  - Who Owns the Repository?
-  - How many people have forked the repository?
-  - How many contributors are working on this repository?
-  - Who are they?
-  - How many commits have been made? Who made the latest commit?
+  - Does it have an Open Source License? MIT
+  - Who Owns the Repository? dmlond
+  - How many people have forked the repository? 11 (and counting)
+  - How many contributors are working on this repository? 2
+  - Who are they? dmlond, rvosa
+  - How many commits have been made? 73 Who made the latest commit? dmlond
 * Take a look at the data directory.
-  - Where did we get the data that we analyzed for our pipeline?
-  - Do we make it easy for you to download the same data that we used?
+  - Where did we get the data that we analyzed for our pipeline? sra dnanexus
+  - Do we make it easy for you to download the same data that we used? yes by providing links to the actual files
 * Go back to the bin directory
-  - What is consistently found in each directory?
+  - What is consistently found in each directory? Readme.md
   - View the pipeline.sh file
   - View it in raw mode
-  - Who can you blame for this file?
+  - Who can you blame for this file? dmlond and rvosa
   - What is its history?
 * Go back to the repository root
   - What is the git clone url for the repository?
+    https: https://github.com/dmlond/arangs2015.git
+    ssh: git@github.com:dmlond/arangs2015.git
+    Clone with the ssh URL if you want to push changes back
 * Fork our Repository
 * Check to make sure the `git` executable is available. If not, install it.
+sudo apt-get install git
 * [Create an SSH key](https://help.github.com/articles/generating-ssh-keys/) so that you can commit and `push` changes back to your repository.
+```bash
+$ ssh-keygen -t rsa
+```
 * [Merge the changes made upstream into your local fork](https://help.github.com/articles/merging-an-upstream-repository-into-your-fork/)
diff --git a/docs/2015-05-11/local_install/Worksheet.md b/docs/2015-05-11/local_install/Worksheet.md
@@ -1,15 +1,24 @@
 We are going to try to run [pipeline.sh](https://github.com/dmlond/arangs2015/blob/master/bin/pipeline.sh). Have a look at the script.
 
-- How many values does the `$FASTQS` variable hold?
-- How many values will the `$SAIS` variable hold when running the script? Why?
-- How many times will the `$SAM` variable be assigned a value? Is it the same value? Why?
+- How many values does the `$FASTQS` variable hold? 2
+- How many values will the `$SAIS` variable hold when running the script? 2 Why? because it loops over $FASTQS
+- How many times will the `$SAM` variable be assigned a value? 2 Is it the same value? No Why? Because it uses a different value of $OUTFILE each time
 - What's the function of the if/else statements, why would they be there?
+They check whether output files already exist, so that parts of the pipeline whose results are already present are not run again
 - Which programs need to be installed to run pipeline.sh?
+bwa, samtools
 - Which versions of these programs do we need (consult the download* scripts)?
-- Download and (try to) compile these programs. 
+bwa 0.7.12
+samtools 1.2
+- Download and (try to) compile these programs.
 - Are there any libraries missing?
 - If there are missing libraries, try to install these with the package manager (`apt-get`)
 - Make sure the compiled programs are on the `$PATH`.
-- Download the data. If you use the download* scripts for this, where will the data end up?
-- Will the pipeline.sh be able to find the data there? 
+```bash
+export PATH="/dir/of/program:${PATH}"
+```
+- Download the data. If you use the download* scripts for this, where will the data end up? ../data
+- Will the pipeline.sh be able to find the data there?
+yes, because pipeline.sh hardcodes the path to that data directory
 - Does it matter where you run the pipeline?
+yes, because it hardcodes data relative to the bin directory
diff --git a/docs/2015-05-11/mindmap_day1.xmind b/docs/2015-05-11/mindmap_day1.xmind
diff --git a/docs/2015-05-11/shell/README.md b/docs/2015-05-11/shell/README.md
@@ -0,0 +1,18 @@
+On Linux, every command (to a first approximation) is an executable that resides somewhere on the file system. 
+When you type a command, the operating system scans a select number of folders that might contain the 
+executable that corresponds with the command you typed. This list of folders is defined by the environment
+variable `$PATH`. So, if you type a command that corresponds with an executable that is not in any of the
+folders that are in the `$PATH`, the command will not be found. It does not matter that you are INSIDE the
+folder with the executable RIGHT THERE: it is still not going to be found. How to change that? By updating
+the value of the `$PATH` environment variable. Like so:
+
+ `export PATH=/folder/of/executable:$PATH`
+
+A couple of important points:
+- the folders in the list are separated by `:`
+- using the bash shell (!) the list is read from left to right. In CShell (`csh`) it is read right to left.
+- in general, shells split "words" on spaces, unless the whole sentence is inside quotes. So, if any of the
+folders has spaces in it (`/Documents and Settings/`) you have to quote the list, otherwise it will stop
+at `/Documents`
+- If you want to put a word right after `$PATH`, you can delimit the variable name with curly braces: 
+`${PATH}foo`
diff --git a/docs/2015-05-12/Worksheet.md b/docs/2015-05-12/Worksheet.md