Attachment 'step_10.py' of Events/GCC2013/TrainingDay/API

#!/usr/bin/env python

"""
Step 10
=======
In this step, we'll do everything from steps 1 through 9 - then,
we'll copy the forward and reverse read HDAs to a library.

The output will be in a new history named 'Step 10'.
"""
import os
import sys
import pprint
import time

import users_1
import histories_3
import tools_1
import workflows_1
import datasets_3
import hdas_3

# at this point you should know the drill - we'll need several functions from
#   these two new modules as well
import libraries_1
import lddas_1


NEW_HISTORY_NAME = 'Step 10'
DATA_TO_UPLOAD = 'data/myIlluminaRun.solexa.fastq'

SOLEXA_QC_WORKFLOW_NAME = 'Joined Solexa QC'
SOLEXA_QC_WORKFLOW_INPUT_STEP = 6
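# (the index of the workflow step that receives the uploaded HDA as input;
#   if the workflow is edited or renumbered, this index may need to change)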

STATISTICS_DATASET_NAME = 'statistics'
STATISTICS_DATASET_COLUMN = 5
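# (column 5 of the 'statistics' dataset holds the per-base quality values
#   we summarize further below)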

FREAD_DATASET_NAME = 'forward reads'
RREAD_DATASET_NAME = 'reverse reads'

# here's the name of the library to copy the reads to
LIBRARY_FOR_READS = 'Reads Library'

# ----------------------------------------------------------------------------- main
if __name__ == '__main__':
    try:
        # check the connection
        users = users_1.get_users()

        # create a new history
        new_history = histories_3.create_history( NEW_HISTORY_NAME )
        print 'created history!', new_history[ 'name' ]
        new_history_id = new_history[ 'id' ]
        new_history_details = histories_3.get_history( new_history_id )

        # upload a file
        tool_output_datasets = tools_1.upload_hda( new_history_id, DATA_TO_UPLOAD )
        uploaded_file_data = tool_output_datasets[ 'outputs' ][0]
        print 'uploaded hda!', uploaded_file_data[ 'name' ]
        uploaded_file_id = uploaded_file_data[ 'id' ]

        # and use get_hda to get details on the new, uploaded HDA
        uploaded_hda_details = hdas_3.get_hda( new_history_id, uploaded_file_id )
                               # ^^^ --------------------------------------- we're using a new version
                               #                                             - don't forget to change things like this
        uploaded_hda_state = uploaded_hda_details[ 'state' ]

        # wait for the upload to finish
        while uploaded_hda_state != 'ok':
            print '\t uploaded_hda_state:', uploaded_hda_state
            print '\t (waiting 4 seconds...)'
            time.sleep( 4.0 )

            # keep checking to get any new state the HDA might move into
            uploaded_hda_details = hdas_3.get_hda( new_history_id, uploaded_file_id )
            uploaded_hda_state = uploaded_hda_details[ 'state' ]

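        # NOTE: 'ok' isn't the only state an HDA can end in - a failed upload
        #   will move to state 'error' and the loop above would poll forever;
        #   a more defensive script would check for failure states too, e.g.:
        #       if uploaded_hda_state in ( 'error', 'discarded' ):
        #           raise Exception( 'upload failed: ' + uploaded_hda_state )
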
        # here's the new stuff...moving fast now

        # get the info of all the workflows available to us
        all_workflows = workflows_1.get_workflows()

        # let's search that list for the workflow with the name we want
        found_workflow = None
        for workflow in all_workflows:
            if workflow[ 'name' ] == SOLEXA_QC_WORKFLOW_NAME:
                found_workflow = workflow

        if not found_workflow:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )

        print 'found workflow!', found_workflow[ 'name' ]
        target_workflow_id = found_workflow[ 'id' ]
        target_workflow_details = workflows_1.get_workflow( target_workflow_id )

        # now we'll run it:
        print 'running', found_workflow[ 'name' ], 'workflow...'
        workflow_output = workflows_1.run_single_input_workflow_on_hda( target_workflow_id,
            new_history_id, uploaded_file_id, SOLEXA_QC_WORKFLOW_INPUT_STEP )
        print 'workflow started!'

        # the 'outputs' list of the workflow_output dictionary contains the ids of the HDAs the workflow creates
        output_hda_ids = workflow_output[ 'outputs' ]
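        # (the dictionary returned from invoking a workflow looks roughly like
        #   { 'history': <history id>, 'outputs': [ <hda id>, ... ] },
        #   though the exact fields can vary between Galaxy releases)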

        # wait for them all to finish
        for hda_id in output_hda_ids:
            workflow_hda_details = hdas_3.get_hda( new_history_id, hda_id )
            workflow_hda_state = workflow_hda_details[ 'state' ]
            workflow_hda_name = workflow_hda_details[ 'name' ]
            print workflow_hda_name

            while workflow_hda_state != 'ok':
                print '\t state:', workflow_hda_state
                print '\t (waiting 4 seconds...)'
                time.sleep( 4.0 )

                # keep checking to get any new state the HDA might move into
                workflow_hda_details = hdas_3.get_hda( new_history_id, hda_id )
                workflow_hda_state = workflow_hda_details[ 'state' ]

            print '\t ok'
        print 'workflow complete!'

        # get some statistics using the datasets api
        found_statistics = None
        hda_summaries = hdas_3.get_hdas( new_history_id )
        for hda_summary in hda_summaries:
            if hda_summary[ 'name' ] == STATISTICS_DATASET_NAME:
                found_statistics = hda_summary
                break
        if not found_statistics:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )
        statistics_dataset_id = found_statistics[ 'id' ]

        column_data = datasets_3.get_dataset_column( statistics_dataset_id, STATISTICS_DATASET_COLUMN )
        column_metadata = column_data[ 'meta' ][0]
        mean_perbase_quality = column_metadata[ 'mean' ]
        median_perbase_quality = column_metadata[ 'median' ]
        print 'mean:', mean_perbase_quality, 'median:', median_perbase_quality
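        # (get_dataset_column returns summary metadata for the requested column;
        #   'meta' is a list - a single entry here, since we asked for one
        #   column - and each entry carries statistics like 'mean' and 'median')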

        # now we'll rename and annotate the split forward and reverse reads
        # we'll need to find them first - a familiar pattern
        forward_reads_hda = None
        reverse_reads_hda = None
        # we've already got a list from the statistics step above
        for hda_summary in hda_summaries:
            if   hda_summary[ 'name' ] == FREAD_DATASET_NAME:
                forward_reads_hda = hda_summary
            elif hda_summary[ 'name' ] == RREAD_DATASET_NAME:
                reverse_reads_hda = hda_summary
        if not forward_reads_hda or not reverse_reads_hda:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )
        print 'found fwd/rev reads:', forward_reads_hda[ 'name' ], reverse_reads_hda[ 'name' ]

        # first, we'll use the first part of the uploaded file's name as a prefix for the new names
        uploaded_basename = os.path.basename( DATA_TO_UPLOAD )
        uploaded_prefix = uploaded_basename.split( '.' )[0]
        annotation_string  = 'mean perbase quality: ' + str( mean_perbase_quality ) + '; '
        annotation_string += 'median perbase quality: ' + str( median_perbase_quality ) + '; '

        forward_reads_hda_id = forward_reads_hda[ 'id' ]
        hdas_3.update_hda( new_history_id, forward_reads_hda_id, {
            'name'          : uploaded_prefix + '.fwd.fastqsanger',
            'annotation'    : annotation_string
        })

        reverse_reads_hda_id = reverse_reads_hda[ 'id' ]
        hdas_3.update_hda( new_history_id, reverse_reads_hda_id, {
            'name'          : uploaded_prefix + '.rev.fastqsanger',
            'annotation'    : annotation_string
        })

        forward_reads_hda_details = hdas_3.get_hda( new_history_id, forward_reads_hda_id )
        forward_reads_hda_name = forward_reads_hda_details[ 'name' ]
        reverse_reads_hda_details = hdas_3.get_hda( new_history_id, reverse_reads_hda_id )
        reverse_reads_hda_name = reverse_reads_hda_details[ 'name' ]
        print 'changed read HDA names and annotations:', forward_reads_hda_name, reverse_reads_hda_name

        # now - we'll move the QC'd fastq data into a public library...

        # first, we'll have to find the right library - we'll get the names of accessible libraries and search
        found_library = None
        libraries = libraries_1.get_libraries()
        for library in libraries:
            if library[ 'name' ] == LIBRARY_FOR_READS:
                found_library = library
        if not found_library:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )
        library = found_library
        print 'found reads library:', library[ 'name' ]

        # it would be better to have a new folder for each set of reads, but
        #   for simplicity's sake, we'll copy to the root folder
        # let's find that using lddas_1.get_lddas
        library_id = library[ 'id' ]
        library_contents = lddas_1.get_lddas( library_id )

        root_folder = None
        for contents in library_contents:
            # since libraries (unlike histories) can contain other containers (folders) and be nested,
            #   both library folders AND lddas will be returned from the API call
            # we can distinguish between the two using the field/attribute 'type'
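            # (a folder entry looks something like
            #   { 'type': 'folder', 'name': '/', 'id': ... } - datasets have
            #   'type': 'file' instead, and the root folder is always named '/')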
            if contents[ 'type' ] == 'folder' and contents[ 'name' ] == '/':
                root_folder = contents
                break
        if not root_folder:
            raise Exception( 'If you see this error, let one of the workshop presenters know' )
        print 'found root folder:', root_folder[ 'name' ]

        # now, we'll copy the read HDAs to the root folder using lddas_1.copy_hda_to_ldda
        print 'copying HDAs to library:', library[ 'name' ]
        root_folder_id = root_folder[ 'id' ]
        print '\t forward read:', forward_reads_hda_name
        fwd_returned = lddas_1.copy_hda_to_ldda( library_id, root_folder_id, forward_reads_hda_id )
        print '\t reverse read:', reverse_reads_hda_name
        rev_returned = lddas_1.copy_hda_to_ldda( library_id, root_folder_id, reverse_reads_hda_id )
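
        # (copy_hda_to_ldda returns a dictionary describing the newly created
        #   library dataset - we keep both return values to pretty-print them
        #   at the end of the script)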

        #NOTE: this file is verbose in order to make things clear - if you end up writing functions or scripts
        #   this long, it's good practice to move parts into their own functions

        print "We're done! Congrats on getting this far. We'd like to thank you by buying you a beer"
        print "(Please see Nate to collect)"

    except Exception, exc:
        print 'Error in step 10:', str( exc )
        sys.exit( 1 )

    print 'Forward reads in library:'
    pprint.pprint( fwd_returned, indent=2 )
    print 'Reverse reads in library:'
    pprint.pprint( rev_returned, indent=2 )

Attached Files

To refer to attachments on a page, use attachment:filename, as shown in the list of files below.
  • [[attachment:Galaxy-Workflow-Joined_Solexa_QC.ga]]
  • [[attachment:all-scripts.tar.gz]]
  • [[attachment:common.py]]
  • [[attachment:datasets_1.py]]
  • [[attachment:datasets_2.py]]
  • [[attachment:datasets_3.py]]
  • [[attachment:hdas_1.py]]
  • [[attachment:hdas_2.py]]
  • [[attachment:hdas_3.py]]
  • [[attachment:histories_1.py]]
  • [[attachment:histories_2.py]]
  • [[attachment:histories_3.py]]
  • [[attachment:lddas_1.py]]
  • [[attachment:libraries_1.py]]
  • [[attachment:setup.py]]
  • [[attachment:step_1.py]]
  • [[attachment:step_10.py]]
  • [[attachment:step_2.py]]
  • [[attachment:step_3.py]]
  • [[attachment:step_4.py]]
  • [[attachment:step_5.py]]
  • [[attachment:step_6.py]]
  • [[attachment:step_7.py]]
  • [[attachment:step_8.py]]
  • [[attachment:step_9.py]]
  • [[attachment:tools_1.py]]
  • [[attachment:upload_to_history.py]]
  • [[attachment:workflows_1.py]]