StarCluster - Mailing List Archive

Re: [Starcluster] error when starting cluster

From: Damian Eads <no email>
Date: Tue, 20 Apr 2010 14:44:36 -0700

Hi Justin,

It worked; thanks very much for the prompt fix. Before I received your
e-mail, I killed 6 of my 8 octo-core instances to save money. Tell me if
you think the following steps will work.

   1. Through the AWS web console, detach currently used volumes.
   2. Manually reboot the instances currently running.
   3. Manually launch additional spot instances in the same
availability zone as the ones currently running.
   4. Rerun starcluster start -x mycluster dtest

Being able to restart the cluster without first terminating the
instances and then relaunching them will save money. Do you think this
will work? I don't mind doing it manually.

Thanks a lot in advance!

Damian


On Tue, Apr 20, 2010 at 2:16 PM, Justin Riley <jtriley_at_mit.edu> wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Hi Damian,
>
> I believe I've fixed this in github. Could you pull and give it another
> shot?
>
> Also, I've added support for master/node001/etc aliases to the sshnode
> action. So, you should now be able to:
>
> $ starcluster sshnode mycluster master
> $ starcluster sshnode mycluster node001
> etc
>
> Please let me know if the latest github code fixes your problem below
> and if you have any other issues.
>
> Thanks,
>
> ~Justin
>
> On 04/20/2010 04:38 PM, Damian Eads wrote:
>> Hi Justin,
>>
>> I just did a git pull and got the following error when I tried
>> creating my cluster. Ideas?
>>
>> Thanks,
>>
>> Damian
>>
>> eads_at_street:~/work/repo/StarCluster$ starcluster start -x mycluster dtest
>> /tmp/qqq/lib/python2.6/site-packages/pycrypto-2.0.1-py2.6-linux-x86_64.egg/Crypto/Hash/SHA.py:6:
>> DeprecationWarning: the sha module is deprecated; use the hashlib
>> module instead
>> /tmp/qqq/lib/python2.6/site-packages/pycrypto-2.0.1-py2.6-linux-x86_64.egg/Crypto/Hash/MD5.py:6:
>> DeprecationWarning: the md5 module is deprecated; use hashlib instead
>> /var/lib/python-support/python2.6/IPython/Magic.py:38:
>> DeprecationWarning: the sets module is deprecated
>>   from sets import Set
>> StarCluster - (http://web.mit.edu/starcluster)
>> Software Tools for Academics and Researchers (STAR)
>> Please submit bug reports to starcluster_at_mit.edu
>>
>>>>> Validating cluster settings...
>>>>> Cluster settings are valid
>>>>> Starting cluster...
>>>>> Waiting for cluster to start...
>>>>> The master node is ec2-174-129-172-124.compute-1.amazonaws.com
>>>>> Attaching volume vol-c5e85dac to master node...
>>>>> Setting up the cluster...
>>>>> Mounting EBS volume vol-c5e85dac on /data...
>> ssh.py:66 - WARNING - specified key does not end in either rsa or dsa,
>> trying both
>>>>> Using private key /home/eads/deadskey.pem (rsa)
>> ERROR: An unexpected error occurred while tokenizing input
>> The following traceback may be corrupted or invalid
>> The error message is: ('EOF in multi-line statement', (405, 0))
>>
>> ---------------------------------------------------------------------------
>> TypeError                                 Traceback (most recent call last)
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/EGG-INFO/scripts/starcluster
>> in <module>()
>>       3 __requires__ = 'StarCluster==0.9999'
>>       4 import pkg_resources
>> ----> 5 pkg_resources.run_script('StarCluster==0.9999', 'starcluster')
>>       6
>>       7
>>
>> /usr/lib/python2.6/dist-packages/pkg_resources.pyc in run_script(self,
>> requires, script_name)
>>     446         ns.clear()
>>     447         ns['__name__'] = name
>> --> 448         self.require(requires)[0].run_script(script_name, ns)
>>     449
>>     450
>>
>> /usr/lib/python2.6/dist-packages/pkg_resources.pyc in run_script(self,
>> script_name, namespace)
>>    1171             )
>>    1172             script_code = compile(script_text,script_filename,'exec')
>> -> 1173             exec script_code in namespace, namespace
>>    1174
>>    1175     def _has(self, path):
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/EGG-INFO/scripts/starcluster
>> in <module>()
>>       4
>>       5
>> ----> 6
>>       7
>>       8
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/starcluster/cli.pyc
>> in main()
>>     850         sys.exit(0)
>>     851     try:
>> --> 852         sc.execute(args)
>>     853     except exception.BaseException,e:
>>     854         log.error(e.msg)
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/starcluster/cli.pyc
>> in execute(self, args)
>>     169             log.info('Cluster settings are valid')
>>     170             if not self.opts.validate_only:
>> --> 171                 scluster.start(create=not self.opts.no_create)
>>     172                 if self.opts.login_master:
>>     173                     cluster.ssh_to_master(tag, self.cfg)
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/starcluster/utils.pyc
>> in wrapper(*arg, **kargs)
>>      23         """Raw timing function """
>>      24         time1 = time.time()
>> ---> 25         res = func(*arg, **kargs)
>>      26         time2 = time.time()
>>      27         log.info('%s took %0.3f mins' % (func.func_name,
>> (time2-time1)/60.0))
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/starcluster/cluster.pyc
>> in start(self, create)
>>     476             self.nodes, self.master_node,
>>     477             self.cluster_user, self.cluster_shell,
>> --> 478             self.volumes
>>     479         )
>>     480         self.create_receipt()
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/starcluster/clustersetup.pyc
>> in run(self, nodes, master, user, user_shell, volumes)
>>     312         self._volumes = volumes
>>     313         self._setup_ebs_volume()
>> --> 314         self._setup_cluster_user()
>>     315         self._setup_scratch()
>>     316         self._setup_etc_hosts()
>>
>> /tmp/qqq/lib/python2.6/site-packages/StarCluster-0.9999-py2.6.egg/starcluster/clustersetup.pyc
>> in _setup_cluster_user(self)
>>      67             max_uid = max(uid_db.keys())
>>      68             max_gid = uid_db[max_uid][1]
>> ---> 69             uid, gid = max_uid+1, max_gid+1
>>      70
>>      71         log.debug("Cluster user gid/uid: (%d, %d)" % (uid,gid))
>>
>> TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
>> eads_at_street:~/work/repo/StarCluster$
>> _______________________________________________
>> Starcluster mailing list
>> Starcluster_at_mit.edu
>> http://mailman.mit.edu/mailman/listinfo/starcluster
>
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v2.0.14 (GNU/Linux)
> Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
>
> iEYEARECAAYFAkvOGZgACgkQ4llAkMfDcrlA0wCfREUw39vSLczIMJgss1Te129m
> z2YAn3hcAEmcDInJH8Mfmaa+tZyQa3oe
> =f/Kl
> -----END PGP SIGNATURE-----
>



-- 
-----------------------------------------------------
Damian Eads                           Ph.D. Candidate
University of California             Computer Science
1156 High Street         Machine Learning Lab, E2-489
Santa Cruz, CA 95064    http://www.soe.ucsc.edu/~eads
Received on Tue Apr 20 2010 - 17:44:37 EDT
This archive was generated by hypermail 2.3.0.

Search:

Sort all by:

Date

Month

Thread

Author

Subject