Skip to content

Commit

Permalink
Merge branch 'release-0.9-beta'
Browse files Browse the repository at this point in the history
  • Loading branch information
gjost committed Sep 11, 2014
2 parents 2156c8c + 75a345f commit bc9f8a7
Show file tree
Hide file tree
Showing 12 changed files with 762 additions and 267 deletions.
12 changes: 7 additions & 5 deletions ddr/DDR/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION = 0.1
VERSION = '0.9-beta'
CONFIG_FILES = ['/etc/ddr/ddr.cfg', '/etc/ddr/local.cfg']

class NoConfigError(Exception):
Expand All @@ -24,11 +24,13 @@ def natural_sort( l ):

def natural_order_string( id ):
    """Convert a collection/entity ID into form that can be sorted naturally.

    Returns the last run of digits in the ID (e.g. 'ddr-densho-10' -> '10').

    @param id: A valid format DDR ID
    @returns: str containing the trailing digit group of the ID
    @raises Exception: if the ID contains no digits.
    """
    # Raw string avoids the invalid-escape-sequence warning for '\d'
    # on modern Python; matching behavior is unchanged.
    matches = re.findall(r'\d+', id)
    if not matches:
        raise Exception('Valid DDR ID required.')
    # .pop() returns the LAST digit group, i.e. the entity/collection number.
    return matches.pop()


class Timer( object ):
Expand Down
11 changes: 9 additions & 2 deletions ddr/DDR/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ def entity_update(user_name, user_mail, collection_path, entity_uid, updated_fil

@command
@local_only
def entity_annex_add(user_name, user_mail, collection_path, entity_uid, updated_files, new_annex_files, agent=''):
def entity_annex_add(user_name, user_mail, collection_path, entity_uid, updated_files, new_annex_files, agent='', entity=None):
"""Command-line function for git annex add-ing a file and updating metadata.
All this function does is git annex add the file, update changelog and
Expand All @@ -748,17 +748,24 @@ def entity_annex_add(user_name, user_mail, collection_path, entity_uid, updated_
It does not mark the file as master/mezzanine/access/etc or edit any metadata.
It does not perform any background processing on the file.
TODO Refactor this when ddr-local models moved into ddr-cmdln
WARNING - UGLY HACK!
The 'entity' arg is intended to allow ddr-local to pass in Entity
objects and use their checksums() method.
@param user_name: Username for use in changelog, git log
@param user_mail: User email address for use in changelog, git log
@param collection_path: Absolute path to collection repo.
@param entity_uid: A valid DDR entity UID
@param updated_files: list of paths to updated files (relative to collection repo).
@param new_annex_files: List of paths to new files (relative to entity files dir).
@param agent: (optional) Name of software making the change.
@param entity: (optional) Entity object (see above)
@return: message ('ok' if successful)
"""
collection = DDRCollection(collection_path)
entity = DDREntity(collection.entity_path(entity_uid))
if not entity:
entity = DDREntity(collection.entity_path(entity_uid))

repo = dvcs.repository(collection.path, user_name, user_mail)
repo.git.checkout('master')
Expand Down
5 changes: 4 additions & 1 deletion ddr/DDR/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ def relative_path(prefix_path, payload_file):
self._config.remove_section('Files')
self._config.add_section('Files')
for md5,path in entity.checksums('md5'):
size = os.path.getsize(path)
try:
size = os.path.getsize(path)
except:
size = 'UNKNOWNSIZE'
path = relative_path(entity.files_path, path)
self._config.set('Files', md5, '{} ; {}'.format(size,path))
121 changes: 84 additions & 37 deletions ddr/DDR/docstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,25 @@
INDEX = 'documents0'
PATH = '/var/www/media/base'
import os
from DDR import models
from DDR import docstore
docstore.delete_index(HOSTS, INDEX)
docstore.create_index(HOSTS, INDEX)
docstore.put_mappings(HOSTS, INDEX, docstore.HARD_CODED_MAPPINGS_PATH, models.MODELS_DIR)
docstore.put_facets(HOSTS, INDEX, docstore.HARD_CODED_FACETS_PATH)
docstore.put_mappings(HOSTS, INDEX, docstore.MAPPINGS_PATH, models.MODELS_DIR)
docstore.put_facets(HOSTS, INDEX, docstore.FACETS_PATH)
docstore.delete(HOSTS, INDEX, os.path.basename(PATH), recursive=True)
docstore.index(HOSTS, INDEX, PATH, recursive=True, public=True )
------------------------------------------------------------------------
"""
from __future__ import print_function
import ConfigParser
from datetime import datetime
import json
import logging
Expand All @@ -35,15 +39,21 @@

from elasticsearch import Elasticsearch

from DDR import CONFIG_FILES, NoConfigError
from DDR import natural_sort
from DDR import models

config = ConfigParser.ConfigParser()
configs_read = config.read(CONFIG_FILES)
if not configs_read:
raise NoConfigError('No config file!')

MAPPINGS_PATH = config.get('cmdln','vocab_mappings_path')
FACETS_PATH = config.get('cmdln','vocab_facets_path')

MAX_SIZE = 1000000
DEFAULT_PAGE_SIZE = 20

HARD_CODED_MAPPINGS_PATH = '/usr/local/src/ddr-cmdln/ddr/DDR/mappings.json'
HARD_CODED_FACETS_PATH = '/usr/local/src/ddr-cmdln/ddr/DDR/facets'

SUCCESS_STATUSES = [200, 201]
STATUS_OK = ['completed']
PUBLIC_OK = [1,'1']
Expand Down Expand Up @@ -132,7 +142,7 @@ def _parse_cataliases( cataliases ):
indices_aliases.append( (i,a) )
return indices_aliases

def set_alias( hosts, alias, index ):
def set_alias( hosts, alias, index, remove=False ):
"""Point alias at specified index; create index if doesn't exist.
IMPORTANT: There is only ever ONE index at a time. All existing
Expand All @@ -141,6 +151,7 @@ def set_alias( hosts, alias, index ):
@param hosts: list of dicts containing host information.
@param alias: Name of the alias
@param index: Name of the alias' target index.
@param remove: boolean
"""
alias = make_index_name(alias)
index = make_index_name(index)
Expand All @@ -150,8 +161,9 @@ def set_alias( hosts, alias, index ):
# delete existing aliases
for i,a in _parse_cataliases(es.cat.aliases(h=['index','alias'])):
es.indices.delete_alias(index=i, name=a)
# set the alias
es.indices.put_alias(index=index, name=alias, body='')
if not remove:
# set the alias
es.indices.put_alias(index=index, name=alias, body='')

def target_index( hosts, alias ):
"""Get the name of the index to which the alias points
Expand Down Expand Up @@ -187,7 +199,6 @@ def create_index( hosts, index ):
status = es.indices.create(index=index, body=body)
return status


def delete_index( hosts, index ):
"""Delete the specified index.
Expand Down Expand Up @@ -247,21 +258,22 @@ def _make_mappings( mappings_path, index, models_dir ):
for mapping in mappings['documents']:
model = mapping.keys()[0]
json_path = os.path.join(models_dir, '%s.json' % model)
with open(json_path, 'r') as f:
data = json.loads(f.read())
for field in data:
fname = field['name']
mapping[model]['properties'][fname] = field['elasticsearch']['properties']
# mappings for parent_id, etc
if model == 'collection':
mapping[model]['properties']['parent_id'] = ID_PROPERTIES
elif model == 'entity':
mapping[model]['properties']['parent_id'] = ID_PROPERTIES
mapping[model]['properties']['collection_id'] = ID_PROPERTIES
elif model == 'file':
mapping[model]['properties']['parent_id'] = ID_PROPERTIES
mapping[model]['properties']['collection_id'] = ID_PROPERTIES
mapping[model]['properties']['entity_id'] = ID_PROPERTIES
if os.path.exists(json_path):
with open(json_path, 'r') as f:
data = json.loads(f.read())
for field in data:
fname = field['name']
mapping[model]['properties'][fname] = field['elasticsearch']['properties']
# mappings for parent_id, etc
if model == 'collection':
mapping[model]['properties']['parent_id'] = ID_PROPERTIES
elif model == 'entity':
mapping[model]['properties']['parent_id'] = ID_PROPERTIES
mapping[model]['properties']['collection_id'] = ID_PROPERTIES
elif model == 'file':
mapping[model]['properties']['parent_id'] = ID_PROPERTIES
mapping[model]['properties']['collection_id'] = ID_PROPERTIES
mapping[model]['properties']['entity_id'] = ID_PROPERTIES
return mappings
elif 'meta' in index:
return mappings['meta']
Expand Down Expand Up @@ -293,8 +305,7 @@ def put_mappings( hosts, index, mappings_path, models_dir ):
statuses.append( {'model':model, 'status':status} )
return statuses


def put_facets( hosts, index, path=HARD_CODED_FACETS_PATH ):
def put_facets( hosts, index, path=FACETS_PATH ):
"""PUTs facets from file into ES.
curl -XPUT 'http://localhost:9200/meta/facet/format' -d '{ ... }'
Expand All @@ -308,7 +319,7 @@ def put_facets( hosts, index, path=HARD_CODED_FACETS_PATH ):
logger.debug('index_facets(%s, %s, %s)' % (hosts, index, path))
statuses = []
es = _get_connection(hosts)
for facet_json in os.listdir(HARD_CODED_FACETS_PATH):
for facet_json in os.listdir(FACETS_PATH):
facet = facet_json.split('.')[0]
srcpath = os.path.join(path, facet_json)
with open(srcpath, 'r') as f:
Expand All @@ -317,7 +328,7 @@ def put_facets( hosts, index, path=HARD_CODED_FACETS_PATH ):
statuses.append(status)
return statuses

def list_facets( path=FACETS_PATH ):
    """List the facet names available in the facets directory.

    Each facet is stored as a FACETNAME.json file; the name is the
    filename with the '.json' suffix stripped.

    @param path: Absolute path to directory containing facet JSON files.
    @returns: list of facet names
    """
    facets = []
    for filename in os.listdir(path):
        facets.append(filename.replace('.json', ''))
    return facets

def facet_terms( hosts, index, facet, order='term', all_terms=True, model=None ):
Expand Down Expand Up @@ -380,6 +391,41 @@ def facet_terms( hosts, index, facet, order='term', all_terms=True, model=None )
results = es.search(index=index, doc_type=model, body=payload)
return results['facets']['results']

def repo( hosts, index, path ):
    """Add or update base repository metadata.

    Loads a JSON file, checks that it describes a repository (it must
    have 'id' and 'repo' fields and must NOT have an 'org' field), then
    indexes it under doc_type 'repo'.

    @param hosts: list of dicts containing host information.
    @param index: Name of the target index.
    @param path: Absolute path to a repository JSON file.
    @returns: Elasticsearch response.
    """
    # get and validate file
    with open(path, 'r') as f:
        data = json.loads(f.read())
    well_formed = data.get('id') and data.get('repo') and not data.get('org')
    if not well_formed:
        raise Exception('Data file is not well-formed.')
    # add/update
    es = _get_connection(hosts)
    return es.index(index=index, doc_type='repo', id=data['id'], body=data)

def org( hosts, index, path, remove=False):
    """Add/update or remove organization metadata.

    Loads a JSON file, checks that it describes an organization (it must
    have 'id', 'repo' and 'org' fields), then indexes it — or, with
    remove=True, deletes the existing document instead.

    @param hosts: list of dicts containing host information.
    @param index: Name of the target index.
    @param path: Absolute path to an organization JSON file.
    @param remove: If True, delete the document rather than indexing it.
    @returns: Elasticsearch response.
    """
    # get and validate file
    with open(path, 'r') as f:
        data = json.loads(f.read())
    if not (data.get('id') and data.get('repo') and data.get('org')):
        raise Exception('Data file is not well-formed.')
    document_id = data['id']
    doctype = 'organization'
    # add/update/remove
    es = _get_connection(hosts)
    if remove and exists(hosts, index, doctype, document_id):
        return es.delete(index=index, doc_type=doctype, id=document_id)
    return es.index(index=index, doc_type=doctype, id=document_id, body=data)


# post -----------------------------------------------------------------

Expand Down Expand Up @@ -927,22 +973,23 @@ def search( hosts, index, model='', query='', term={}, filters={}, sort=[], fiel
)
return results


def delete( hosts, index, document_id, recursive=False ):
    """Delete a document and optionally its children.

    @param hosts: list of dicts containing host information.
    @param index: Name of the target index.
    @param document_id: ID of the document to delete.
    @param recursive: True or False; also delete the document's children.
    @returns: Elasticsearch response.
    @raises Exception: if the model cannot be mapped to doc_types.
    """
    # The model name is the first component of the object ID
    # (e.g. 'collection', 'entity', 'file').
    model = models.split_object_id(document_id)[0]
    es = _get_connection(hosts)
    if recursive:
        # Child documents carry their ancestors' IDs, so querying on this
        # document's ID across the child doc_types catches the whole subtree.
        if model == 'collection': doc_type = 'collection,entity,file'
        elif model == 'entity': doc_type = 'entity,file'
        elif model == 'file': doc_type = 'file'
        else:
            # Previously doc_type was left unbound here, producing a
            # confusing NameError; fail with an explicit message instead.
            raise Exception('Cannot delete unrecognized model: %s' % model)
        query = 'id:"%s"' % document_id
        return es.delete_by_query(index=index, doc_type=doc_type, q=query)
    else:
        return es.delete(index=index, doc_type=model, id=document_id)


Expand Down
33 changes: 16 additions & 17 deletions ddr/DDR/dvcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,11 @@ def annex_whereis_file(repo, file_path_rel):
print('----------')
return _parse_annex_whereis(stdout)

def _gitolite_info_authorized( status, lines ):
if status == 0 and lines:
if len(lines) and ('this is git' in lines[0]) and ('running gitolite' in lines[0]):
logging.debug(' OK ')
return True
def _gitolite_info_authorized( gitolite_out ):
lines = gitolite_out.split('\n')
if lines and len(lines) and ('this is git' in lines[0]) and ('running gitolite' in lines[0]):
logging.debug(' OK ')
return True
logging.debug(' NO CONNECTION')
return False

Expand Down Expand Up @@ -309,38 +309,37 @@ def gitolite_connect_ok(server):
@return: True or False
"""
logging.debug(' DDR.commands.gitolite_connect_ok()')
status,lines = gitolite_info(server)
return _gitolite_info_authorized(status, lines)
return _gitolite_info_authorized(gitolite_info(server))

def gitolite_orgs( gitoliteinfo ):
def gitolite_orgs( gitolite_out ):
"""Returns list of orgs to which user has access
@param gitoliteinfo: lines part of gitolite_info() output
@param gitolite_out: raw output of gitolite_info()
@returns: list of organization IDs
"""
repos_orgs = []
for line in gitoliteinfo:
for line in gitolite_out.split('\n'):
if 'R W C' in line:
parts = line.replace('R W C', '').strip().split('-')
repo_org = '-'.join([parts[0], parts[1]])
if repo_org not in repos_orgs:
repos_orgs.append(repo_org)
return repos_orgs

def gitolite_info(server):
def gitolite_info(server, timeout=60):
"""
@param server: USERNAME@DOMAIN
@return: status,lines
@param timeout: int Maximum seconds to wait for reponse
@return: raw Gitolite output from SSH
"""
status = None; lines = []
cmd = 'ssh {} info'.format(server)
logging.debug(' {}'.format(cmd))
r = envoy.run(cmd, timeout=30)
r = envoy.run(cmd, timeout=int(timeout))
logging.debug(' {}'.format(r.status_code))
status = r.status_code
if r.status_code == 0:
lines = r.std_out.split('\n')
return status,lines
if r.status_code != 0:
raise Exception('Bad reply from Gitolite server: %s' % r.std_err)
return r.std_out

def _parse_list_staged( diff ):
staged = []
Expand Down
Loading

0 comments on commit bc9f8a7

Please sign in to comment.