Skip to content

Commit 2562552

Browse files
committed
Merge branch 'release-0.1'
2 parents dfc736c + 3af1bd7 commit 2562552

7 files changed

Lines changed: 147 additions & 41 deletions

File tree

DESCRIPTION.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Baleen is a tool for ingesting formal natural language data from the discourse of professional and amateur writers: e.g. bloggers and news outlets. Rather than performing web scraping, Baleen focuses on data ingestion through the use of RSS feeds. It performs as much raw data collection as it can, saving data into a Mongo document store.
2+
3+
For more, please see the full documentation at: http://baleen-ingest.readthedocs.org/en/latest/

MANIFEST.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
include *.md
2+
include *.txt
3+
include *.yml
4+
include Makefile
5+
recursive-include docs *.md
6+
recursive-include docs *.jpg
7+
recursive-include tests *.py

baleen/__init__.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,10 @@
1818
## Imports
1919
##########################################################################
2020

21+
from .version import get_version
22+
2123
##########################################################################
22-
## Vesioning
24+
## Package Version
2325
##########################################################################
2426

25-
__version__ = (0,1,0)
26-
27-
def get_version():
28-
"""
29-
Returns the string containing the version number
30-
"""
31-
return "%i.%i.%i" % __version__
27+
__version__ = get_version()

baleen/version.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# baleen.version
2+
# Stores version information such that it can be read by setuptools.
3+
#
4+
# Author: Benjamin Bengfort <benjamin@bengfort.com>
5+
# Created: Thu Feb 18 20:14:16 2016 -0500
6+
#
7+
# Copyright (C) 2016 Bengfort.com
8+
# For license information, see LICENSE.txt
9+
#
10+
# ID: version.py [] benjamin@bengfort.com $
11+
12+
"""
13+
Stores version information such that it can be read by setuptools.
14+
"""
15+
16+
##########################################################################
17+
## Version Information
18+
##########################################################################
19+
20+
__version_info__ = {
21+
'major': 0,
22+
'minor': 1,
23+
'micro': 0,
24+
'releaselevel': 'final',
25+
'serial': 0,
26+
}
27+
28+
29+
def get_version(short=False):
30+
"""
31+
Computes a string representation of the version from __version_info__.
32+
"""
33+
assert __version_info__['releaselevel'] in ('alpha', 'beta', 'final')
34+
vers = ["%(major)i.%(minor)i" % __version_info__, ]
35+
if __version_info__['micro']:
36+
vers.append(".%(micro)i" % __version_info__)
37+
if __version_info__['releaselevel'] != 'final' and not short:
38+
vers.append('%s%i' % (__version_info__['releaselevel'][0],
39+
__version_info__['serial']))
40+
return ''.join(vers)

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[metadata]
2+
description-file = README.md

setup.py

Lines changed: 84 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,50 @@
11
#!/usr/bin/env python
22
# setup
3-
# Setup script for baleen
3+
# Setup script for installing baleen
44
#
55
# Author: Benjamin Bengfort <benjamin@bengfort.com>
66
# Created: Fri Sep 19 10:59:24 2014 -0400
77
#
8-
# Copyright (C) 2014 District Data Labs
8+
# Copyright (C) 2014 Bengfort.com
99
# For license information, see LICENSE.txt and NOTICE.md
1010
#
1111
# ID: setup.py [] benjamin@bengfort.com $
1212

1313
"""
14-
Setup script for baleen
14+
Setup script for installing baleen.
15+
See http://bbengfort.github.io/programmer/2016/01/20/packaging-with-pypi.html
1516
"""
1617

1718
##########################################################################
1819
## Imports
1920
##########################################################################
2021

21-
try:
22-
from setuptools import setup
23-
from setuptools import find_packages
24-
except ImportError:
25-
raise ImportError("Could not import \"setuptools\"."
26-
"Please install the setuptools package.")
22+
import os
23+
import re
24+
import codecs
25+
26+
from setuptools import setup
27+
from setuptools import find_packages
2728

2829
##########################################################################
2930
## Package Information
3031
##########################################################################
3132

32-
## Discover the packages
33-
packages = find_packages(where=".", exclude=("tests", "bin", "docs", "fixtures",))
33+
## Basic information
34+
NAME = "baleen"
35+
DESCRIPTION = "An automated ingestion service for blogs to construct a corpus for NLP research."
36+
AUTHOR = "Benjamin Bengfort"
37+
EMAIL = "benjamin@bengfort.com"
38+
LICENSE = "MIT"
39+
REPOSITORY = "https://github.com/bbengfort/baleen"
40+
PACKAGE = "baleen"
3441

35-
## Load the requirements
36-
requires = []
37-
with open('requirements.txt', 'r') as reqfile:
38-
for line in reqfile:
39-
requires.append(line.strip())
42+
## Define the keywords
43+
KEYWORDS = ('nlp', 'baleen', 'ingestion', 'blogs', 'rss')
4044

4145
## Define the classifiers
42-
classifiers = (
46+
## See https://pypi.python.org/pypi?%3Aaction=list_classifiers
47+
CLASSIFIERS = (
4348
'Development Status :: 4 - Beta',
4449
'Environment :: Console',
4550
'Intended Audience :: Developers',
@@ -53,24 +58,71 @@
5358
'Topic :: Utilities',
5459
)
5560

56-
## Define the keywords
57-
keywords = ('nlp', 'baleen', 'ingestion', 'blogs', 'rss')
61+
## Important Paths
62+
PROJECT = os.path.abspath(os.path.dirname(__file__))
63+
REQUIRE_PATH = "requirements.txt"
64+
VERSION_PATH = os.path.join(PACKAGE, "version.py")
65+
PKG_DESCRIBE = "DESCRIPTION.txt"
66+
67+
## Directories to ignore in find_packages
68+
EXCLUDES = (
69+
"tests", "bin", "docs", "fixtures", "register", "notebooks",
70+
)
71+
72+
##########################################################################
73+
## Helper Functions
74+
##########################################################################
5875

76+
def read(*parts):
77+
"""
78+
Assume UTF-8 encoding and return the contents of the file located at the
79+
absolute path from the PROJECT directory joined with *parts.
80+
"""
81+
with codecs.open(os.path.join(PROJECT, *parts), 'rb', 'utf-8') as f:
82+
return f.read()
83+
84+
85+
def get_version(path=VERSION_PATH):
86+
"""
87+
Reads the version.py located at the VERSION_PATH to find the get_version
88+
function, and executes it to ensure that it is loaded correctly.
89+
"""
90+
namespace = {}
91+
exec(read(path), namespace)
92+
return namespace['get_version']()
93+
94+
95+
def get_requires(path=REQUIRE_PATH):
96+
"""
97+
Yields each requirement listed in the file at the REQUIRE_PATH, which
98+
should point to a requirements.txt output by `pip freeze`.
99+
"""
100+
for line in read(path).splitlines():
101+
line = line.strip()
102+
if line and not line.startswith('#'):
103+
yield line
104+
105+
##########################################################################
59106
## Define the configuration
107+
##########################################################################
108+
60109
config = {
61-
"name": "baleen",
62-
"version": "0.1.0",
63-
"description": "An automated ingestion service for blogs to construct a corpus for NLP research.",
64-
"license": "MIT",
65-
"author": "Benjamin Bengfort",
66-
"author_email": "benjamin@bengfort.com",
67-
"url": "https://github.com/bbengfort/baleen",
68-
"download_url": 'https://github.com/bbengfort/baleen/tarball/v0.1.0',
69-
"packages": packages,
70-
"install_requires": requires,
71-
"classifiers": classifiers,
72-
"keywords": keywords,
73-
"zip_safe": True,
110+
"name": NAME,
111+
"version": get_version(),
112+
"description": DESCRIPTION,
113+
"long_description": read(PKG_DESCRIBE),
114+
"license": LICENSE,
115+
"author": AUTHOR,
116+
"author_email": EMAIL,
117+
"maintainer": AUTHOR,
118+
"maintainer_email": EMAIL,
119+
"url": REPOSITORY,
120+
"download_url": "{}/tarball/v{}".format(REPOSITORY, get_version()),
121+
"packages": find_packages(where=PROJECT, exclude=EXCLUDES),
122+
"install_requires": list(get_requires()),
123+
"classifiers": CLASSIFIERS,
124+
"keywords": KEYWORDS,
125+
"zip_safe": False,
74126
"scripts": ['bin/baleen'],
75127
}
76128

tests/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919

2020
import unittest
2121

22+
##########################################################################
23+
## Module Constants
24+
##########################################################################
25+
26+
TEST_VERSION = "0.1" ## Also the expected version of the package
27+
2228
##########################################################################
2329
## Test Cases
2430
##########################################################################
@@ -45,4 +51,4 @@ def test_version(self):
4551
Assert that the version is sane
4652
"""
4753
import baleen
48-
self.assertEqual("0.1.0", baleen.get_version())
54+
self.assertEqual(TEST_VERSION, baleen.__version__)

0 commit comments

Comments
 (0)