Skip to content

Commit

Permalink
Implement Geo::StreetAddress::US's normalizer (#17)
Browse files Browse the repository at this point in the history
* add type parser back

* create tests from perl

* fix error caused by perl wrapper

* add normalization map

* readme update to reflect changes

* use delete instead of setting undefined

* fix punctuation regex

* remove redundant type check.

* comment out street type match again

* add missing semicolon

* update order of keys in test objects.

* remove test console log

* remove substitution flag

* 1.0.0
  • Loading branch information
jgimbel authored and hassansin committed Jul 14, 2018
1 parent 1a84376 commit 8c45f4b
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 41 deletions.
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
## US Street Address Parser [![Build Status](https://travis-ci.org/hassansin/parse-address.svg)](https://travis-ci.org/hassansin/parse-address)
# US Street Address Parser [![Build Status](https://travis-ci.org/hassansin/parse-address.svg)](https://travis-ci.org/hassansin/parse-address)

This is a Node.js port of the Perl [Geo::StreetAddress::US](http://search.cpan.org/~timb/Geo-StreetAddress-US-1.04/US.pm) package

*Description from Geo::StreetAddress::US*:

>Geo::StreetAddress::US is a regex-based street address and street intersection parser for the United States. Its basic goal is to be as forgiving as possible when parsing user-provided address strings. Geo::StreetAddress::US knows about directional prefixes and suffixes, fractional building numbers, building units, grid-based addresses (such as those used in parts of Utah), 5 and 9 digit ZIP codes, and all of the official USPS abbreviations for street types and state names... [more](http://search.cpan.org/~timb/Geo-StreetAddress-US-1.04/US.pm)
### Usage:
## Usage:

```javascript
//from node:
Expand All @@ -19,14 +19,12 @@ var parsed = parser.parseLocation('1005 N Gravenstein Highway Sebastopol CA 9547
var parsed = parseAddress.parseLocation('1005 N Gravenstein Highway Sebastopol CA 95472');

//Parsed address:
{
{
number: '1005',
prefix: 'N',
street: 'Gravenstein',
type: 'Highway',
type: 'Hwy',
city: 'Sebastopol',
state: 'CA',
zip: '95472' }

```

```
25 changes: 24 additions & 1 deletion address.js
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,19 @@
var Direction_Code;
var initialized = false;

// Maps each parsed-address field name to the lookup table used to normalize
// that field's value. During normalization the field's lower-cased value is
// looked up in its table and, when an entry exists, the value is replaced by
// that entry; fields with no matching entry are left untouched.
// The numbered variants (prefix1/prefix2, suffix1/suffix2, type1/type2) share
// the same table as their unnumbered counterpart.
var Normalize_Map = {
  // Direction-like fields share the directional table.
  prefix: Directional,
  prefix1: Directional,
  prefix2: Directional,
  suffix: Directional,
  suffix1: Directional,
  suffix2: Directional,
  // Street type fields share the street-type table.
  type: Street_Type,
  type1: Street_Type,
  type2: Street_Type,
  // State field uses the state-code table.
  state: State_Code,
};

/**
 * Upper-cases the first character of a string and leaves the rest unchanged.
 *
 * Falsy inputs (empty string, null, undefined, ...) are handed back as-is.
 *
 * @param {string} s - Text to capitalize.
 * @returns {string|*} The capitalized text, or the original falsy value.
 */
function capitalize(s){
  // Nothing to capitalize: return the falsy input untouched.
  if (!s) {
    return s;
  }
  var head = s[0].toUpperCase();
  var tail = s.slice(1);
  return head + tail;
}
Expand Down Expand Up @@ -632,7 +645,17 @@
return;
var key = isFinite(k.split('_').pop())? k.split('_').slice(0,-1).join('_'): k ;
if(parts[k])
parsed[key] = parts[k].trim().replace(/[^\w\s\-\#\&]/,'');
parsed[key] = parts[k].trim().replace(/^\s+|\s+$|[^\w\s\-#&]/g, '');
});
each(Normalize_Map, function(map,key) {
if(parsed[key] && map[parsed[key].toLowerCase()]) {
parsed[key] = map[parsed[key].toLowerCase()];
}
});

['type', 'type1', 'type2'].forEach(function(key){
if(key in parsed)
parsed[key] = parsed[key].charAt(0).toUpperCase() + parsed[key].slice(1).toLowerCase();
});

if(parsed.city){
Expand Down
13 changes: 13 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "parse-address",
"version": "0.0.10",
"version": "1.0.0",
"description": "US Street Address Parser",
"main": "address.js",
"scripts": {
Expand Down
64 changes: 32 additions & 32 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,23 @@ var address = {
'1005 Gravenstein Highway North, 95472': {
number: '1005',
street: 'Gravenstein',
type: 'Highway',
suffix: 'North',
type: 'Hwy',
suffix: 'N',
zip: '95472'
},
'1005 N Gravenstein Highway, Sebastopol, CA': {
number: '1005',
prefix: 'N',
street: 'Gravenstein',
type: 'Highway',
type: 'Hwy',
city: 'Sebastopol',
state: 'CA'
},
'1005 N Gravenstein Highway, Suite 500, Sebastopol, CA': {
number: '1005',
prefix: 'N',
street: 'Gravenstein',
type: 'Highway',
type: 'Hwy',
sec_unit_type: 'Suite',
sec_unit_num: '500',
city: 'Sebastopol',
Expand All @@ -60,7 +60,7 @@ var address = {
number: '1005',
prefix: 'N',
street: 'Gravenstein',
type: 'Highway',
type: 'Hwy',
city: 'Sebastopol',
state: 'CA',
zip: '95472'
Expand All @@ -69,7 +69,7 @@ var address = {
number: '1005',
prefix: 'N',
street: 'Gravenstein',
type: 'Highway',
type: 'Hwy',
city: 'Sebastopol',
state: 'CA',
zip: '95472'
Expand Down Expand Up @@ -150,7 +150,7 @@ var address = {
'1600 Pennsylvania Avenue Washington DC': {
number: '1600',
street: 'Pennsylvania',
type: 'Avenue',
type: 'Ave',
city: 'Washington',
state: 'DC'
},
Expand Down Expand Up @@ -193,7 +193,7 @@ var address = {
},
'100 S.E. Washington Ave, Minneapolis, MN': {
number: '100',
prefix: 'SE.',
prefix: 'SE',
street: 'Washington',
type: 'Ave',
city: 'Minneapolis',
Expand All @@ -202,7 +202,7 @@ var address = {
'3813 1/2 Some Road, Los Angeles, CA': {
number: '3813',
street: 'Some',
type: 'Road',
type: 'Rd',
city: 'Los Angeles',
state: 'CA'
},
Expand Down Expand Up @@ -257,16 +257,16 @@ var address = {
'Mission & Valencia Streets San Francisco CA': {
street1: 'Mission',
street2: 'Valencia',
type2: 'Street',
type2: 'St',
city: 'San Francisco',
state: 'CA',
type1: 'Street'
type1: 'St'
},
'Mission Avenue and Valencia Street San Francisco CA': {
street1: 'Mission',
type1: 'Avenue',
type1: 'Ave',
street2: 'Valencia',
type2: 'Street',
type2: 'St',
city: 'San Francisco',
state: 'CA'
},
Expand All @@ -281,7 +281,7 @@ var address = {
number: '123',
street: 'Maple',
city: 'Rochester',
state: 'New York'
state: 'NY'
},
'233 S Wacker Dr 60606-6306': {
number: '233',
Expand All @@ -299,21 +299,21 @@ var address = {
zip: '60606',
plus4: '6306'
},
"233 S Wacker Dr 60606 6306": {
number: "233",
prefix: "S",
street: "Wacker",
type: "Dr",
zip: "60606",
plus4: "6306"
},
"S Wacker Dr 60606 6306": {
prefix: "S",
street: "Wacker",
type: "Dr",
zip: "60606",
plus4: "6306"
},
'233 S Wacker Dr 60606 6306': {
number: '233',
prefix: 'S',
street: 'Wacker',
type: 'Dr',
zip: '60606',
plus4: '6306'
},
'S Wacker Dr 60606 6306': {
prefix: 'S',
street: 'Wacker',
type: 'Dr',
zip: '60606',
plus4: '6306'
},
'233 S Wacker Dr lobby 60606': {
number: '233',
prefix: 'S',
Expand Down Expand Up @@ -344,7 +344,7 @@ var address = {
sec_unit_num: '42',
number: '99',
street: 'Some',
type: 'Road',
type: 'Rd',
city: 'Some City',
state: 'LA'
},
Expand Down Expand Up @@ -385,7 +385,7 @@ var address = {
}
};

Object.keys(address).forEach(function(k) {
Object.keys(address).forEach(function (k) {
var parsed = parser.parseLocation(k);
assert.deepEqual(address[k], parsed);
});
});

0 comments on commit 8c45f4b

Please sign in to comment.