From 8c45f4b6a63eb51807a5240d097da075f3065aba Mon Sep 17 00:00:00 2001 From: Joel Date: Sat, 14 Jul 2018 18:17:09 -0500 Subject: [PATCH] Implement Geo::StreetAddress::US's normalizer (#17) * add type parser back * create tests from perl * fix error caused by perl wrapper * add normalization map * readme update to reflect changes * use delete instead of setting undefined * fix punctuation regex * remove redundant type check. * comment out street type match again * add missing semicolon * update order of keys in test objects. * remove test console log * remove substitution flag * 1.0.0 --- README.md | 12 ++++----- address.js | 25 +++++++++++++++++- package-lock.json | 13 ++++++++++ package.json | 2 +- test.js | 64 +++++++++++++++++++++++------------------------ 5 files changed, 75 insertions(+), 41 deletions(-) create mode 100644 package-lock.json diff --git a/README.md b/README.md index 18a183c..f2f0191 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## US Street Address Parser [![Build Status](https://travis-ci.org/hassansin/parse-address.svg)](https://travis-ci.org/hassansin/parse-address) +# US Street Address Parser [![Build Status](https://travis-ci.org/hassansin/parse-address.svg)](https://travis-ci.org/hassansin/parse-address) This is Node.js port for Perl [Geo::StreetAddress::US](http://search.cpan.org/~timb/Geo-StreetAddress-US-1.04/US.pm) package @@ -6,7 +6,7 @@ This is Node.js port for Perl [Geo::StreetAddress::US](http://search.cpan.org/~t >Geo::StreetAddress::US is a regex-based street address and street intersection parser for the United States. Its basic goal is to be as forgiving as possible when parsing user-provided address strings. Geo::StreetAddress::US knows about directional prefixes and suffixes, fractional building numbers, building units, grid-based addresses (such as those used in parts of Utah), 5 and 9 digit ZIP codes, and all of the official USPS abbreviations for street types and state names... 
[more](http://search.cpan.org/~timb/Geo-StreetAddress-US-1.04/US.pm) -### Usage: +## Usage: ```javascript //from node: @@ -19,14 +19,12 @@ var parsed = parser.parseLocation('1005 N Gravenstein Highway Sebastopol CA 9547 var parsed = parseAddress.parseLocation('1005 N Gravenstein Highway Sebastopol CA 95472'); //Parsed address: -{ +{ number: '1005', prefix: 'N', street: 'Gravenstein', - type: 'Highway', + type: 'Hwy', city: 'Sebastopol', state: 'CA', zip: '95472' } - -``` - +``` \ No newline at end of file diff --git a/address.js b/address.js index 265d8ea..b7b82a6 100644 --- a/address.js +++ b/address.js @@ -459,6 +459,19 @@ var Direction_Code; var initialized = false; + var Normalize_Map = { + prefix: Directional, + prefix1: Directional, + prefix2: Directional, + suffix: Directional, + suffix1: Directional, + suffix2: Directional, + type: Street_Type, + type1: Street_Type, + type2: Street_Type, + state: State_Code, + } + function capitalize(s){ return s && s[0].toUpperCase() + s.slice(1); } @@ -632,7 +645,17 @@ return; var key = isFinite(k.split('_').pop())? 
k.split('_').slice(0,-1).join('_'): k ; if(parts[k]) - parsed[key] = parts[k].trim().replace(/[^\w\s\-\#\&]/,''); + parsed[key] = parts[k].trim().replace(/^\s+|\s+$|[^\w\s\-#&]/g, ''); + }); + each(Normalize_Map, function(map,key) { + if(parsed[key] && map[parsed[key].toLowerCase()]) { + parsed[key] = map[parsed[key].toLowerCase()]; + } + }); + + ['type', 'type1', 'type2'].forEach(function(key){ + if(key in parsed) + parsed[key] = parsed[key].charAt(0).toUpperCase() + parsed[key].slice(1).toLowerCase(); }); if(parsed.city){ diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..14ca802 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,13 @@ +{ + "name": "parse-address", + "version": "1.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "xregexp": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/xregexp/-/xregexp-3.2.0.tgz", + "integrity": "sha1-yzYBmHv+JpW1hAAMGPHEqMMih44=" + } + } +} diff --git a/package.json b/package.json index e3fb4a3..2d1f1d6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "parse-address", - "version": "0.0.10", + "version": "1.0.0", "description": "US Street Address Parser", "main": "address.js", "scripts": { diff --git a/test.js b/test.js index 5406121..6c15cc5 100644 --- a/test.js +++ b/test.js @@ -24,15 +24,15 @@ var address = { '1005 Gravenstein Highway North, 95472': { number: '1005', street: 'Gravenstein', - type: 'Highway', - suffix: 'North', + type: 'Hwy', + suffix: 'N', zip: '95472' }, '1005 N Gravenstein Highway, Sebastopol, CA': { number: '1005', prefix: 'N', street: 'Gravenstein', - type: 'Highway', + type: 'Hwy', city: 'Sebastopol', state: 'CA' }, @@ -40,7 +40,7 @@ var address = { number: '1005', prefix: 'N', street: 'Gravenstein', - type: 'Highway', + type: 'Hwy', sec_unit_type: 'Suite', sec_unit_num: '500', city: 'Sebastopol', @@ -60,7 +60,7 @@ var address = { number: '1005', prefix: 'N', street: 'Gravenstein', - type: 'Highway', + type: 
'Hwy', city: 'Sebastopol', state: 'CA', zip: '95472' @@ -69,7 +69,7 @@ var address = { number: '1005', prefix: 'N', street: 'Gravenstein', - type: 'Highway', + type: 'Hwy', city: 'Sebastopol', state: 'CA', zip: '95472' @@ -150,7 +150,7 @@ var address = { '1600 Pennsylvania Avenue Washington DC': { number: '1600', street: 'Pennsylvania', - type: 'Avenue', + type: 'Ave', city: 'Washington', state: 'DC' }, @@ -193,7 +193,7 @@ var address = { }, '100 S.E. Washington Ave, Minneapolis, MN': { number: '100', - prefix: 'SE.', + prefix: 'SE', street: 'Washington', type: 'Ave', city: 'Minneapolis', @@ -202,7 +202,7 @@ var address = { '3813 1/2 Some Road, Los Angeles, CA': { number: '3813', street: 'Some', - type: 'Road', + type: 'Rd', city: 'Los Angeles', state: 'CA' }, @@ -257,16 +257,16 @@ var address = { 'Mission & Valencia Streets San Francisco CA': { street1: 'Mission', street2: 'Valencia', - type2: 'Street', + type2: 'St', city: 'San Francisco', state: 'CA', - type1: 'Street' + type1: 'St' }, 'Mission Avenue and Valencia Street San Francisco CA': { street1: 'Mission', - type1: 'Avenue', + type1: 'Ave', street2: 'Valencia', - type2: 'Street', + type2: 'St', city: 'San Francisco', state: 'CA' }, @@ -281,7 +281,7 @@ var address = { number: '123', street: 'Maple', city: 'Rochester', - state: 'New York' + state: 'NY' }, '233 S Wacker Dr 60606-6306': { number: '233', @@ -299,21 +299,21 @@ var address = { zip: '60606', plus4: '6306' }, - "233 S Wacker Dr 60606 6306": { - number: "233", - prefix: "S", - street: "Wacker", - type: "Dr", - zip: "60606", - plus4: "6306" - }, - "S Wacker Dr 60606 6306": { - prefix: "S", - street: "Wacker", - type: "Dr", - zip: "60606", - plus4: "6306" - }, + '233 S Wacker Dr 60606 6306': { + number: '233', + prefix: 'S', + street: 'Wacker', + type: 'Dr', + zip: '60606', + plus4: '6306' + }, + 'S Wacker Dr 60606 6306': { + prefix: 'S', + street: 'Wacker', + type: 'Dr', + zip: '60606', + plus4: '6306' + }, '233 S Wacker Dr lobby 60606': { number: 
'233', prefix: 'S', @@ -344,7 +344,7 @@ var address = { sec_unit_num: '42', number: '99', street: 'Some', - type: 'Road', + type: 'Rd', city: 'Some City', state: 'LA' }, @@ -385,7 +385,7 @@ var address = { } }; -Object.keys(address).forEach(function(k) { +Object.keys(address).forEach(function (k) { var parsed = parser.parseLocation(k); assert.deepEqual(address[k], parsed); -}); +}); \ No newline at end of file