''Chopping'' up a ntext field

  • I need to cycle through a table and update address1, address2, address3 and address4 fields with the contents of a multiline field (ntext) named address.  How can I use the carriage return as the separator to determine the different lines of the address field?

    TIA

    James Knight

  • Hi...

    in VB/VBA you can use the split function. You´ll get back an Array:

    Dim vTmp as Variant

    vTmp = Split(RS.Fields("address"), vbCrLf)

    RS.Fields("address1) = vTmp(0)

    RS.Fields("address2) = vTmp(1)

    RS.Fields("address3) = vTmp(2)

    RS.Fields("address4) = vTmp(3)

    RS.Update

    May this help you.

    Greetings from Germany

    Roland

  • James,

    Lots of ways to do this in T-SQL. Two quick-and-dirty ways:

    METHOD 1:

    DROP TABLE textaddr

    DROP TABLE addresses

    GO

    CREATE TABLE textaddr

    (

      id int PRIMARY KEY IDENTITY(1,1),

      addr ntext null

    )

    CREATE TABLE addresses

    (

      id int PRIMARY KEY,

      address1 varchar(50),

      address2 varchar(50),

      address3 varchar(50),

      address4 varchar(50)

    )

    SET NOCOUNT ON

    INSERT textaddr (addr) VALUES ( 'Line 1a' + Char(13) + 'Line 2a' + Char(13) + 'Line 3a' + Char(13) + 'Line 4a' )

    INSERT textaddr (addr) VALUES ( 'Line 1b' + Char(13) + 'Line 2b' + Char(13) + 'Line 3b' + Char(13) + 'Line 4b' )

    INSERT textaddr (addr) VALUES ( 'Line 1c' + Char(13) + 'Line 2c' + Char(13) + 'Line 3c' + Char(13) + 'Line 4c' )

    INSERT textaddr (addr) VALUES ( 'Line 1d' + Char(13) + 'Line 2d' + Char(13) + 'Line 3d' + Char(13) + 'Line 4d' )

    -- In this example, I'll assume that you already

    -- got rows in the table, and that you want to update them.

    INSERT addresses (id) SELECT id FROM textaddr ORDER BY id

    -- Get everything into the table, address1 ok, rest in address2

    UPDATE addresses

       SET address1 = Substring(t.addr, 1, CharIndex(Char(13), t.addr, 1) - 1),

           address2 = Substring(t.addr, CharIndex(Char(13), t.addr, 1) + 1, 4000) + Char(13)+ Char(13)+ Char(13)

      FROM addresses a JOIN textaddr t ON a.id = t.id

    -- Split address2, store first line in address2, rest in addr3

    UPDATE addresses

       SET address3 = Substring(address2, CharIndex(Char(13), address2, 1) + 1, 4000),

           address2 = Substring(address2, 1, CharIndex(Char(13), address2, 1) - 1)

    -- Split address3, store first line in address3, rest in addr4

    UPDATE addresses

       SET address4 = Substring(address3, CharIndex(Char(13), address3, 1) + 1, 4000),

           address3 = Substring(address3, 1, CharIndex(Char(13), address3, 1) - 1)

    -- Split address4, store first line in address4, discard the rest

    UPDATE addresses

       SET address4 = Substring(address4, 1, CharIndex(Char(13), address4, 1) - 1)

    SELECT * FROM addresses

    SET NOCOUNT OFF

    METHOD 2:

    -- UDF required by method 2.

    -- I use dbo.fGetToken all the time. I modified it for use here with

    -- the nvarchar type, thus the N at the end of the function name.

    CREATE FUNCTION dbo.fGetTokenN

    (

      @parm nvarchar(4000),

      @delim nvarchar(100),

      @whichOccur smallint

    )

    RETURNS varchar(8000)

    AS

    BEGIN

    DECLARE @occur int, @spos int, @startPos int, @maxlen int

    DECLARE @token nvarchar(4000)

    DECLARE @delimLen int

    -- Len() function ignores trailing blanks, so if our delimiter is a space,

    -- or multiple spaces, Len() will returns zero.

    IF Len(@delim) = 0 AND DataLength(@delim) > 0

      SET @delimLen = DataLength(@delim) / 2  -- divide by 2 if using nvarchar

    ELSE

      SET @delimLen = Len(@delim)

    SET @occur = 0

    SET @startPos = 1

    SET @spos = 1 - @delimLen

    SET @maxLen = Len(@parm)

    WHILE @occur < @whichOccur AND @parm <> N'' AND @parm IS NOT NULL AND @startPos < @maxLen

    BEGIN

      SET @occur = @occur + 1

      SET @startPos = @spos + @delimLen

      SET @spos = CHARINDEX( @delim , @parm, @startPos )

      IF @spos = 0

        BREAK

    END

    IF @occur <> @whichOccur

      SET @token = N'' -- or NULL

    ELSE

      IF @spos = 0

        SET @token = Substring(@parm, @startPos, 4000)

      ELSE

        SET @token = SubString( @parm, @startPos, @spos - @startPos)

    RETURN @token

    END

    GO

    -- This is example 2

    DROP TABLE textaddr

    DROP TABLE addresses

    GO

    CREATE TABLE textaddr

    (

      id int PRIMARY KEY IDENTITY(1,1),

      addr ntext null

    )

    CREATE TABLE addresses

    (

      id int PRIMARY KEY,

      address1 varchar(50),

      address2 varchar(50),

      address3 varchar(50),

      address4 varchar(50)

    )

    SET NOCOUNT ON

    INSERT textaddr (addr) VALUES ( 'Line 1a' + Char(13) + 'Line 2a' + Char(13) + 'Line 3a' + Char(13) + 'Line 4a' )

    INSERT textaddr (addr) VALUES ( 'Line 1b' + Char(13) + 'Line 2b' + Char(13) + 'Line 3b' + Char(13) + 'Line 4b' )

    INSERT textaddr (addr) VALUES ( 'Line 1c' + Char(13) + 'Line 2c' + Char(13) + 'Line 3c' + Char(13) + 'Line 4c' )

    INSERT textaddr (addr) VALUES ( 'Line 1d' + Char(13) + 'Line 2d' + Char(13) + 'Line 3d' + Char(13) + 'Line 4d' )

    -- assume that you are updating existing records

    INSERT addresses (id) SELECT id FROM textaddr ORDER BY id

    SET NOCOUNT OFF

    UPDATE addresses

       SET address1 = dbo.fGetTokenN(t.addr, Char(13), 1) ,

           address2 = dbo.fGetTokenN(t.addr, Char(13), 2) ,

           address3 = dbo.fGetTokenN(t.addr, Char(13), 3) ,

           address4 = dbo.fGetTokenN(t.addr, Char(13), 4)

      FROM addresses a JOIN textaddr t ON a.id = t.id

    SELECT * FROM addresses ORDER BY id

     

  • Thanks for that! I have a slight problem though.....There are some addresses that have a null value in the address field (in smaller towns, a company might be so big is doesn't even have a road name applied to it) and some which only have a single line (no char(13)).  These seem to cause the script to fail.

    Thanks,

     

    James Knight

  • James,

    The update method has a problem with single line addresses. Nulls aren't a problem in my testing. However, the UDF method is cleaner looking, and works for all situations. I was just trying to provide a couple of different methods in case performance was an issue. Also, the UDF returns an empty string when the requested value doesn't exists (i.e. 3 lines, and you request line 4). Just change the function to return NULL instead of N'' if that's what you want.

    IF @occur <> @whichOccur

      SET @token = NULL

    ELSE

      IF @spos = 0 ....

    Here's the code that I tested (just added more INSERTS to test)

    DROP TABLE textaddr

    DROP TABLE addresses

    GO

    CREATE TABLE textaddr

    (

      id int PRIMARY KEY IDENTITY(1,1),

      addr ntext null

    )

    CREATE TABLE addresses

    (

      id int PRIMARY KEY,

      address1 varchar(50),

      address2 varchar(50),

      address3 varchar(50),

      address4 varchar(50)

    )

    SET NOCOUNT ON

    INSERT textaddr (addr) VALUES ( 'Line 1a' + Char(13) + 'Line 2a' + Char(13) + 'Line 3a' + Char(13) + 'Line 4a' )

    INSERT textaddr (addr) VALUES ( 'Line 1b' + Char(13) + 'Line 2b' + Char(13) + 'Line 3b' + Char(13) + 'Line 4b' )

    INSERT textaddr (addr) VALUES ( 'Line 1c' + Char(13) + 'Line 2c' + Char(13) + 'Line 3c' + Char(13) + 'Line 4c' )

    INSERT textaddr (addr) VALUES ( 'Line 1d' + Char(13) + 'Line 2d' + Char(13) + 'Line 3d' + Char(13) + 'Line 4d' )

    INSERT textaddr (addr) VALUES ( null )

    INSERT textaddr (addr) VALUES ( 'Line 1f'  )

    INSERT textaddr (addr) VALUES ( 'Line 1g'  )

    INSERT textaddr (addr) VALUES ( 'Line 1h' + Char(13) + 'Line 2h' )

    INSERT textaddr (addr) VALUES ( 'Line 1j' + Char(13) + 'Line 2j' + Char(13) + 'Line 3j' + Char(13) + 'Line 4j' + Char(13) + 'Line 5j' )

    -- assume that you are updating existing records

    INSERT addresses (id) SELECT id FROM textaddr ORDER BY id

    SET NOCOUNT OFF

    UPDATE addresses

       SET address1 = dbo.fGetTokenN(t.addr, Char(13), 1) ,

           address2 = dbo.fGetTokenN(t.addr, Char(13), 2) ,

           address3 = dbo.fGetTokenN(t.addr, Char(13), 3) ,

           address4 = dbo.fGetTokenN(t.addr, Char(13), 4)

      FROM addresses a JOIN textaddr t ON a.id = t.id

    SELECT * FROM addresses ORDER BY id

    /*

    I got these results:

    (9 row(s) affected)

    id  address1   address2   address3   address4  

    --- ---------- ---------- ---------- ----------

    1   Line 1a    Line 2a    Line 3a    Line 4a

    2   Line 1b    Line 2b    Line 3b    Line 4b

    3   Line 1c    Line 2c    Line 3c    Line 4c

    4   Line 1d    Line 2d    Line 3d    Line 4d

    5                                   

    6   Line 1f                         

    7   Line 1g                         

    8   Line 1h    Line 2h              

    9   Line 1j    Line 2j    Line 3j    Line 4j

    (9 row(s) affected

    */

  • Works a treat apart from 1 thing.......it doesn't delete the char(13)'s while breaking up the field.  I had a look at the udf, but the code is going way over my head (easily done I know!)

    Cheers,

    James

  • Hmm..it deletes all the Char(13)'s in my tests. Does it do this for all rows, or just some? 

    Change the last SELECT statement in the example to read:

    SELECT *

      FROM addresses

     WHERE CharIndex( Char(13), address1, 1) > 0

        OR CharIndex( Char(13), address2, 1) > 0

        OR CharIndex( Char(13), address3, 1) > 0

        OR CharIndex( Char(13), address4, 1) > 0

     ORDER BY id

    That will list any rows for which anyone of the address columns contains a Char(13). Also, are the lines in the text column delimited with just a Char(13), or with Char(13)+Char(10) (carriage return + line feed)?

    Run this:

    SELECT *

      FROM addresses

     WHERE CharIndex( Char(10), address1, 1) > 0

        OR CharIndex( Char(10), address2, 1) > 0

        OR CharIndex( Char(10), address3, 1) > 0

        OR CharIndex( Char(10), address4, 1) > 0

     ORDER BY id

    If the text is delimited with Char(13)+Char(10), just change the function call like this:

    UPDATE addresses

       SET address1 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 1) ,

           address2 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 2) ,

           address3 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 3) ,

           address4 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 4)

      FROM addresses a JOIN textaddr t ON a.id = t.id

  • You were right, there were line feeds as well.

    UPDATE address

       SET addr_address1 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 1) ,

           addr_address2 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 2) ,

           addr_address3 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 3) ,

           addr_address4 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 4) ,

           addr_address5 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 5)

    This worked perfectly.  Now I just need to do some cleaning up and set all fields comprised of just spaces to NULL.

    Many thanks,

    James

  • As I mentioned earlier, you can modify the fGetTokenN function to return NULL instead of the empty string for addresses that are less than 4 lines. Note the bold text near the bottom of the function.

    DROP FUNCTION dbo.fGetTokenN

    GO

    CREATE FUNCTION dbo.fGetTokenN

    (

      @parm nvarchar(4000),

      @delim nvarchar(100),

      @whichOccur smallint

    )

    RETURNS nvarchar(4000)

    AS

    BEGIN

    DECLARE @occur int, @spos int, @startPos int, @maxlen int

    DECLARE @token nvarchar(4000)

    DECLARE @delimLen int

    -- Len() function ignores trailing blanks, so if our delimiter is a space,

    -- or multiple spaces, Len() will returns zero.

    IF Len(@delim) = 0 AND DataLength(@delim) > 0

      SET @delimLen = DataLength(@delim) / 2  -- divide by 2 if using nvarchar

    ELSE

      SET @delimLen = Len(@delim)

    SET @occur = 0

    SET @startPos = 1

    SET @spos = 1 - @delimLen

    SET @maxLen = Len(@parm)

    WHILE @occur < @whichOccur AND @parm <> N'' AND @parm IS NOT NULL AND @startPos < @maxLen

    BEGIN

      SET @occur = @occur + 1

      SET @startPos = @spos + @delimLen

      SET @spos = CHARINDEX( @delim , @parm, @startPos )

      IF @spos = 0

        BREAK

    END

    IF @occur <> @whichOccur

      SET @token = NULL -- instead of N'' 

    ELSE

      IF @spos = 0

        SET @token = Substring(@parm, @startPos, 4000)

      ELSE

        SET @token = SubString( @parm, @startPos, @spos - @startPos)

    RETURN @token

    END

  • Hi,

    I did make that amendment, I was just saying that I need to clean up the data somewhat.  Thanks for your help.

    Much appreciated,

    James Knight

Viewing 10 posts - 1 through 9 (of 9 total)

You must be logged in to reply to this topic. Login to reply